Skip to content

Commit

Permalink
Merge pull request #72 from fzi-forschungszentrum-informatik/Debug_SETS
Browse files Browse the repository at this point in the history
Debug sets
  • Loading branch information
JHoelli authored Nov 25, 2024
2 parents 4a553dd + 2bfd101 commit b78b32e
Show file tree
Hide file tree
Showing 10 changed files with 1,411 additions and 125 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -662,8 +662,8 @@ def transform(self, X, y=None):
shapelet_distances.append(dist)

min_dist = min(min_dist, dist)

output[i][s] = dist
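# TODO: changed to store the running minimum distance (min_dist) instead of the last computed dist -- confirm this is intended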
output[i][s] = min_dist

self.shapelets[s].distances[i] = np.asarray(shapelet_distances)

Expand Down
56 changes: 37 additions & 19 deletions TSInterpret/InterpretabilityModels/counterfactual/SETS/sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,13 @@
get_all_shapelet_locations_scaled_threshold,
get_all_shapelet_locations_scaled_threshold_test,
get_nearest_neighbor,
get_shapelets_locations_test,
get_shapelets_locations_test,get_shapelets_distances
)


# cast to tf format
def to_tff(x):
return np.expand_dims(np.swapaxes(x, 0, 1), axis=0)

return np.expand_dims(np.swapaxes(x, 0, 1), axis=0)

def fit_shapelets(
data,
Expand All @@ -34,6 +33,7 @@ def fit_shapelets(
random.seed(random_seed)
X_train, y_train = data


# make deep copy for reusability
fitted_shapelets = copy.deepcopy(st_shapelets)

Expand All @@ -51,6 +51,7 @@ def fit_shapelets(
all_heat_maps = {}

for c in np.unique(y_train):
#print(c)
all_shapelets_class[c] = []
all_heat_maps[c] = []

Expand Down Expand Up @@ -156,12 +157,21 @@ def sets_explain(
from_3d_numpy_to_nested(np.expand_dims(instance_x, axis=0))
)

shapelet_dist=[]
for st in transformer.sts:
save=[]
for shp in st.shapelets:
save.append(shp.distances)
shapelet_dist.append(save)

shapelets_distances_test=shapelet_dist
all_shapelet_locations_test, _ = get_all_shapelet_locations_scaled_threshold_test(
[np.expand_dims(shapelets_distances_test, axis=0)],
instance_x.shape[1],
threshhold,
shapelets_distances_test,
ts_length,
threshhold
)


# Sort dimensions by their highest shapelet scores
shapelets_best_scores = []
for dim in range(len(st_shapelets)):
Expand All @@ -174,13 +184,15 @@ def sets_explain(
# fit a KNN for each class
for c in np.unique(y_train):
knns[c] = KNeighborsTimeSeries(n_neighbors=1)
if X_train.shape[1]!= ts_length:
X_train=np.swapaxes(X_train,1,2)
X_train_knn = X_train[np.argwhere(y_train == c)].reshape(
np.argwhere(y_train == c).shape[0], X_train.shape[1], X_train.shape[2]
np.argwhere(y_train == c).shape[0], ts_length,-1
)
X_train_knn = np.swapaxes(X_train_knn, 1, 2)
knns[c].fit(X_train_knn)

orig_c = int(np.argmax(model.predict(to_tff(instance_x))))
orig_c = int(np.argmax(model.predict(to_tff(instance_x)),axis=1)[0])

if len(target) > 1:
target.remove(orig_c)
for target_c in target:
Expand All @@ -202,7 +214,7 @@ def sets_explain(
cf = instance_x.copy()

cf_pred = model.predict(to_tff(cf))
cf_pred = np.argmax(cf_pred)
cf_pred = np.argmax(cf_pred,axis=1)[0]
if target_c != cf_pred:
# Get the locations where the original class shapelets occur
all_locs = get_shapelets_locations_test(
Expand All @@ -215,7 +227,7 @@ def sets_explain(
for c_i in all_locs:
for loc in all_locs.get(c_i):
cf_pred = model.predict(to_tff(cf))
cf_pred = np.argmax(cf_pred)
cf_pred = np.argmax(cf_pred,axis=1)[0]
if target_c != cf_pred:
# print('Removing original shapelet')
nn = X_train[nn_idx].reshape(-1)
Expand All @@ -238,15 +250,18 @@ def sets_explain(

start = loc[0]
end = loc[1]
#print('start', start)
#print('end', end)

cf[dim][start:end] = target_shapelet
assert np.any(instance_x !=cf ), f"Pertubed instance is identical to the original instance"


# Introduce new shapelets from the target class
for idx, target_shapelet_idx in enumerate(all_target_heat_maps.keys()):
cf_pred = model.predict(to_tff(cf))
cf_pred = np.argmax(cf_pred)
cf_pred = np.argmax(cf_pred,axis=1)[0]
if target_c != cf_pred:
# print('Introducing new shapelet')
h_m = all_target_heat_maps[target_shapelet_idx]
center = (
np.argwhere(h_m > 0)[-1][0] - np.argwhere(h_m > 0)[0][0]
Expand Down Expand Up @@ -283,10 +298,12 @@ def sets_explain(

cf[dim][start:end] = target_shapelet

assert np.any(instance_x !=cf), f"Pertubed instance is identical to the original instance"

# Save the perturbed dimension
cf_dims[dim] = cf[dim]
cf_pred = model.predict(to_tff(cf))
cf_pred = np.argmax(cf_pred)
cf_pred = np.argmax(cf_pred,axis=1)[0]
if target_c == cf_pred:
return cf, cf_pred
elif target_c != cf_pred:
Expand All @@ -298,10 +315,11 @@ def sets_explain(
for dim_ in subset:
cf[dim_] = cf_dims[dim_]
cf_pred = model.predict(to_tff(cf))
cf_pred = np.argmax(cf_pred)
cf_pred = np.argmax(cf_pred,axis=1)[0]
if target_c == cf_pred:
break
if target_c == cf_pred:
return cf, cf_pred
else:
return None, None

#if orig_c != cf_pred:
return cf, cf_pred
#else:
# return None, None
39 changes: 28 additions & 11 deletions TSInterpret/InterpretabilityModels/counterfactual/SETS/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,19 +123,24 @@ def remove_similar_locations(shapelet_locations, shapelet_distances):

# Given the shapelet_distances matrix of a given shapelet, get the locations of
# the closest shapelets from the entire dataset
def get_shapelet_locations_scaled_threshold(shapelet_distances, ts_length, threshold):
def get_shapelet_locations_scaled_threshold(shapelet_distances, ts_length, threshold, shapelets=None):
# Compute the length of the shapelet
shapelet_length = ts_length - shapelet_distances.shape[1] + 1

# Get the indices of the n closest shapelets to the original shapelet
s_indices = []
for i in range(shapelet_distances.shape[0]):
for j in range(shapelet_distances.shape[1]):
# i Iterates Items
# j iterates Shapelets
# Compare to the threshold, scaled to shapelet length
#shapelet_length = ts_length - len(shapelet_distances[j]) + 1
if shapelet_distances[i][j] / shapelet_length <= threshold:
#j is the number of the shapelet
s_indices.append(np.array([i, j]))

if len(s_indices) > 0:
# Relevant shapelet indices
s_indices = np.asarray(s_indices)

# Create an array to store the locations of the closest n shapelets
Expand All @@ -147,7 +152,6 @@ def get_shapelet_locations_scaled_threshold(shapelet_distances, ts_length, thres
shapelet_locations[i] = np.append(
s_indices[i], s_indices[i][1] + shapelet_length
)

# Remove overlapping shapelets and keep the closest one to the original shapelet
shapelet_locations = remove_similar_locations(
shapelet_locations, shapelet_distances
Expand Down Expand Up @@ -176,9 +180,9 @@ def get_occurences_threshold(shapelets_distances, ts_length, percentage):
# Sort the distances ascendingly
sds.sort()


# Number of shapelet occurences to keep (per shapelet)
n = int(percentage * len(sds))

# Return the threshold distance to select the shapelet occurences to keep
return sds[n]

Expand All @@ -188,6 +192,7 @@ def get_occurences_threshold(shapelets_distances, ts_length, percentage):
def get_all_shapelet_locations_scaled_threshold(
shapelets_distances, ts_length, percentage
):

# Get the threshold to be used for selecting shapelet occurences
threshold = get_occurences_threshold(shapelets_distances, ts_length, percentage)

Expand All @@ -213,19 +218,28 @@ def get_all_shapelet_locations_scaled_threshold(
# Get the locations of the closest shapelets for each timeseries across the
# entire dataset based on the training threshold
def get_all_shapelet_locations_scaled_threshold_test(
shapelets_distances, ts_length, threshold
shapelets_distances, ts_length, threshold,shapelets =None
):

threshold=5
all_shapelet_locations = []
all_no_occurences = []

for dim in shapelets_distances:
# Iterate over the dimensions
dim_shapelet_locations = []
no_occurences = []
if type(dim) == int:
dim= shapelets_distances[0]
for i, shapelet in enumerate(dim):

# Iterate over the shapelets (index 0 .. number of shapelets)?
# Get the shapelet Locations
sls = get_shapelet_locations_scaled_threshold(
shapelet, ts_length, threshold
shapelet, ts_length, threshold,shapelets
)
if sls[0][0] != 4294967295:
#print('Append',sls)
dim_shapelet_locations.append(sls)
else:
no_occurences.append(i)
Expand All @@ -236,31 +250,34 @@ def get_all_shapelet_locations_scaled_threshold_test(


def get_shapelets_locations_test(idx, all_sls, dim, all_shapelets_class):
if len(np.array(all_shapelets_class).shape):
all_shapelets_class=[all_shapelets_class]
all_locs = {}
try:

if True:
for i, s in enumerate([all_sls[dim][j] for j in all_shapelets_class[dim]]):

i_locs = []
for loc in s:
if loc[0] == idx:
if True:
# TODO: confirm whether filtering locations by `loc[0] == idx` is still necessary
#if loc[0] == idx:
loc = (loc[1], loc[2])
i_locs.append(loc)
all_locs[i] = i_locs
except Exception as ex:
pass

return all_locs


##Optimize by fitting outside or returning a list of all nns at once
## Reworked so that only training data is available.
def get_nearest_neighbor(knn, instance_x, pred_label, x_train, y_train):
# pred_label = y_pred[idx]
target_labels = np.argwhere(y_train != pred_label)

X_train_knn = instance_x.reshape(1, instance_x.shape[0], instance_x.shape[1])
X_train_knn = np.swapaxes(X_train_knn, 1, 2)

_, nn = knn.kneighbors(X_train_knn)
# print("TARGETLABELS", [t[0] for t in target_labels], [int(nn[0][0])])
nn_idx = None
try:
nn_idx = [t[0] for t in target_labels][int(nn[0][0])]
Expand Down
24 changes: 18 additions & 6 deletions TSInterpret/InterpretabilityModels/counterfactual/SETSCF.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def __init__(
remove_self_similar=True,
silent=False,
fit_shapelets=True,
le=False
) -> None:
"""
Arguments:
Expand Down Expand Up @@ -81,20 +82,24 @@ def __init__(
train_x, train_y = data
self.le = LabelEncoder()
self.train_y = self.le.fit_transform(train_y)
self.mode=mode
if mode == "time":
# Parse test data into (1, feat, time):
change = True
change = False
self.train_x = np.swapaxes(train_x, 2, 1)
self.ts_len = train_x.shape[1]
elif mode == "feat":
change = False
self.train_x = np.array(train_x)
change = True
self.train_x = train_x
self.ts_len = train_x.shape[2]
#self.train_x = np.swapaxes(train_x, 2, 1)
print(self.train_x.shape)
self.train_x_n = from_3d_numpy_to_nested(self.train_x)
print(self.train_x_n.shape)
if backend == "PYT":
self.predict = PyTorchModel(model, change).predict
self.predict = PyTorchModel(model, change)
elif backend == "TF":
self.predict = TensorFlowModel(model, change).predict
self.predict = TensorFlowModel(model, change)
elif backend == "SK":
self.predict = SklearnModel(model, change).predict
# Fit Shapelet Transform
Expand Down Expand Up @@ -183,13 +188,20 @@ def explain(
target = list(np.unique(self.train_y))
else:
target = [target]
if self.mode == 'time':
x= np.swapaxes(x, -1, -2)


#else:
# x=np.swapaxes(x,-1,-2)


expl, label = sets_explain(
x,
target,
(self.train_x, self.train_y),
self.st_transformer,
self.model,
self.predict,
self.ts_len,
self.fitted_shapelets,
self.threshhold,
Expand Down
2 changes: 1 addition & 1 deletion TSInterpret/__version__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
VERSION = (0, 4, 6)
VERSION = (0, 4, 7)
__version__ = ".".join(map(str, VERSION)) # noqa: F401
Loading

0 comments on commit b78b32e

Please sign in to comment.