Merge pull request #72 from fzi-forschungszentrum-informatik/Debug_SETS

Debug sets
fzi-forschungszentrum-informatik · Nov 25, 2024 · b78b32e · b78b32e
2 parents 4a553dd + 2bfd101
commit b78b32e
Show file tree

Hide file tree

Showing 10 changed files with 1,411 additions and 125 deletions.
diff --git a/TSInterpret/InterpretabilityModels/counterfactual/SETS/ContractedST.py b/TSInterpret/InterpretabilityModels/counterfactual/SETS/ContractedST.py
@@ -662,8 +662,8 @@ def transform(self, X, y=None):
                     shapelet_distances.append(dist)
 
                     min_dist = min(min_dist, dist)
-
-                    output[i][s] = dist
+                    # TODO: changed to store the running minimum (min_dist) instead of the latest dist - confirm intended
+                    output[i][s] = min_dist
 
                 self.shapelets[s].distances[i] = np.asarray(shapelet_distances)
 

diff --git a/TSInterpret/InterpretabilityModels/counterfactual/SETS/sets.py b/TSInterpret/InterpretabilityModels/counterfactual/SETS/sets.py
@@ -13,14 +13,13 @@
     get_all_shapelet_locations_scaled_threshold,
     get_all_shapelet_locations_scaled_threshold_test,
     get_nearest_neighbor,
-    get_shapelets_locations_test,
+    get_shapelets_locations_test,get_shapelets_distances
 )
 
 
 # cast to tf format
 def to_tff(x):
-    return np.expand_dims(np.swapaxes(x, 0, 1), axis=0)
-
+    return  np.expand_dims(np.swapaxes(x, 0, 1), axis=0)
 
 def fit_shapelets(
     data,
@@ -34,6 +33,7 @@ def fit_shapelets(
     random.seed(random_seed)
     X_train, y_train = data
 
+
     # make deep copy for reusability
     fitted_shapelets = copy.deepcopy(st_shapelets)
 
@@ -51,6 +51,7 @@ def fit_shapelets(
     all_heat_maps = {}
 
     for c in np.unique(y_train):
+        #print(c)
         all_shapelets_class[c] = []
         all_heat_maps[c] = []
 
@@ -156,12 +157,21 @@ def sets_explain(
         from_3d_numpy_to_nested(np.expand_dims(instance_x, axis=0))
     )
 
+    shapelet_dist=[]
+    for st in transformer.sts:
+        save=[]
+        for shp in st.shapelets:
+            save.append(shp.distances)
+        shapelet_dist.append(save)
+
+    shapelets_distances_test=shapelet_dist
     all_shapelet_locations_test, _ = get_all_shapelet_locations_scaled_threshold_test(
-        [np.expand_dims(shapelets_distances_test, axis=0)],
-        instance_x.shape[1],
-        threshhold,
+        shapelets_distances_test,
+        ts_length,
+        threshhold
     )
 
+
     # Sort dimensions by their highest shapelet scores
     shapelets_best_scores = []
     for dim in range(len(st_shapelets)):
@@ -174,13 +184,15 @@ def sets_explain(
     # fit a KNN for each class
     for c in np.unique(y_train):
         knns[c] = KNeighborsTimeSeries(n_neighbors=1)
+        if X_train.shape[1]!= ts_length:
+            X_train=np.swapaxes(X_train,1,2)
         X_train_knn = X_train[np.argwhere(y_train == c)].reshape(
-            np.argwhere(y_train == c).shape[0], X_train.shape[1], X_train.shape[2]
+            np.argwhere(y_train == c).shape[0], ts_length,-1
         )
-        X_train_knn = np.swapaxes(X_train_knn, 1, 2)
         knns[c].fit(X_train_knn)
 
-    orig_c = int(np.argmax(model.predict(to_tff(instance_x))))
+    orig_c = int(np.argmax(model.predict(to_tff(instance_x)),axis=1)[0])
+
     if len(target) > 1:
         target.remove(orig_c)
     for target_c in target:
@@ -202,7 +214,7 @@ def sets_explain(
             cf = instance_x.copy()
 
             cf_pred = model.predict(to_tff(cf))
-            cf_pred = np.argmax(cf_pred)
+            cf_pred = np.argmax(cf_pred,axis=1)[0]
             if target_c != cf_pred:
                 # Get the locations where the original class shapelets occur
                 all_locs = get_shapelets_locations_test(
@@ -215,7 +227,7 @@ def sets_explain(
                 for c_i in all_locs:
                     for loc in all_locs.get(c_i):
                         cf_pred = model.predict(to_tff(cf))
-                        cf_pred = np.argmax(cf_pred)
+                        cf_pred = np.argmax(cf_pred,axis=1)[0]
                         if target_c != cf_pred:
                             # print('Removing original shapelet')
                             nn = X_train[nn_idx].reshape(-1)
@@ -238,15 +250,18 @@ def sets_explain(
 
                             start = loc[0]
                             end = loc[1]
+                            #print('start', start)
+                            #print('end', end)
 
                             cf[dim][start:end] = target_shapelet
+                            assert np.any(instance_x !=cf ), f"Pertubed instance is identical to the original instance"
+
 
                 # Introduce new shapelets from the target class
                 for idx, target_shapelet_idx in enumerate(all_target_heat_maps.keys()):
                     cf_pred = model.predict(to_tff(cf))
-                    cf_pred = np.argmax(cf_pred)
+                    cf_pred = np.argmax(cf_pred,axis=1)[0]
                     if target_c != cf_pred:
-                        # print('Introducing new shapelet')
                         h_m = all_target_heat_maps[target_shapelet_idx]
                         center = (
                             np.argwhere(h_m > 0)[-1][0] - np.argwhere(h_m > 0)[0][0]
@@ -283,10 +298,12 @@ def sets_explain(
 
                         cf[dim][start:end] = target_shapelet
 
+                        assert np.any(instance_x !=cf), f"Pertubed instance is identical to the original instance"
+
             # Save the perturbed dimension
             cf_dims[dim] = cf[dim]
             cf_pred = model.predict(to_tff(cf))
-            cf_pred = np.argmax(cf_pred)
+            cf_pred = np.argmax(cf_pred,axis=1)[0]
             if target_c == cf_pred:
                 return cf, cf_pred
             elif target_c != cf_pred:
@@ -298,10 +315,11 @@ def sets_explain(
                             for dim_ in subset:
                                 cf[dim_] = cf_dims[dim_]
                             cf_pred = model.predict(to_tff(cf))
-                            cf_pred = np.argmax(cf_pred)
+                            cf_pred = np.argmax(cf_pred,axis=1)[0]
                             if target_c == cf_pred:
                                 break
-            if target_c == cf_pred:
-                return cf, cf_pred
-            else:
-                return None, None
+
+            #if orig_c != cf_pred:
+            return cf, cf_pred
+            #else:
+            #    return None, None
diff --git a/TSInterpret/InterpretabilityModels/counterfactual/SETS/utils.py b/TSInterpret/InterpretabilityModels/counterfactual/SETS/utils.py
@@ -123,19 +123,24 @@ def remove_similar_locations(shapelet_locations, shapelet_distances):
 
 # Given the shapelet_distances matrix of a given shapelet, get the locations of
 # the closest shapelets from the entire dataset
-def get_shapelet_locations_scaled_threshold(shapelet_distances, ts_length, threshold):
+def get_shapelet_locations_scaled_threshold(shapelet_distances, ts_length, threshold, shapelets=None):
     # Compute the length of the shapelet
     shapelet_length = ts_length - shapelet_distances.shape[1] + 1
 
     # Get the indices of the n closest shapelets to the original shapelet
     s_indices = []
     for i in range(shapelet_distances.shape[0]):
         for j in range(shapelet_distances.shape[1]):
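+            # i indexes rows of shapelet_distances (presumably one per time series - confirm)
+            # j indexes columns, i.e. candidate start positions (shape[1] == ts_length - shapelet_length + 1)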
             # Compare to the threshold, scaled to shapelet length
+            #shapelet_length = ts_length - len(shapelet_distances[j]) + 1
             if shapelet_distances[i][j] / shapelet_length <= threshold:
+                # j is the start offset of the match (its end is j + shapelet_length), not a shapelet number
                 s_indices.append(np.array([i, j]))
 
     if len(s_indices) > 0:
+        # Relevant shapelet indices
         s_indices = np.asarray(s_indices)
 
         # Create an array to store the locations of the closest n shapelets
@@ -147,7 +152,6 @@ def get_shapelet_locations_scaled_threshold(shapelet_distances, ts_length, thres
             shapelet_locations[i] = np.append(
                 s_indices[i], s_indices[i][1] + shapelet_length
             )
-
         # Remove overlapping shapelets and keep the closest one to th original shapelet
         shapelet_locations = remove_similar_locations(
             shapelet_locations, shapelet_distances
@@ -176,9 +180,9 @@ def get_occurences_threshold(shapelets_distances, ts_length, percentage):
     # Sort the distances ascendingly
     sds.sort()
 
+
     # Number of shapelet occurences to keep (per shapelet)
     n = int(percentage * len(sds))
-
     # Return the threshold distance to select the shapelet occurences to keep
     return sds[n]
 
@@ -188,6 +192,7 @@ def get_occurences_threshold(shapelets_distances, ts_length, percentage):
 def get_all_shapelet_locations_scaled_threshold(
     shapelets_distances, ts_length, percentage
 ):
+
     # Get the threshold to be used for selecting shapelet occurences
     threshold = get_occurences_threshold(shapelets_distances, ts_length, percentage)
 
@@ -213,19 +218,28 @@ def get_all_shapelet_locations_scaled_threshold(
 # Get the locations of the closest shapelets for each timeseries across the
 # entire dataset based on the training threshold
 def get_all_shapelet_locations_scaled_threshold_test(
-    shapelets_distances, ts_length, threshold
+    shapelets_distances, ts_length, threshold,shapelets =None
 ):
+
+    threshold=5
     all_shapelet_locations = []
     all_no_occurences = []
 
     for dim in shapelets_distances:
+        # Iterate over dimensions
         dim_shapelet_locations = []
         no_occurences = []
+        if type(dim) == int: 
+            dim= shapelets_distances[0]
         for i, shapelet in enumerate(dim):
+
+            # Iterate over this dimension's entries (presumably one per shapelet - confirm)
+            # Get the shapelet locations
             sls = get_shapelet_locations_scaled_threshold(
-                shapelet, ts_length, threshold
+                shapelet, ts_length, threshold,shapelets
             )
             if sls[0][0] != 4294967295:
+                #print('Append',sls)
                 dim_shapelet_locations.append(sls)
             else:
                 no_occurences.append(i)
@@ -236,31 +250,34 @@ def get_all_shapelet_locations_scaled_threshold_test(
 
 
 def get_shapelets_locations_test(idx, all_sls, dim, all_shapelets_class):
+    if len(np.array(all_shapelets_class).shape):
+        all_shapelets_class=[all_shapelets_class]
     all_locs = {}
-    try:
+
+    if True:
         for i, s in enumerate([all_sls[dim][j] for j in all_shapelets_class[dim]]):
+
             i_locs = []
             for loc in s:
-                if loc[0] == idx:
+                if True:
+                # TODO not necessary?
+                #if loc[0] == idx:
                     loc = (loc[1], loc[2])
                     i_locs.append(loc)
             all_locs[i] = i_locs
-    except Exception as ex:
-        pass
+
     return all_locs
 
 
 ##Optimize by fitting outside or returning a list of all nns at once
 ## Reworked so that only training data is available.
 def get_nearest_neighbor(knn, instance_x, pred_label, x_train, y_train):
-    # pred_label = y_pred[idx]
     target_labels = np.argwhere(y_train != pred_label)
 
     X_train_knn = instance_x.reshape(1, instance_x.shape[0], instance_x.shape[1])
     X_train_knn = np.swapaxes(X_train_knn, 1, 2)
 
     _, nn = knn.kneighbors(X_train_knn)
-    # print("TARGETLABELS", [t[0] for t in target_labels], [int(nn[0][0])])
     nn_idx = None
     try:
         nn_idx = [t[0] for t in target_labels][int(nn[0][0])]

diff --git a/TSInterpret/InterpretabilityModels/counterfactual/SETSCF.py b/TSInterpret/InterpretabilityModels/counterfactual/SETSCF.py
@@ -52,6 +52,7 @@ def __init__(
         remove_self_similar=True,
         silent=False,
         fit_shapelets=True,
+        le=False
     ) -> None:
         """
         Arguments:
@@ -81,20 +82,24 @@ def __init__(
         train_x, train_y = data
         self.le = LabelEncoder()
         self.train_y = self.le.fit_transform(train_y)
+        self.mode=mode
         if mode == "time":
             # Parse test data into (1, feat, time):
-            change = True
+            change = False
             self.train_x = np.swapaxes(train_x, 2, 1)
             self.ts_len = train_x.shape[1]
         elif mode == "feat":
-            change = False
-            self.train_x = np.array(train_x)
+            change = True
+            self.train_x = train_x
             self.ts_len = train_x.shape[2]
+            #self.train_x = np.swapaxes(train_x, 2, 1)
+        print(self.train_x.shape)
         self.train_x_n = from_3d_numpy_to_nested(self.train_x)
+        print(self.train_x_n.shape)
         if backend == "PYT":
-            self.predict = PyTorchModel(model, change).predict
+            self.predict = PyTorchModel(model, change)
         elif backend == "TF":
-            self.predict = TensorFlowModel(model, change).predict
+            self.predict = TensorFlowModel(model, change)
         elif backend == "SK":
             self.predict = SklearnModel(model, change).predict
         # Fit Shapelet Transform
@@ -183,13 +188,20 @@ def explain(
             target = list(np.unique(self.train_y))
         else:
             target = [target]
+        if self.mode == 'time':
+            x= np.swapaxes(x, -1, -2)
+
+
+        #else: 
+        #    x=np.swapaxes(x,-1,-2)
+
 
         expl, label = sets_explain(
             x,
             target,
             (self.train_x, self.train_y),
             self.st_transformer,
-            self.model,
+            self.predict,
             self.ts_len,
             self.fitted_shapelets,
             self.threshhold,

diff --git a/TSInterpret/__version__.py b/TSInterpret/__version__.py
@@ -1,2 +1,2 @@
-VERSION = (0, 4, 6)
+VERSION = (0, 4, 7)
 __version__ = ".".join(map(str, VERSION))  # noqa: F401