From 77f1ea6c4d16f32fa6bf3589b5932bef594d3b2f Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Mon, 9 Aug 2021 17:25:44 -0400 Subject: [PATCH 01/11] init --- deepprofiler/dataset/helper.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 deepprofiler/dataset/helper.py diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py new file mode 100644 index 0000000..ba50dc0 --- /dev/null +++ b/deepprofiler/dataset/helper.py @@ -0,0 +1,3 @@ +""" +Helper function for data loss +""" \ No newline at end of file From 4bd6fd3e2da6934f570d1a855a0825992a64be97 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 10 Aug 2021 11:44:39 -0400 Subject: [PATCH 02/11] First very bad version of check profile --- deepprofiler/dataset/helper.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index ba50dc0..82ed099 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -1,3 +1,20 @@ """ -Helper function for data loss -""" \ No newline at end of file +Helper functions for checking images, locations and crops before running profile and train. +""" + +import pandas as pd +import numpy as np +import os.path + +def check_imgs(ls, image_dir, channels): + for img in channels: + if not os.path.isfile(os.path.join(image_dir, img)): + ls.append(img) + return ls + +def check_profile(config, dset): + index = pd.read_csv('metadata/top20_moa.csv') + image_dir = os.path.join(project_dir, 'outputs', 'images') + ls = [] + res = index.apply(lambda row: check_imgs(ls, image_dir, row[dset.channels]), axis=1) + return ls From 6e14bcd55448d1710bd65a11cf67cecb70fd34b7 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 10 Aug 2021 11:44:57 -0400 Subject: [PATCH 03/11] add check profile to cli --- deepprofiler/__main__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index 70ef5b6..fb9608f 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -192,6 +192,17 @@ def split(context, parts): context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"] deepprofiler.dataset.indexing.split_index(context.obj["config"], parts) +# Auxiliary tool: check if images and locations are complete to run profiling functions +@cli.command() +@click.pass_context +@click.option("--check-profile", + help="check images and locations before running profile function", + type=click.INT) +def check_profile(context, parts): + dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile') + deepprofiler.dataset.helper.check_profiling(context.obj["config"], dset) + print("checking for profile complete.") + if __name__ == "__main__": cli(obj={}) From 7a7b16a6821ddce9fb13b1b9e2455614a1b2df62 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 10 Aug 2021 11:45:06 -0400 Subject: [PATCH 04/11] start test function --- tests/deepprofiler/dataset/test_helper.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/deepprofiler/dataset/test_helper.py diff --git a/tests/deepprofiler/dataset/test_helper.py b/tests/deepprofiler/dataset/test_helper.py new file mode 100644 index 0000000..2ba8a4b --- /dev/null +++ b/tests/deepprofiler/dataset/test_helper.py @@ -0,0 +1,9 @@ +import deepprofiler.dataset.helper +import sys +import os +from io import StringIO +import multiprocessing +import shutil + + +# Dont know how to test 
with CLI input? \ No newline at end of file From d642c80f5d3509340f5fa0c5f694689aefbc0f1f Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 10 Aug 2021 15:46:43 -0400 Subject: [PATCH 05/11] first version. only works locally --- deepprofiler/dataset/helper.py | 96 +++++++++++++++++++++-- tests/deepprofiler/dataset/test_helper.py | 36 +++++++-- 2 files changed, 120 insertions(+), 12 deletions(-) diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index 82ed099..ada5643 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -4,17 +4,99 @@ import pandas as pd import numpy as np -import os.path +import cv2 +import os -def check_imgs(ls, image_dir, channels): +def imgs_dont_exist(ls, image_dir, channels): + """ Adds images to a list if those images are not found. + + Parameters + ---------- + ls : empty list, will be filled with missing images + image_dir : directory to the images + channels : different channels for each image + """ for img in channels: if not os.path.isfile(os.path.join(image_dir, img)): ls.append(img) - return ls + return None + +def locs_dont_exist(ls, locs_dir, loc): + """ Adds location files to a list if they are missing are not found. + + Parameters + ---------- + ls : empty list, will be filled with missing files + locs_dir : directory to the location files + """ + if not os.path.isfile(os.path.join(locs_dir, loc)): + ls.append(loc) + return None + + +def check_profile(dset): + """Checks images and location files to prepare for the profiling function. + If this function runs correctly, the function 'profile' will also run without errors. -def check_profile(config, dset): - index = pd.read_csv('metadata/top20_moa.csv') - image_dir = os.path.join(project_dir, 'outputs', 'images') + Parameters + ---------- + config : + dset : + + Returns + ------- + + """ + project_dir = '/Users/mbornhol/git/DeepProf/DP2' + feat_rows = ['DNA','Tubulin','Actin'] + + # Checking images + index = pd.read_csv('/Users/mbornhol/git/DeepProf/DP2/inputs/metadata/index.csv') + image_dir = os.path.join(project_dir, 'inputs', 'images') ls = [] - res = index.apply(lambda row: check_imgs(ls, image_dir, row[dset.channels]), axis=1) + + # use this: row[dset.channels] + index.apply(lambda row: imgs_dont_exist(ls, image_dir, row[feat_rows]), axis=1) + pd.DataFrame(ls, columns=['missing_images']).to_csv('missing_images.csv', index=False) + + + # Checking location files + # image_dir = os.path.join(project_dir, 'outputs', 'images') + # ls = [] + # index.apply(lambda row: locs_dont_exist(ls, dset.locations, row), axis=1) + # pd.DataFrame(ls, columns=['missing_locations']).to_csv('missing_locations.csv', index=False)\ return ls + + +""" +Checking all crops before training. +""" + +def crop_checks(ls_missing, ls_zero, img_name, sample_dir): + if not os.path.isfile(os.path.join(sample_dir, img_name)): + ls_missing.append(img_name) + else: + img = cv2.imread(os.path.join(sample_dir, img_name), cv2.IMREAD_GRAYSCALE) + pos = np.nonzero(img) + if len(pos[0]) == 0: + ls_zero.append(img_name) + + +def check_training(dset): + """Check all crops before training in order to avoid errors during training. + + Returns + ------- + + """ + # First check if images exist + crops_dir = '/Users/mbornhol/git/DeepProf/DP2/outputs/single-cell-sample' + # use dset.sample_directory? 
+ df = pd.read_csv(os.path.join(crops_dir, 'sc-metadata.csv')) + + ls_missing = [] + ls_zero = [] + res = df.apply(lambda row: crop_checks(ls_missing, ls_zero, row['Image_Name'], crops_dir), axis = 1) + + pd.DataFrame(ls_missing, columns=['missing_crops']).to_csv('missing_crops.csv', index=False) + pd.DataFrame(ls_zero, columns=['zero_crops']).to_csv('zero_crops.csv', index=False) \ No newline at end of file diff --git a/tests/deepprofiler/dataset/test_helper.py b/tests/deepprofiler/dataset/test_helper.py index 2ba8a4b..cca21d6 100644 --- a/tests/deepprofiler/dataset/test_helper.py +++ b/tests/deepprofiler/dataset/test_helper.py @@ -1,9 +1,35 @@ -import deepprofiler.dataset.helper import sys +import pandas as pd +import tempfile import os -from io import StringIO -import multiprocessing -import shutil + +import deepprofiler.dataset.helper + + +# Dont know how to test with CLI input? + +tempdir = tempfile.TemporaryFile() + +def test_check_profile(): + dset = [] + deepprofiler.dataset.helper.check_profile(dset) + df = pd.read_csv('missing_images.csv') + print('Missing images:') + print(df.missing_images.tolist()) + assert len(df) == 0 + + +def test_check_training(): + dset = [] + deepprofiler.dataset.helper.check_training(dset) + miss_crops = pd.read_csv('missing_crops.csv') + print('Missing crops:') + print(miss_crops.missing_crops.tolist()) + + zero_crops = pd.read_csv('zero_crops.csv') + print('Zero crops:') + print(zero_crops.zero_crops.tolist()) + assert len(miss_crops) == 0 + assert len(zero_crops) == 0 -# Dont know how to test with CLI input? \ No newline at end of file From 78c361399be08e016856f074a671fea8c27b9e8a Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Wed, 11 Aug 2021 16:18:37 -0400 Subject: [PATCH 06/11] Updated Profiling --- deepprofiler/__main__.py | 16 ++++++-- deepprofiler/dataset/helper.py | 71 ++++++++++++---------------------- 2 files changed, 37 insertions(+), 50 deletions(-) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index fb9608f..662bf85 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -15,6 +15,7 @@ import deepprofiler.dataset.sampling import deepprofiler.learning.training import deepprofiler.learning.profiling +import deepprofiler.dataset.helper import deepprofiler.download.normalize_bbbc021_metadata @@ -195,14 +196,21 @@ def split(context, parts): # Auxiliary tool: check if images and locations are complete to run profiling functions @cli.command() @click.pass_context -@click.option("--check-profile", - help="check images and locations before running profile function", - type=click.INT) -def check_profile(context, parts): +def check_profile(context): dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile') deepprofiler.dataset.helper.check_profiling(context.obj["config"], dset) print("checking for profile complete.") +# Auxiliary tool: check if crops are complete. Use this before running training +@cli.command() +@click.pass_context +@click.option("--check-train", + help="checks if crops are complete. 
Use this before running training", + type=click.INT) +def check_profile(context): + dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile') + deepprofiler.dataset.helper.check_training(context.obj["config"], dset) + print("checking for train is complete.") if __name__ == "__main__": cli(obj={}) diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index ada5643..a23a885 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -6,35 +6,10 @@ import numpy as np import cv2 import os +import deepprofiler.imaging.boxes -def imgs_dont_exist(ls, image_dir, channels): - """ Adds images to a list if those images are not found. - Parameters - ---------- - ls : empty list, will be filled with missing images - image_dir : directory to the images - channels : different channels for each image - """ - for img in channels: - if not os.path.isfile(os.path.join(image_dir, img)): - ls.append(img) - return None - -def locs_dont_exist(ls, locs_dir, loc): - """ Adds location files to a list if they are missing are not found. - - Parameters - ---------- - ls : empty list, will be filled with missing files - locs_dir : directory to the location files - """ - if not os.path.isfile(os.path.join(locs_dir, loc)): - ls.append(loc) - return None - - -def check_profile(dset): +def check_profiling(config, dset): """Checks images and location files to prepare for the profiling function. If this function runs correctly, the function 'profile' will also run without errors. @@ -47,25 +22,29 @@ def check_profile(dset): ------- """ - project_dir = '/Users/mbornhol/git/DeepProf/DP2' - feat_rows = ['DNA','Tubulin','Actin'] - - # Checking images - index = pd.read_csv('/Users/mbornhol/git/DeepProf/DP2/inputs/metadata/index.csv') - image_dir = os.path.join(project_dir, 'inputs', 'images') - ls = [] - - # use this: row[dset.channels] - index.apply(lambda row: imgs_dont_exist(ls, image_dir, row[feat_rows]), axis=1) - pd.DataFrame(ls, columns=['missing_images']).to_csv('missing_images.csv', index=False) - - - # Checking location files - # image_dir = os.path.join(project_dir, 'outputs', 'images') - # ls = [] - # index.apply(lambda row: locs_dont_exist(ls, dset.locations, row), axis=1) - # pd.DataFrame(ls, columns=['missing_locations']).to_csv('missing_locations.csv', index=False)\ - return ls + ls_imgs, ls_locs = [], [] + os.makedirs('checks', exist_ok=True) + + frame = dset.meta.data.iterrows() + images = [dset.get_image_paths(r) for i, r in frame] + for channels in images: + for img in channels[1]: + if not os.path.isfile(img): + ls_imgs.append(img) + print('found {} missing images'.format(len(ls_imgs)), '|| saving list of missing files to checks/') + pd.DataFrame(ls_imgs, columns=['missing_images']).to_csv('checks/missing_images.csv', index=False) + + # start checking location files + frame = dset.meta.data.iterrows() + for i, r in frame: + df = deepprofiler.imaging.boxes.get_single_cell_locations("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"]), dset.config) + if df.empty: + ls_locs.append("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])) + + print('found {} missing location files'.format(len(ls_locs)), '|| saving list of missing files to checks/') + pd.DataFrame(ls_locs, columns=['missing_locs']).to_csv('checks/missing_locs.csv', index=False) + + return ls_imgs, ls_locs """ From 132a4599e8d0221c6216c549d52632b7c7cc819f Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Thu, 12 Aug 
2021 15:15:44 -0400 Subject: [PATCH 07/11] solve check_profile --- deepprofiler/__main__.py | 12 +++++------- deepprofiler/dataset/helper.py | 9 ++++----- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index 662bf85..3071ff9 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -198,18 +198,16 @@ def split(context, parts): @click.pass_context def check_profile(context): dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile') - deepprofiler.dataset.helper.check_profiling(context.obj["config"], dset) + deepprofiler.dataset.helper.check_profile(dset) print("checking for profile complete.") # Auxiliary tool: check if crops are complete. Use this before running training + @cli.command() @click.pass_context -@click.option("--check-train", - help="checks if crops are complete. Use this before running training", - type=click.INT) -def check_profile(context): - dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile') - deepprofiler.dataset.helper.check_training(context.obj["config"], dset) +def check_train(context): + dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='train') + deepprofiler.dataset.helper.check_train(dset) print("checking for train is complete.") if __name__ == "__main__": diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index a23a885..00a509b 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -9,7 +9,7 @@ import deepprofiler.imaging.boxes -def check_profiling(config, dset): +def check_profile(dset): """Checks images and location files to prepare for the profiling function. If this function runs correctly, the function 'profile' will also run without errors. @@ -61,17 +61,16 @@ def crop_checks(ls_missing, ls_zero, img_name, sample_dir): ls_zero.append(img_name) -def check_training(dset): +def check_train(dset): """Check all crops before training in order to avoid errors during training. Returns ------- """ + # print(dset.meta.data.columns) # First check if images exist - crops_dir = '/Users/mbornhol/git/DeepProf/DP2/outputs/single-cell-sample' - # use dset.sample_directory? 
- df = pd.read_csv(os.path.join(crops_dir, 'sc-metadata.csv')) + df = # read sc-metadata file ls_missing = [] ls_zero = [] From 692b4b364cdf8e0fa928f24671c0a8575a528147 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 17 Aug 2021 12:03:00 -0400 Subject: [PATCH 08/11] get helper ready --- deepprofiler/__main__.py | 2 +- deepprofiler/dataset/helper.py | 33 ++++++++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index 3071ff9..a8110cf 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -207,7 +207,7 @@ def check_profile(context): @click.pass_context def check_train(context): dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='train') - deepprofiler.dataset.helper.check_train(dset) + deepprofiler.dataset.helper.check_train(context.obj["config"], dset) print("checking for train is complete.") if __name__ == "__main__": diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index 00a509b..2649767 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -7,7 +7,8 @@ import cv2 import os import deepprofiler.imaging.boxes - +import plugins.crop_generators.sampled_crop_generator +import tensorflow as tf def check_profile(dset): """Checks images and location files to prepare for the profiling function. @@ -27,11 +28,13 @@ def check_profile(dset): frame = dset.meta.data.iterrows() images = [dset.get_image_paths(r) for i, r in frame] + # print(images) + print('rand image',images[0]) for channels in images: for img in channels[1]: if not os.path.isfile(img): ls_imgs.append(img) - print('found {} missing images'.format(len(ls_imgs)), '|| saving list of missing files to checks/') + print('>>> found {} missing images'.format(len(ls_imgs)), '|| saving list of missing files to checks/') pd.DataFrame(ls_imgs, columns=['missing_images']).to_csv('checks/missing_images.csv', index=False) # start checking location files @@ -41,7 +44,7 @@ def check_profile(dset): if df.empty: ls_locs.append("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])) - print('found {} missing location files'.format(len(ls_locs)), '|| saving list of missing files to checks/') + print('>>> found {} missing location files'.format(len(ls_locs)), '|| saving list of missing files to checks/') pd.DataFrame(ls_locs, columns=['missing_locs']).to_csv('checks/missing_locs.csv', index=False) return ls_imgs, ls_locs @@ -51,30 +54,34 @@ def check_profile(dset): Checking all crops before training. """ -def crop_checks(ls_missing, ls_zero, img_name, sample_dir): - if not os.path.isfile(os.path.join(sample_dir, img_name)): +def crop_checks(ls_missing, ls_zero, img_name): + if not os.path.isfile(img_name): ls_missing.append(img_name) else: - img = cv2.imread(os.path.join(sample_dir, img_name), cv2.IMREAD_GRAYSCALE) + img = cv2.imread(img_name, cv2.IMREAD_GRAYSCALE) pos = np.nonzero(img) if len(pos[0]) == 0: ls_zero.append(img_name) -def check_train(dset): +def check_train(config, dset): """Check all crops before training in order to avoid errors during training. 
Returns ------- """ - # print(dset.meta.data.columns) - # First check if images exist - df = # read sc-metadata file + os.makedirs('checks', exist_ok=True) + + crop_generator = plugins.crop_generators.sampled_crop_generator.GeneratorClass(config, dset) + sess = tf.compat.v1.Session() + crop_generator.start(sess) + df = crop_generator.samples - ls_missing = [] - ls_zero = [] - res = df.apply(lambda row: crop_checks(ls_missing, ls_zero, row['Image_Name'], crops_dir), axis = 1) + ls_missing, ls_zero = [], [] + res = df.apply(lambda row: crop_checks(ls_missing, ls_zero, os.path.join(config["paths"]["single_cell_sample"], row['Image_Name'])), axis = 1) + print('>>> found {} missing crops'.format(len(ls_missing)), '|| saving list of missing crops to checks/') pd.DataFrame(ls_missing, columns=['missing_crops']).to_csv('missing_crops.csv', index=False) + print('>>> found {} crops with zero values'.format(len(ls_zero)), '|| saving list of zero crops to checks/') pd.DataFrame(ls_zero, columns=['zero_crops']).to_csv('zero_crops.csv', index=False) \ No newline at end of file From 3e493b4bf8c45e3e7fe0da76e0ed1c9fe8ee98c2 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Wed, 18 Aug 2021 16:36:00 -0400 Subject: [PATCH 09/11] delete the test file. Not needed --- .gitignore | 1 + tests/deepprofiler/dataset/test_helper.py | 35 ----------------------- 2 files changed, 1 insertion(+), 35 deletions(-) delete mode 100644 tests/deepprofiler/dataset/test_helper.py diff --git a/.gitignore b/.gitignore index 69da5e5..9f16664 100644 --- a/.gitignore +++ b/.gitignore @@ -95,3 +95,4 @@ ENV/ .ropeproject .idea +/tests/files/test_data/ diff --git a/tests/deepprofiler/dataset/test_helper.py b/tests/deepprofiler/dataset/test_helper.py deleted file mode 100644 index cca21d6..0000000 --- a/tests/deepprofiler/dataset/test_helper.py +++ /dev/null @@ -1,35 +0,0 @@ -import sys -import pandas as pd -import tempfile -import os - -import deepprofiler.dataset.helper - - -# Dont know how to test with CLI input? 
- -tempdir = tempfile.TemporaryFile() - -def test_check_profile(): - dset = [] - deepprofiler.dataset.helper.check_profile(dset) - df = pd.read_csv('missing_images.csv') - print('Missing images:') - print(df.missing_images.tolist()) - assert len(df) == 0 - - -def test_check_training(): - dset = [] - deepprofiler.dataset.helper.check_training(dset) - miss_crops = pd.read_csv('missing_crops.csv') - print('Missing crops:') - print(miss_crops.missing_crops.tolist()) - - zero_crops = pd.read_csv('zero_crops.csv') - print('Zero crops:') - print(zero_crops.zero_crops.tolist()) - assert len(miss_crops) == 0 - assert len(zero_crops) == 0 - - From d596c61f770299a0f8e037135455224515a65b3b Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Wed, 18 Aug 2021 17:48:01 -0400 Subject: [PATCH 10/11] finalize functions --- deepprofiler/__main__.py | 3 +- deepprofiler/dataset/helper.py | 108 +++++++++++++++++++++++++-------- 2 files changed, 84 insertions(+), 27 deletions(-) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index a8110cf..d87a12d 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -193,6 +193,7 @@ def split(context, parts): context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"] deepprofiler.dataset.indexing.split_index(context.obj["config"], parts) + # Auxiliary tool: check if images and locations are complete to run profiling functions @cli.command() @click.pass_context @@ -201,8 +202,8 @@ def check_profile(context): deepprofiler.dataset.helper.check_profile(dset) print("checking for profile complete.") -# Auxiliary tool: check if crops are complete. Use this before running training +# Auxiliary tool: check if crops are complete. Use this before running training @cli.command() @click.pass_context def check_train(context): diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index 2649767..95b6953 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -6,55 +6,84 @@ import numpy as np import cv2 import os +import tensorflow as tf + import deepprofiler.imaging.boxes import plugins.crop_generators.sampled_crop_generator -import tensorflow as tf + def check_profile(dset): """Checks images and location files to prepare for the profiling function. If this function runs correctly, the function 'profile' will also run without errors. + The names of the missing files are saved in two different files. 
Parameters ---------- - config : - dset : + dset : Data structure with metadata and location files Returns ------- + ls_imgs : list of missing images + ls_locs : list of missing location files """ ls_imgs, ls_locs = [], [] - os.makedirs('checks', exist_ok=True) + os.makedirs("checks", exist_ok=True) + # start checking image files frame = dset.meta.data.iterrows() images = [dset.get_image_paths(r) for i, r in frame] - # print(images) - print('rand image',images[0]) + for channels in images: for img in channels[1]: if not os.path.isfile(img): ls_imgs.append(img) - print('>>> found {} missing images'.format(len(ls_imgs)), '|| saving list of missing files to checks/') - pd.DataFrame(ls_imgs, columns=['missing_images']).to_csv('checks/missing_images.csv', index=False) + print( + ">>> found {} missing images".format(len(ls_imgs)), + "|| saving list of missing files to checks/missing_images.csv", + ) + pd.DataFrame(ls_imgs, columns=["missing_images"]).to_csv( + "checks/missing_images.csv", index=False + ) # start checking location files frame = dset.meta.data.iterrows() for i, r in frame: - df = deepprofiler.imaging.boxes.get_single_cell_locations("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"]), dset.config) + df = deepprofiler.imaging.boxes.get_single_cell_locations( + "{}/{}-{}".format( + r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"] + ), + dset.config, + ) if df.empty: - ls_locs.append("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])) - - print('>>> found {} missing location files'.format(len(ls_locs)), '|| saving list of missing files to checks/') - pd.DataFrame(ls_locs, columns=['missing_locs']).to_csv('checks/missing_locs.csv', index=False) + ls_locs.append( + "{}/{}-{}".format( + r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"] + ) + ) + + print( + ">>> found {} missing location files".format(len(ls_locs)), + "|| saving list of missing files to checks/missing_locs.csv", + ) + pd.DataFrame(ls_locs, columns=["missing_locs"]).to_csv( + "checks/missing_locs.csv", index=False + ) return ls_imgs, ls_locs -""" -Checking all crops before training. -""" +def crop_checks(img_name, ls_missing, ls_zero): + """Utility function for check_train to check images for existence and non-zero values. + Parameters + ---------- + img_name : crop image name + + Returns + ------- + ls_missing, ls_zero : lists detailing the missing crops and the zero crops -def crop_checks(ls_missing, ls_zero, img_name): + """ if not os.path.isfile(img_name): ls_missing.append(img_name) else: @@ -65,23 +94,50 @@ def crop_checks(ls_missing, ls_zero, img_name): def check_train(config, dset): - """Check all crops before training in order to avoid errors during training. + """Checks if the data is ready for training by checking if the crops are sampled correctly. + Missing and zero crops are saved into two files. 
+ Parameters + ---------- + config : config input + dset : Data structure with metadata Returns ------- + ls_missing, ls_zero : lists of missing and zero crops """ - os.makedirs('checks', exist_ok=True) + os.makedirs("checks", exist_ok=True) - crop_generator = plugins.crop_generators.sampled_crop_generator.GeneratorClass(config, dset) + crop_generator = plugins.crop_generators.sampled_crop_generator.GeneratorClass( + config, dset + ) sess = tf.compat.v1.Session() crop_generator.start(sess) df = crop_generator.samples ls_missing, ls_zero = [], [] - res = df.apply(lambda row: crop_checks(ls_missing, ls_zero, os.path.join(config["paths"]["single_cell_sample"], row['Image_Name'])), axis = 1) - - print('>>> found {} missing crops'.format(len(ls_missing)), '|| saving list of missing crops to checks/') - pd.DataFrame(ls_missing, columns=['missing_crops']).to_csv('missing_crops.csv', index=False) - print('>>> found {} crops with zero values'.format(len(ls_zero)), '|| saving list of zero crops to checks/') - pd.DataFrame(ls_zero, columns=['zero_crops']).to_csv('zero_crops.csv', index=False) \ No newline at end of file + res = df.apply( + lambda row: crop_checks( + os.path.join(config["paths"]["single_cell_sample"], row["Image_Name"]), + ls_missing, + ls_zero, + ), + axis=1, + ) + + print( + ">>> found {} missing crops".format(len(ls_missing)), + "|| saving list of missing crops to checks/missing_crops.csv", + ) + pd.DataFrame(ls_missing, columns=["missing_crops"]).to_csv( + "checks/missing_crops.csv", index=False + ) + print( + ">>> found {} crops with zero values".format(len(ls_zero)), + "|| saving list of zero crops to checks/missing_crops.csv", + ) + pd.DataFrame(ls_zero, columns=["zero_crops"]).to_csv( + "checks/zero_crops.csv", index=False + ) + + return ls_missing, ls_zero From 24a233e44ee5ea85002faef1a7263f6ba11c5797 Mon Sep 17 00:00:00 2001 From: Michael Bornholdt <56402523+michaelbornholdt@users.noreply.github.com> Date: Wed, 18 Aug 2021 23:51:07 +0200 Subject: [PATCH 11/11] Update .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9f16664..69da5e5 100644 --- a/.gitignore +++ b/.gitignore @@ -95,4 +95,3 @@ ENV/ .ropeproject .idea -/tests/files/test_data/