From 77f1ea6c4d16f32fa6bf3589b5932bef594d3b2f Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Mon, 9 Aug 2021 17:25:44 -0400 Subject: [PATCH 01/11] init --- deepprofiler/dataset/helper.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 deepprofiler/dataset/helper.py diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py new file mode 100644 index 0000000..ba50dc0 --- /dev/null +++ b/deepprofiler/dataset/helper.py @@ -0,0 +1,3 @@ +""" +Helper function for data loss +""" \ No newline at end of file From 4bd6fd3e2da6934f570d1a855a0825992a64be97 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 10 Aug 2021 11:44:39 -0400 Subject: [PATCH 02/11] First very bad version of check profile --- deepprofiler/dataset/helper.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index ba50dc0..82ed099 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -1,3 +1,20 @@ """ -Helper function for data loss -""" \ No newline at end of file +Helper functions for checking images, locations and crops before running profile and train. +""" + +import pandas as pd +import numpy as np +import os.path + +def check_imgs(ls, image_dir, channels): + for img in channels: + if not os.path.isfile(os.path.join(image_dir, img)): + ls.append(img) + return ls + +def check_profile(config, dset): + index = pd.read_csv('metadata/top20_moa.csv') + image_dir = os.path.join(project_dir, 'outputs', 'images') + ls = [] + res = index.apply(lambda row: check_imgs(ls, image_dir, row[dset.channels]), axis=1) + return ls From 6e14bcd55448d1710bd65a11cf67cecb70fd34b7 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 10 Aug 2021 11:44:57 -0400 Subject: [PATCH 03/11] add check profile to cli --- deepprofiler/__main__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index 70ef5b6..fb9608f 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -192,6 +192,17 @@ def split(context, parts): context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"] deepprofiler.dataset.indexing.split_index(context.obj["config"], parts) +# Auxiliary tool: check if images and locations are complete to run profiling functions +@cli.command() +@click.pass_context +@click.option("--check-profile", + help="check images and locations before running profile function", + type=click.INT) +def check_profile(context, parts): + dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile') + deepprofiler.dataset.helper.check_profiling(context.obj["config"], dset) + print("checking for profile complete.") + if __name__ == "__main__": cli(obj={}) From 7a7b16a6821ddce9fb13b1b9e2455614a1b2df62 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 10 Aug 2021 11:45:06 -0400 Subject: [PATCH 04/11] start test function --- tests/deepprofiler/dataset/test_helper.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/deepprofiler/dataset/test_helper.py diff --git a/tests/deepprofiler/dataset/test_helper.py b/tests/deepprofiler/dataset/test_helper.py new file mode 100644 index 0000000..2ba8a4b --- /dev/null +++ b/tests/deepprofiler/dataset/test_helper.py @@ -0,0 +1,9 @@ +import deepprofiler.dataset.helper +import sys +import os +from io import StringIO +import multiprocessing +import shutil + + +# Dont know how to test 
with CLI input? \ No newline at end of file From d642c80f5d3509340f5fa0c5f694689aefbc0f1f Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 10 Aug 2021 15:46:43 -0400 Subject: [PATCH 05/11] first version. only works locally --- deepprofiler/dataset/helper.py | 96 +++++++++++++++++++++-- tests/deepprofiler/dataset/test_helper.py | 36 +++++++-- 2 files changed, 120 insertions(+), 12 deletions(-) diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index 82ed099..ada5643 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -4,17 +4,99 @@ import pandas as pd import numpy as np -import os.path +import cv2 +import os -def check_imgs(ls, image_dir, channels): +def imgs_dont_exist(ls, image_dir, channels): + """ Adds images to a list if those images are not found. + + Parameters + ---------- + ls : empty list, will be filled with missing images + image_dir : directory to the images + channels : different channels for each image + """ for img in channels: if not os.path.isfile(os.path.join(image_dir, img)): ls.append(img) - return ls + return None + +def locs_dont_exist(ls, locs_dir, loc): + """ Adds location files to a list if they are missing are not found. + + Parameters + ---------- + ls : empty list, will be filled with missing files + locs_dir : directory to the location files + """ + if not os.path.isfile(os.path.join(locs_dir, loc)): + ls.append(loc) + return None + + +def check_profile(dset): + """Checks images and location files to prepare for the profiling function. + If this function runs correctly, the function 'profile' will also run without errors. -def check_profile(config, dset): - index = pd.read_csv('metadata/top20_moa.csv') - image_dir = os.path.join(project_dir, 'outputs', 'images') + Parameters + ---------- + config : + dset : + + Returns + ------- + + """ + project_dir = '/Users/mbornhol/git/DeepProf/DP2' + feat_rows = ['DNA','Tubulin','Actin'] + + # Checking images + index = pd.read_csv('/Users/mbornhol/git/DeepProf/DP2/inputs/metadata/index.csv') + image_dir = os.path.join(project_dir, 'inputs', 'images') ls = [] - res = index.apply(lambda row: check_imgs(ls, image_dir, row[dset.channels]), axis=1) + + # use this: row[dset.channels] + index.apply(lambda row: imgs_dont_exist(ls, image_dir, row[feat_rows]), axis=1) + pd.DataFrame(ls, columns=['missing_images']).to_csv('missing_images.csv', index=False) + + + # Checking location files + # image_dir = os.path.join(project_dir, 'outputs', 'images') + # ls = [] + # index.apply(lambda row: locs_dont_exist(ls, dset.locations, row), axis=1) + # pd.DataFrame(ls, columns=['missing_locations']).to_csv('missing_locations.csv', index=False)\ return ls + + +""" +Checking all crops before training. +""" + +def crop_checks(ls_missing, ls_zero, img_name, sample_dir): + if not os.path.isfile(os.path.join(sample_dir, img_name)): + ls_missing.append(img_name) + else: + img = cv2.imread(os.path.join(sample_dir, img_name), cv2.IMREAD_GRAYSCALE) + pos = np.nonzero(img) + if len(pos[0]) == 0: + ls_zero.append(img_name) + + +def check_training(dset): + """Check all crops before training in order to avoid errors during training. + + Returns + ------- + + """ + # First check if images exist + crops_dir = '/Users/mbornhol/git/DeepProf/DP2/outputs/single-cell-sample' + # use dset.sample_directory? 
+ df = pd.read_csv(os.path.join(crops_dir, 'sc-metadata.csv')) + + ls_missing = [] + ls_zero = [] + res = df.apply(lambda row: crop_checks(ls_missing, ls_zero, row['Image_Name'], crops_dir), axis = 1) + + pd.DataFrame(ls_missing, columns=['missing_crops']).to_csv('missing_crops.csv', index=False) + pd.DataFrame(ls_zero, columns=['zero_crops']).to_csv('zero_crops.csv', index=False) \ No newline at end of file diff --git a/tests/deepprofiler/dataset/test_helper.py b/tests/deepprofiler/dataset/test_helper.py index 2ba8a4b..cca21d6 100644 --- a/tests/deepprofiler/dataset/test_helper.py +++ b/tests/deepprofiler/dataset/test_helper.py @@ -1,9 +1,35 @@ -import deepprofiler.dataset.helper import sys +import pandas as pd +import tempfile import os -from io import StringIO -import multiprocessing -import shutil + +import deepprofiler.dataset.helper + + +# Dont know how to test with CLI input? + +tempdir = tempfile.TemporaryFile() + +def test_check_profile(): + dset = [] + deepprofiler.dataset.helper.check_profile(dset) + df = pd.read_csv('missing_images.csv') + print('Missing images:') + print(df.missing_images.tolist()) + assert len(df) == 0 + + +def test_check_training(): + dset = [] + deepprofiler.dataset.helper.check_training(dset) + miss_crops = pd.read_csv('missing_crops.csv') + print('Missing crops:') + print(miss_crops.missing_crops.tolist()) + + zero_crops = pd.read_csv('zero_crops.csv') + print('Zero crops:') + print(zero_crops.zero_crops.tolist()) + assert len(miss_crops) == 0 + assert len(zero_crops) == 0 -# Dont know how to test with CLI input? \ No newline at end of file From 78c361399be08e016856f074a671fea8c27b9e8a Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Wed, 11 Aug 2021 16:18:37 -0400 Subject: [PATCH 06/11] Updated Profiling --- deepprofiler/__main__.py | 16 ++++++-- deepprofiler/dataset/helper.py | 71 ++++++++++++---------------------- 2 files changed, 37 insertions(+), 50 deletions(-) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index fb9608f..662bf85 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -15,6 +15,7 @@ import deepprofiler.dataset.sampling import deepprofiler.learning.training import deepprofiler.learning.profiling +import deepprofiler.dataset.helper import deepprofiler.download.normalize_bbbc021_metadata @@ -195,14 +196,21 @@ def split(context, parts): # Auxiliary tool: check if images and locations are complete to run profiling functions @cli.command() @click.pass_context -@click.option("--check-profile", - help="check images and locations before running profile function", - type=click.INT) -def check_profile(context, parts): +def check_profile(context): dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile') deepprofiler.dataset.helper.check_profiling(context.obj["config"], dset) print("checking for profile complete.") +# Auxiliary tool: check if crops are complete. Use this before running training +@cli.command() +@click.pass_context +@click.option("--check-train", + help="checks if crops are complete. 
Use this before running training", + type=click.INT) +def check_profile(context): + dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile') + deepprofiler.dataset.helper.check_training(context.obj["config"], dset) + print("checking for train is complete.") if __name__ == "__main__": cli(obj={}) diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index ada5643..a23a885 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -6,35 +6,10 @@ import numpy as np import cv2 import os +import deepprofiler.imaging.boxes -def imgs_dont_exist(ls, image_dir, channels): - """ Adds images to a list if those images are not found. - Parameters - ---------- - ls : empty list, will be filled with missing images - image_dir : directory to the images - channels : different channels for each image - """ - for img in channels: - if not os.path.isfile(os.path.join(image_dir, img)): - ls.append(img) - return None - -def locs_dont_exist(ls, locs_dir, loc): - """ Adds location files to a list if they are missing are not found. - - Parameters - ---------- - ls : empty list, will be filled with missing files - locs_dir : directory to the location files - """ - if not os.path.isfile(os.path.join(locs_dir, loc)): - ls.append(loc) - return None - - -def check_profile(dset): +def check_profiling(config, dset): """Checks images and location files to prepare for the profiling function. If this function runs correctly, the function 'profile' will also run without errors. @@ -47,25 +22,29 @@ def check_profile(dset): ------- """ - project_dir = '/Users/mbornhol/git/DeepProf/DP2' - feat_rows = ['DNA','Tubulin','Actin'] - - # Checking images - index = pd.read_csv('/Users/mbornhol/git/DeepProf/DP2/inputs/metadata/index.csv') - image_dir = os.path.join(project_dir, 'inputs', 'images') - ls = [] - - # use this: row[dset.channels] - index.apply(lambda row: imgs_dont_exist(ls, image_dir, row[feat_rows]), axis=1) - pd.DataFrame(ls, columns=['missing_images']).to_csv('missing_images.csv', index=False) - - - # Checking location files - # image_dir = os.path.join(project_dir, 'outputs', 'images') - # ls = [] - # index.apply(lambda row: locs_dont_exist(ls, dset.locations, row), axis=1) - # pd.DataFrame(ls, columns=['missing_locations']).to_csv('missing_locations.csv', index=False)\ - return ls + ls_imgs, ls_locs = [], [] + os.makedirs('checks', exist_ok=True) + + frame = dset.meta.data.iterrows() + images = [dset.get_image_paths(r) for i, r in frame] + for channels in images: + for img in channels[1]: + if not os.path.isfile(img): + ls_imgs.append(img) + print('found {} missing images'.format(len(ls_imgs)), '|| saving list of missing files to checks/') + pd.DataFrame(ls_imgs, columns=['missing_images']).to_csv('checks/missing_images.csv', index=False) + + # start checking location files + frame = dset.meta.data.iterrows() + for i, r in frame: + df = deepprofiler.imaging.boxes.get_single_cell_locations("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"]), dset.config) + if df.empty: + ls_locs.append("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])) + + print('found {} missing location files'.format(len(ls_locs)), '|| saving list of missing files to checks/') + pd.DataFrame(ls_locs, columns=['missing_locs']).to_csv('checks/missing_locs.csv', index=False) + + return ls_imgs, ls_locs """ From 132a4599e8d0221c6216c549d52632b7c7cc819f Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Thu, 12 Aug 
2021 15:15:44 -0400 Subject: [PATCH 07/11] solve check_profile --- deepprofiler/__main__.py | 12 +++++------- deepprofiler/dataset/helper.py | 9 ++++----- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index 662bf85..3071ff9 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -198,18 +198,16 @@ def split(context, parts): @click.pass_context def check_profile(context): dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile') - deepprofiler.dataset.helper.check_profiling(context.obj["config"], dset) + deepprofiler.dataset.helper.check_profile(dset) print("checking for profile complete.") # Auxiliary tool: check if crops are complete. Use this before running training + @cli.command() @click.pass_context -@click.option("--check-train", - help="checks if crops are complete. Use this before running training", - type=click.INT) -def check_profile(context): - dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile') - deepprofiler.dataset.helper.check_training(context.obj["config"], dset) +def check_train(context): + dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='train') + deepprofiler.dataset.helper.check_train(dset) print("checking for train is complete.") if __name__ == "__main__": diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index a23a885..00a509b 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -9,7 +9,7 @@ import deepprofiler.imaging.boxes -def check_profiling(config, dset): +def check_profile(dset): """Checks images and location files to prepare for the profiling function. If this function runs correctly, the function 'profile' will also run without errors. @@ -61,17 +61,16 @@ def crop_checks(ls_missing, ls_zero, img_name, sample_dir): ls_zero.append(img_name) -def check_training(dset): +def check_train(dset): """Check all crops before training in order to avoid errors during training. Returns ------- """ + # print(dset.meta.data.columns) # First check if images exist - crops_dir = '/Users/mbornhol/git/DeepProf/DP2/outputs/single-cell-sample' - # use dset.sample_directory? 
- df = pd.read_csv(os.path.join(crops_dir, 'sc-metadata.csv')) + df = # read sc-metadata file ls_missing = [] ls_zero = [] From 692b4b364cdf8e0fa928f24671c0a8575a528147 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Tue, 17 Aug 2021 12:03:00 -0400 Subject: [PATCH 08/11] get helper ready --- deepprofiler/__main__.py | 2 +- deepprofiler/dataset/helper.py | 33 ++++++++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index 3071ff9..a8110cf 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -207,7 +207,7 @@ def check_profile(context): @click.pass_context def check_train(context): dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='train') - deepprofiler.dataset.helper.check_train(dset) + deepprofiler.dataset.helper.check_train(context.obj["config"], dset) print("checking for train is complete.") if __name__ == "__main__": diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index 00a509b..2649767 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -7,7 +7,8 @@ import cv2 import os import deepprofiler.imaging.boxes - +import plugins.crop_generators.sampled_crop_generator +import tensorflow as tf def check_profile(dset): """Checks images and location files to prepare for the profiling function. @@ -27,11 +28,13 @@ def check_profile(dset): frame = dset.meta.data.iterrows() images = [dset.get_image_paths(r) for i, r in frame] + # print(images) + print('rand image',images[0]) for channels in images: for img in channels[1]: if not os.path.isfile(img): ls_imgs.append(img) - print('found {} missing images'.format(len(ls_imgs)), '|| saving list of missing files to checks/') + print('>>> found {} missing images'.format(len(ls_imgs)), '|| saving list of missing files to checks/') pd.DataFrame(ls_imgs, columns=['missing_images']).to_csv('checks/missing_images.csv', index=False) # start checking location files @@ -41,7 +44,7 @@ def check_profile(dset): if df.empty: ls_locs.append("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])) - print('found {} missing location files'.format(len(ls_locs)), '|| saving list of missing files to checks/') + print('>>> found {} missing location files'.format(len(ls_locs)), '|| saving list of missing files to checks/') pd.DataFrame(ls_locs, columns=['missing_locs']).to_csv('checks/missing_locs.csv', index=False) return ls_imgs, ls_locs @@ -51,30 +54,34 @@ def check_profile(dset): Checking all crops before training. """ -def crop_checks(ls_missing, ls_zero, img_name, sample_dir): - if not os.path.isfile(os.path.join(sample_dir, img_name)): +def crop_checks(ls_missing, ls_zero, img_name): + if not os.path.isfile(img_name): ls_missing.append(img_name) else: - img = cv2.imread(os.path.join(sample_dir, img_name), cv2.IMREAD_GRAYSCALE) + img = cv2.imread(img_name, cv2.IMREAD_GRAYSCALE) pos = np.nonzero(img) if len(pos[0]) == 0: ls_zero.append(img_name) -def check_train(dset): +def check_train(config, dset): """Check all crops before training in order to avoid errors during training. 
Returns ------- """ - # print(dset.meta.data.columns) - # First check if images exist - df = # read sc-metadata file + os.makedirs('checks', exist_ok=True) + + crop_generator = plugins.crop_generators.sampled_crop_generator.GeneratorClass(config, dset) + sess = tf.compat.v1.Session() + crop_generator.start(sess) + df = crop_generator.samples - ls_missing = [] - ls_zero = [] - res = df.apply(lambda row: crop_checks(ls_missing, ls_zero, row['Image_Name'], crops_dir), axis = 1) + ls_missing, ls_zero = [], [] + res = df.apply(lambda row: crop_checks(ls_missing, ls_zero, os.path.join(config["paths"]["single_cell_sample"], row['Image_Name'])), axis = 1) + print('>>> found {} missing crops'.format(len(ls_missing)), '|| saving list of missing crops to checks/') pd.DataFrame(ls_missing, columns=['missing_crops']).to_csv('missing_crops.csv', index=False) + print('>>> found {} crops with zero values'.format(len(ls_zero)), '|| saving list of zero crops to checks/') pd.DataFrame(ls_zero, columns=['zero_crops']).to_csv('zero_crops.csv', index=False) \ No newline at end of file From 3e493b4bf8c45e3e7fe0da76e0ed1c9fe8ee98c2 Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Wed, 18 Aug 2021 16:36:00 -0400 Subject: [PATCH 09/11] delete the test file. Not needed --- .gitignore | 1 + tests/deepprofiler/dataset/test_helper.py | 35 ----------------------- 2 files changed, 1 insertion(+), 35 deletions(-) delete mode 100644 tests/deepprofiler/dataset/test_helper.py diff --git a/.gitignore b/.gitignore index 69da5e5..9f16664 100644 --- a/.gitignore +++ b/.gitignore @@ -95,3 +95,4 @@ ENV/ .ropeproject .idea +/tests/files/test_data/ diff --git a/tests/deepprofiler/dataset/test_helper.py b/tests/deepprofiler/dataset/test_helper.py deleted file mode 100644 index cca21d6..0000000 --- a/tests/deepprofiler/dataset/test_helper.py +++ /dev/null @@ -1,35 +0,0 @@ -import sys -import pandas as pd -import tempfile -import os - -import deepprofiler.dataset.helper - - -# Dont know how to test with CLI input? 
- -tempdir = tempfile.TemporaryFile() - -def test_check_profile(): - dset = [] - deepprofiler.dataset.helper.check_profile(dset) - df = pd.read_csv('missing_images.csv') - print('Missing images:') - print(df.missing_images.tolist()) - assert len(df) == 0 - - -def test_check_training(): - dset = [] - deepprofiler.dataset.helper.check_training(dset) - miss_crops = pd.read_csv('missing_crops.csv') - print('Missing crops:') - print(miss_crops.missing_crops.tolist()) - - zero_crops = pd.read_csv('zero_crops.csv') - print('Zero crops:') - print(zero_crops.zero_crops.tolist()) - assert len(miss_crops) == 0 - assert len(zero_crops) == 0 - - From d596c61f770299a0f8e037135455224515a65b3b Mon Sep 17 00:00:00 2001 From: michaelbornholdt Date: Wed, 18 Aug 2021 17:48:01 -0400 Subject: [PATCH 10/11] finalize functions --- deepprofiler/__main__.py | 3 +- deepprofiler/dataset/helper.py | 108 +++++++++++++++++++++++++-------- 2 files changed, 84 insertions(+), 27 deletions(-) diff --git a/deepprofiler/__main__.py b/deepprofiler/__main__.py index a8110cf..d87a12d 100644 --- a/deepprofiler/__main__.py +++ b/deepprofiler/__main__.py @@ -193,6 +193,7 @@ def split(context, parts): context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"] deepprofiler.dataset.indexing.split_index(context.obj["config"], parts) + # Auxiliary tool: check if images and locations are complete to run profiling functions @cli.command() @click.pass_context @@ -201,8 +202,8 @@ def check_profile(context): deepprofiler.dataset.helper.check_profile(dset) print("checking for profile complete.") -# Auxiliary tool: check if crops are complete. Use this before running training +# Auxiliary tool: check if crops are complete. Use this before running training @cli.command() @click.pass_context def check_train(context): diff --git a/deepprofiler/dataset/helper.py b/deepprofiler/dataset/helper.py index 2649767..95b6953 100644 --- a/deepprofiler/dataset/helper.py +++ b/deepprofiler/dataset/helper.py @@ -6,55 +6,84 @@ import numpy as np import cv2 import os +import tensorflow as tf + import deepprofiler.imaging.boxes import plugins.crop_generators.sampled_crop_generator -import tensorflow as tf + def check_profile(dset): """Checks images and location files to prepare for the profiling function. If this function runs correctly, the function 'profile' will also run without errors. + The names of the missing files are saved in two different files. 
Parameters ---------- - config : - dset : + dset : Data structure with metadata and location files Returns ------- + ls_imgs : list of missing images + ls_locs : list of missing location files """ ls_imgs, ls_locs = [], [] - os.makedirs('checks', exist_ok=True) + os.makedirs("checks", exist_ok=True) + # start checking image files frame = dset.meta.data.iterrows() images = [dset.get_image_paths(r) for i, r in frame] - # print(images) - print('rand image',images[0]) + for channels in images: for img in channels[1]: if not os.path.isfile(img): ls_imgs.append(img) - print('>>> found {} missing images'.format(len(ls_imgs)), '|| saving list of missing files to checks/') - pd.DataFrame(ls_imgs, columns=['missing_images']).to_csv('checks/missing_images.csv', index=False) + print( + ">>> found {} missing images".format(len(ls_imgs)), + "|| saving list of missing files to checks/missing_images.csv", + ) + pd.DataFrame(ls_imgs, columns=["missing_images"]).to_csv( + "checks/missing_images.csv", index=False + ) # start checking location files frame = dset.meta.data.iterrows() for i, r in frame: - df = deepprofiler.imaging.boxes.get_single_cell_locations("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"]), dset.config) + df = deepprofiler.imaging.boxes.get_single_cell_locations( + "{}/{}-{}".format( + r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"] + ), + dset.config, + ) if df.empty: - ls_locs.append("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])) - - print('>>> found {} missing location files'.format(len(ls_locs)), '|| saving list of missing files to checks/') - pd.DataFrame(ls_locs, columns=['missing_locs']).to_csv('checks/missing_locs.csv', index=False) + ls_locs.append( + "{}/{}-{}".format( + r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"] + ) + ) + + print( + ">>> found {} missing location files".format(len(ls_locs)), + "|| saving list of missing files to checks/missing_locs.csv", + ) + pd.DataFrame(ls_locs, columns=["missing_locs"]).to_csv( + "checks/missing_locs.csv", index=False + ) return ls_imgs, ls_locs -""" -Checking all crops before training. -""" +def crop_checks(img_name, ls_missing, ls_zero): + """Utility function for check_train to check images for existence and non-zero values. + Parameters + ---------- + img_name : crop image name + + Returns + ------- + ls_missing, ls_zero : lists detailing the missing crops and the zero crops -def crop_checks(ls_missing, ls_zero, img_name): + """ if not os.path.isfile(img_name): ls_missing.append(img_name) else: @@ -65,23 +94,50 @@ def crop_checks(ls_missing, ls_zero, img_name): def check_train(config, dset): - """Check all crops before training in order to avoid errors during training. + """Checks if the data is ready for training by checking if the crops are sampled correctly. + Missing and zero crops are saved into two files. 
+ Parameters + ---------- + config : config input + dset : Data structure with metadata Returns ------- + ls_missing, ls_zero : lists of missing and zero crops """ - os.makedirs('checks', exist_ok=True) + os.makedirs("checks", exist_ok=True) - crop_generator = plugins.crop_generators.sampled_crop_generator.GeneratorClass(config, dset) + crop_generator = plugins.crop_generators.sampled_crop_generator.GeneratorClass( + config, dset + ) sess = tf.compat.v1.Session() crop_generator.start(sess) df = crop_generator.samples ls_missing, ls_zero = [], [] - res = df.apply(lambda row: crop_checks(ls_missing, ls_zero, os.path.join(config["paths"]["single_cell_sample"], row['Image_Name'])), axis = 1) - - print('>>> found {} missing crops'.format(len(ls_missing)), '|| saving list of missing crops to checks/') - pd.DataFrame(ls_missing, columns=['missing_crops']).to_csv('missing_crops.csv', index=False) - print('>>> found {} crops with zero values'.format(len(ls_zero)), '|| saving list of zero crops to checks/') - pd.DataFrame(ls_zero, columns=['zero_crops']).to_csv('zero_crops.csv', index=False) \ No newline at end of file + res = df.apply( + lambda row: crop_checks( + os.path.join(config["paths"]["single_cell_sample"], row["Image_Name"]), + ls_missing, + ls_zero, + ), + axis=1, + ) + + print( + ">>> found {} missing crops".format(len(ls_missing)), + "|| saving list of missing crops to checks/missing_crops.csv", + ) + pd.DataFrame(ls_missing, columns=["missing_crops"]).to_csv( + "checks/missing_crops.csv", index=False + ) + print( + ">>> found {} crops with zero values".format(len(ls_zero)), + "|| saving list of zero crops to checks/missing_crops.csv", + ) + pd.DataFrame(ls_zero, columns=["zero_crops"]).to_csv( + "checks/zero_crops.csv", index=False + ) + + return ls_missing, ls_zero From 24a233e44ee5ea85002faef1a7263f6ba11c5797 Mon Sep 17 00:00:00 2001 From: Michael Bornholdt <56402523+michaelbornholdt@users.noreply.github.com> Date: Wed, 18 Aug 2021 23:51:07 +0200 Subject: [PATCH 11/11] Update .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9f16664..69da5e5 100644 --- a/.gitignore +++ b/.gitignore @@ -95,4 +95,3 @@ ENV/ .ropeproject .idea -/tests/files/test_data/