Add Helper functions #266

Open
wants to merge 11 commits into base: tf2
17 changes: 17 additions & 0 deletions deepprofiler/__main__.py
@@ -15,6 +15,7 @@
import deepprofiler.dataset.sampling
import deepprofiler.learning.training
import deepprofiler.learning.profiling
import deepprofiler.dataset.helper
import deepprofiler.download.normalize_bbbc021_metadata


@@ -192,6 +193,22 @@ def split(context, parts):
    context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"]
    deepprofiler.dataset.indexing.split_index(context.obj["config"], parts)

# Auxiliary tool: check that images and locations are complete before running profiling
@cli.command()
@click.pass_context
def check_profile(context):
    dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile')
    deepprofiler.dataset.helper.check_profile(dset)
    print("Profile check complete.")


# Auxiliary tool: check that crops are complete. Use this before running training
@cli.command()
@click.pass_context
def check_train(context):
    dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='train')
    deepprofiler.dataset.helper.check_train(dset)
    print("Training data check complete.")

if __name__ == "__main__":
    cli(obj={})
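With these commands registered, the checks can be invoked from the project root before profiling or training. A minimal sketch, assuming the existing DeepProfiler --root/--config options (and noting that Click may register the commands with dashes instead of underscores, depending on its version):

python3 deepprofiler --root=/project_folder --config=config.json check_profile
python3 deepprofiler --root=/project_folder --config=config.json check_train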
80 changes: 80 additions & 0 deletions deepprofiler/dataset/helper.py
@@ -0,0 +1,80 @@
"""
Helper functions for checking images, locations and crops before running profile and train.
"""

import pandas as pd
import numpy as np
import cv2
import os
import deepprofiler.imaging.boxes


def check_profile(dset):
    """Check images and location files required by the profiling step.
    If this check reports no missing files, the 'profile' command should also run without errors.

    Parameters
    ----------
    dset : image dataset returned by deepprofiler.dataset.image_dataset.read_dataset

    Returns
    -------
    ls_imgs, ls_locs : lists of missing image paths and missing location files

    """
    ls_imgs, ls_locs = [], []
    os.makedirs('checks', exist_ok=True)

    # Check that every channel image referenced in the metadata exists on disk
    frame = dset.meta.data.iterrows()
    images = [dset.get_image_paths(r) for i, r in frame]
    for channels in images:
        for img in channels[1]:
            if not os.path.isfile(img):
                ls_imgs.append(img)
    print('Found {} missing images. Saving the list of missing files to checks/'.format(len(ls_imgs)))
    pd.DataFrame(ls_imgs, columns=['missing_images']).to_csv('checks/missing_images.csv', index=False)

    # Check that every plate/well/site in the metadata has a non-empty location file
    frame = dset.meta.data.iterrows()
    for i, r in frame:
        df = deepprofiler.imaging.boxes.get_single_cell_locations("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"]), dset.config)
        if df.empty:
            ls_locs.append("{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"]))

    print('Found {} missing location files. Saving the list of missing files to checks/'.format(len(ls_locs)))
    pd.DataFrame(ls_locs, columns=['missing_locs']).to_csv('checks/missing_locs.csv', index=False)

    return ls_imgs, ls_locs
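# Example usage (a sketch; mirrors how the check_profile command in __main__.py
# builds the dataset from the parsed project config):
#   dset = deepprofiler.dataset.image_dataset.read_dataset(config, mode='profile')
#   missing_images, missing_locations = check_profile(dset)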


"""
Checking all crops before training.
"""

def crop_checks(ls_missing, ls_zero, img_name, sample_dir):
if not os.path.isfile(os.path.join(sample_dir, img_name)):
ls_missing.append(img_name)
else:
img = cv2.imread(os.path.join(sample_dir, img_name), cv2.IMREAD_GRAYSCALE)
pos = np.nonzero(img)
if len(pos[0]) == 0:
ls_zero.append(img_name)


def check_train(dset):
    """Check all crops before training in order to avoid errors during training.

    Parameters
    ----------
    dset : image dataset returned by deepprofiler.dataset.image_dataset.read_dataset

    Returns
    -------
    ls_missing, ls_zero : lists of missing crop files and all-zero crop files

    """
    # First check that the crops listed in the single-cell metadata exist.
    # NOTE: the paths below are assumptions; the exact location of the sampled
    # crops and their sc-metadata file still needs to be confirmed (see the
    # review comment below).
    crops_dir = dset.config["paths"]["single_cell_sample"]
    df = pd.read_csv(os.path.join(crops_dir, "sc-metadata.csv"))
Contributor Author: @Arkkienkeli Can you tell me how to read the sc metadata here?


    ls_missing = []
    ls_zero = []
    # Run the per-crop checks on every row of the single-cell metadata
    df.apply(lambda row: crop_checks(ls_missing, ls_zero, row['Image_Name'], crops_dir), axis=1)

    print('Found {} missing crops and {} all-zero crops.'.format(len(ls_missing), len(ls_zero)))
    pd.DataFrame(ls_missing, columns=['missing_crops']).to_csv('missing_crops.csv', index=False)
    pd.DataFrame(ls_zero, columns=['zero_crops']).to_csv('zero_crops.csv', index=False)
    return ls_missing, ls_zero
35 changes: 35 additions & 0 deletions tests/deepprofiler/dataset/test_helper.py
@@ -0,0 +1,35 @@
import sys
import pandas as pd
import tempfile
import os

import deepprofiler.dataset.helper


# Not sure yet how to test the commands through the CLI (see the CliRunner sketch below).

tempdir = tempfile.TemporaryDirectory()
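# One possible way to exercise the CLI commands directly (an untested sketch;
# it assumes the click group in deepprofiler/__main__.py is importable as `cli`
# and that the existing --root/--config options point to a valid project):
#
#   from click.testing import CliRunner
#   from deepprofiler.__main__ import cli
#
#   runner = CliRunner()
#   result = runner.invoke(cli, ["--config", "config.json", "check_profile"], obj={})
#   assert result.exit_code == 0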

def test_check_profile():
    # NOTE: dset is a placeholder; a real dataset fixture is needed for this to run
    dset = []
    deepprofiler.dataset.helper.check_profile(dset)
    df = pd.read_csv('checks/missing_images.csv')
    print('Missing images:')
    print(df.missing_images.tolist())
    assert len(df) == 0


def test_check_training():
    # NOTE: dset is a placeholder; a real dataset fixture is needed for this to run
    dset = []
    deepprofiler.dataset.helper.check_train(dset)
    miss_crops = pd.read_csv('missing_crops.csv')
    print('Missing crops:')
    print(miss_crops.missing_crops.tolist())

    zero_crops = pd.read_csv('zero_crops.csv')
    print('Zero crops:')
    print(zero_crops.zero_crops.tolist())
    assert len(miss_crops) == 0
    assert len(zero_crops) == 0