From 0b276d70ada8d285ac69517f3a34cc242afc7b98 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Sat, 17 Jun 2023 10:17:51 -0400 Subject: [PATCH 1/3] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0bbb9d0..10cc4a1 100755 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Contact: Mika Rubinov, `mika.rubinov at vanderbilt.edu` # example code -``` +```python # set up import os import pprint From b14929721f0d6d9a686126da6450d55c4fd9a9f6 Mon Sep 17 00:00:00 2001 From: luiz Date: Mon, 26 Aug 2024 10:09:55 +0200 Subject: [PATCH 2/3] black formatting --- .gitignore | 13 ++ setup.py | 32 ++-- voluseg/__init__.py | 2 +- voluseg/_steps/step0.py | 206 +++++++++++++++--------- voluseg/_steps/step1.py | 51 +++--- voluseg/_steps/step2.py | 121 +++++++------- voluseg/_steps/step3.py | 180 ++++++++++++--------- voluseg/_steps/step4.py | 195 ++++++++++++++-------- voluseg/_steps/step4a.py | 29 ++-- voluseg/_steps/step4b.py | 54 ++++--- voluseg/_steps/step4c.py | 60 ++++--- voluseg/_steps/step4d.py | 54 ++++--- voluseg/_steps/step4e.py | 45 ++++-- voluseg/_steps/step5.py | 93 ++++++----- voluseg/_tools/__init__.py | 0 voluseg/_tools/ants_registration.py | 89 ++++++---- voluseg/_tools/ants_transformation.py | 37 +++-- voluseg/_tools/ball.py | 10 +- voluseg/_tools/clean_signal.py | 39 ++--- voluseg/_tools/constants.py | 10 +- voluseg/_tools/evenly_parallelize.py | 3 +- voluseg/_tools/get_volume_name.py | 6 +- voluseg/_tools/load_metadata.py | 48 +++--- voluseg/_tools/load_parameters.py | 8 +- voluseg/_tools/load_volume.py | 17 +- voluseg/_tools/parameter_dictionary.py | 60 +++---- voluseg/_tools/save_volume.py | 18 +-- voluseg/_tools/sparseness.py | 7 +- voluseg/_tools/sparseness_projection.py | 16 +- voluseg/_update.py | 7 +- 30 files changed, 892 insertions(+), 618 deletions(-) create mode 100644 voluseg/_tools/__init__.py diff --git a/.gitignore b/.gitignore index 3fbb66b..2eb9b61 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,16 @@ __pycache__/ *.pyc *.egg-info +# Python coverage files +.coverage +.coverage.* +.cache +.cache/ +coverage.xml +coverage_html/ +htmlcov/ + +# Local data +sample_data/* +output/* +logs/* \ No newline at end of file diff --git a/setup.py b/setup.py index 42626d8..02bc1b6 100644 --- a/setup.py +++ b/setup.py @@ -1,32 +1,28 @@ -'''volumetric segmentation pipeline''' +"""volumetric segmentation pipeline""" import setuptools -with open('README.md', 'r') as fh: +with open("README.md", "r") as fh: README = fh.read() # with open('requirements.txt', 'r') as fh: # install_requires = fh.read().split('\n') setuptools.setup( - name='voluseg', - version='2023.12', - author='Mika Rubinov', - author_email='mika.rubinov@vanderbilt.edu', - description='pipeline for volumetric segmentation of calcium imaging data', + name="voluseg", + version="2023.12", + author="Mika Rubinov", + author_email="mika.rubinov@vanderbilt.edu", + description="pipeline for volumetric segmentation of calcium imaging data", long_description=README, - long_description_content_type='text/markdown', - url='https://github.com/mikarubi/voluseg', - packages=[ - 'voluseg', - 'voluseg._steps', - 'voluseg._tools' - ], + long_description_content_type="text/markdown", + url="https://github.com/mikarubi/voluseg", + packages=["voluseg", "voluseg._steps", "voluseg._tools"], classifiers=[ - 'Programming Language :: Python :: 3', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", ], - python_requires='>=3.5', + python_requires=">=3.5", # install_requires=install_requires ) diff --git a/voluseg/__init__.py b/voluseg/__init__.py index b58a6d3..c9ca139 100644 --- a/voluseg/__init__.py +++ b/voluseg/__init__.py @@ -1,4 +1,4 @@ -'''initialization''' +"""initialization""" from voluseg._steps.step0 import process_parameters as step0_process_parameters from voluseg._steps.step1 import process_volumes as step1_process_volumes diff --git a/voluseg/_steps/step0.py b/voluseg/_steps/step0.py index 3baba95..a395964 100644 --- a/voluseg/_steps/step0.py +++ b/voluseg/_steps/step0.py @@ -1,5 +1,5 @@ def process_parameters(parameters0=None): - '''process parameters and create parameter file''' + """process parameters and create parameter file""" import os import copy @@ -17,115 +17,152 @@ def process_parameters(parameters0=None): # check that parameter input is a dictionary if not isinstance(parameters, dict): - raise Exception('specify parameter dictionary as input.') + raise Exception("specify parameter dictionary as input.") # check if any parameters are missing missing_parameters = set(parameter_dictionary()) - set(parameters) if missing_parameters: - raise Exception('missing parameters \'%s\'.'%('\', \''.join(missing_parameters))) + raise Exception("missing parameters '%s'." % ("', '".join(missing_parameters))) # get input and output directories, and parameter filename - dir_input = parameters['dir_input'] - dir_output = parameters['dir_output'] - filename_parameters = os.path.join(dir_output, 'parameters.pickle') + dir_input = parameters["dir_input"] + dir_output = parameters["dir_output"] + filename_parameters = os.path.join(dir_output, "parameters.pickle") # load parameters from file, if it already exists if os.path.isfile(filename_parameters): - print('exiting, parameter file exists: %s.'%(filename_parameters)) + print("exiting, parameter file exists: %s." % (filename_parameters)) return ## specific checks # check strings - for i in ['dir_ants', 'dir_input', 'dir_output', 'dir_transform', 'registration', 'registration_restrict']: + for i in [ + "dir_ants", + "dir_input", + "dir_output", + "dir_transform", + "registration", + "registration_restrict", + ]: pi = parameters[i] - if not (isinstance(pi, str) and (' ' not in pi)): - raise Exception('\'%s\' must be a string without spaces.'%(i)) + if not (isinstance(pi, str) and (" " not in pi)): + raise Exception("'%s' must be a string without spaces." % (i)) # check booleans - for i in ['parallel_clean', 'parallel_volume', 'planes_packed', 'save_volume']: + for i in ["parallel_clean", "parallel_volume", "planes_packed", "save_volume"]: pi = parameters[i] if not isinstance(pi, bool): - raise Exception('\'%s\' must be a boolean.'%(i)) + raise Exception("'%s' must be a boolean." % (i)) # check integers - for i in ['ds', 'n_cells_block', 'n_colors', 'planes_pad']: + for i in ["ds", "n_cells_block", "n_colors", "planes_pad"]: pi = parameters[i] if not (np.isscalar(pi) and (pi >= 0) and (pi == np.round(pi))): - raise Exception('\'%s\' must be a nonnegative or positive integer.'%(i)) + raise Exception("'%s' must be a nonnegative or positive integer." % (i)) # check non-negative real numbers: - for i in ['diam_cell', 'f_hipass', 'f_volume', 'res_x', 'res_y', - 'res_z', 't_baseline', 't_section', 'thr_mask']: + for i in [ + "diam_cell", + "f_hipass", + "f_volume", + "res_x", + "res_y", + "res_z", + "t_baseline", + "t_section", + "thr_mask", + ]: pi = parameters[i] if not (np.isscalar(pi) and (pi >= 0) and np.isreal(pi)): - raise Exception('\'%s\' must be a nonnegative or positive real number.'%(i)) + raise Exception("'%s' must be a nonnegative or positive real number." % (i)) # check detrending - if parameters['detrending']: - parameters['detrending'] = parameters['detrending'].lower() - if parameters['detrending'] == 'none': - parameters['detrending'] = None - elif not parameters['detrending'] in ['standard', 'robust']: - raise Exception('\'detrending\' must be \'standard\', \'robust\', or \'none\'.') + if parameters["detrending"]: + parameters["detrending"] = parameters["detrending"].lower() + if parameters["detrending"] == "none": + parameters["detrending"] = None + elif not parameters["detrending"] in ["standard", "robust"]: + raise Exception("'detrending' must be 'standard', 'robust', or 'none'.") # check registration - if parameters['registration']: - parameters['registration'] = parameters['registration'].lower() - if parameters['registration'] == 'none': - parameters['registration'] = None - elif not parameters['registration'] in ['high', 'medium', 'low', 'transform']: - raise Exception('\'registration\' must be \'high\', \'medium\', \'low\', \'none\' or \'transform\'.') + if parameters["registration"]: + parameters["registration"] = parameters["registration"].lower() + if parameters["registration"] == "none": + parameters["registration"] = None + elif not parameters["registration"] in ["high", "medium", "low", "transform"]: + raise Exception( + "'registration' must be 'high', 'medium', 'low', 'none' or 'transform'." + ) # check registration restrict - if parameters['registration_restrict']: - parameters['registration_restrict'] = parameters['registration_restrict'].lower() - if not set(['0', '1']) == set(parameters['registration_restrict'].split('x')): - raise Exception('\'registration_restrict\' must comprise \'1\'s and \'0\'s, separated by \'x\'s.') + if parameters["registration_restrict"]: + parameters["registration_restrict"] = parameters[ + "registration_restrict" + ].lower() + if not set(["0", "1"]) == set(parameters["registration_restrict"].split("x")): + raise Exception( + "'registration_restrict' must comprise '1's and '0's, separated by 'x's." + ) # check type of mask - parameters['type_mask'] = parameters['type_mask'].lower() - if not parameters['type_mask'] in ['mean', 'geomean', 'max']: - raise Exception('\'type_mask\' must be \'mean\', \'geomean\', or \'max\'.') + parameters["type_mask"] = parameters["type_mask"].lower() + if not parameters["type_mask"] in ["mean", "geomean", "max"]: + raise Exception("'type_mask' must be 'mean', 'geomean', or 'max'.") # check plane padding - if (not parameters['registration']) and not ((parameters['planes_pad'] == 0)): - raise Exception('\'planes_pad\' must be 0 if \'registration\' is None.') + if (not parameters["registration"]) and not ((parameters["planes_pad"] == 0)): + raise Exception("'planes_pad' must be 0 if 'registration' is None.") # convert dir_input into a list to account for multiple directories - input_dirs = [os.path.normpath(h) for h in dir_input.split(';')] + input_dirs = [os.path.normpath(h) for h in dir_input.split(";")] if len(input_dirs) == 1: prefix_dirs = [None] else: prefix_dirs = [os.path.basename(h) for h in input_dirs] if len(prefix_dirs) != len(set(prefix_dirs)): - raise Exception('the names of last directories in \'dir_input\' must be unique.') + raise Exception( + "the names of last directories in 'dir_input' must be unique." + ) if prefix_dirs != sorted(prefix_dirs): - raise Exception('the names of last directories in \'dir_input\' must be sorted.') + raise Exception( + "the names of last directories in 'dir_input' must be sorted." + ) volume_fullnames_input = [] volume_names = [] for dir_input_h, dir_prefix_h in zip(input_dirs, prefix_dirs): # get volume extension, volume names and number of segmentation timepoints - file_names = [i.split('.', 1) for i in os.listdir(dir_input_h) if '.' in i] + file_names = [i.split(".", 1) for i in os.listdir(dir_input_h) if "." in i] file_exts, counts = np.unique(list(zip(*file_names))[1], return_counts=True) - ext = '.'+file_exts[np.argmax(counts)] - volume_names_input_h = sorted([i for i, j in file_names if '.'+j == ext]) - volume_fullnames_input_h = [os.path.join(dir_input_h, i) for i in volume_names_input_h] + ext = "." + file_exts[np.argmax(counts)] + volume_names_input_h = sorted([i for i, j in file_names if "." + j == ext]) + volume_fullnames_input_h = [ + os.path.join(dir_input_h, i) for i in volume_names_input_h + ] # adjust parameters for packed planes data - if parameters['planes_packed']: - parameters['res_z'] = parameters['diam_cell'] + if parameters["planes_packed"]: + parameters["res_z"] = parameters["diam_cell"] def get_plane_names(tuple_fullname_volume_input): fullname_volume_input = tuple_fullname_volume_input[1] - lp = len(load_volume(fullname_volume_input+ext)) - return [get_volume_name(fullname_volume_input, dir_prefix_h, pi) for pi in range(lp)] - - volume_names_h = evenly_parallelize(volume_fullnames_input_h).map(get_plane_names).collect() + lp = len(load_volume(fullname_volume_input + ext)) + return [ + get_volume_name(fullname_volume_input, dir_prefix_h, pi) + for pi in range(lp) + ] + + volume_names_h = ( + evenly_parallelize(volume_fullnames_input_h) + .map(get_plane_names) + .collect() + ) volume_names_h = [pi for ni in volume_names_h for pi in ni] else: - volume_names_h = [get_volume_name(i, dir_prefix_h) for i in volume_names_input_h] + volume_names_h = [ + get_volume_name(i, dir_prefix_h) for i in volume_names_input_h + ] # grow volume-name lists volume_fullnames_input += volume_fullnames_input_h @@ -136,47 +173,60 @@ def get_plane_names(tuple_fullname_volume_input): lt = len(volume_names) # check timepoints - parameters['type_timepoints'] = parameters['type_timepoints'].lower() - if not parameters['type_timepoints'] in ['dff', 'periodic', 'custom']: - raise Exception('\'type_timepoints\' must be \'dff\', \'periodic\' or \'custom\'.') + parameters["type_timepoints"] = parameters["type_timepoints"].lower() + if not parameters["type_timepoints"] in ["dff", "periodic", "custom"]: + raise Exception("'type_timepoints' must be 'dff', 'periodic' or 'custom'.") else: - print('checking \'timepoints\' for \'type_timepoints\'=\'%s\'.'%parameters['type_timepoints']) - tp = parameters['timepoints'] - if parameters['type_timepoints'] in ['dff', 'periodic']: + print( + "checking 'timepoints' for 'type_timepoints'='%s'." + % parameters["type_timepoints"] + ) + tp = parameters["timepoints"] + if parameters["type_timepoints"] in ["dff", "periodic"]: if not (np.isscalar(tp) and (tp >= 0) and (tp == np.round(tp))): - raise Exception('\'timepoints\' must be a nonnegative integer.') + raise Exception("'timepoints' must be a nonnegative integer.") elif tp >= lt: - warn('specified number of timepoints is greater than the number of volumes, overriding.') + warn( + "specified number of timepoints is greater than the number of volumes, overriding." + ) tp = 0 - elif parameters['type_timepoints'] in ['custom']: + elif parameters["type_timepoints"] in ["custom"]: tp = np.unique(tp) - if not ((np.ndim(tp) == 1) and np.all(tp >= 0) and np.all(tp == np.round(tp))): - raise Exception('\'timepoints\' must be a one-dimensional vector of nonnegative integers.') + if not ( + (np.ndim(tp) == 1) and np.all(tp >= 0) and np.all(tp == np.round(tp)) + ): + raise Exception( + "'timepoints' must be a one-dimensional vector of nonnegative integers." + ) elif np.any(tp >= lt): - warn('discarding timepoints that exceed the number of volumes.') + warn("discarding timepoints that exceed the number of volumes.") tp = tp[tp < lt] tp = tp.astype(int) # affine matrix - affine_mat = np.diag([ parameters['res_x'] * parameters['ds'], - parameters['res_y'] * parameters['ds'], - parameters['res_z'], - 1]) + affine_mat = np.diag( + [ + parameters["res_x"] * parameters["ds"], + parameters["res_y"] * parameters["ds"], + parameters["res_z"], + 1, + ] + ) # save parameters - parameters['volume_fullnames_input'] = volume_fullnames_input - parameters['volume_names'] = volume_names - parameters['input_dirs'] = input_dirs - parameters['ext'] = ext - parameters['lt'] = lt - parameters['affine_mat'] = affine_mat - parameters['timepoints'] = tp + parameters["volume_fullnames_input"] = volume_fullnames_input + parameters["volume_names"] = volume_names + parameters["input_dirs"] = input_dirs + parameters["ext"] = ext + parameters["lt"] = lt + parameters["affine_mat"] = affine_mat + parameters["timepoints"] = tp try: os.makedirs(dir_output, exist_ok=True) - with open(filename_parameters, 'wb') as file_handle: + with open(filename_parameters, "wb") as file_handle: pickle.dump(parameters, file_handle) - print('parameter file successfully saved.') + print("parameter file successfully saved.") except Exception as msg: - print('parameter file not saved: %s.'%(msg)) + print("parameter file not saved: %s." % (msg)) diff --git a/voluseg/_steps/step1.py b/voluseg/_steps/step1.py index c5688b4..4c49dce 100755 --- a/voluseg/_steps/step1.py +++ b/voluseg/_steps/step1.py @@ -1,5 +1,5 @@ def process_volumes(parameters): - '''process original volumes and save into nifti format''' + """process original volumes and save into nifti format""" import os from scipy import interpolate @@ -14,29 +14,32 @@ def process_volumes(parameters): volume_fullname_inputRDD = evenly_parallelize(p.volume_fullnames_input) for color_i in range(p.n_colors): - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - if os.path.isfile(fullname_volmean+hdf): + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + if os.path.isfile(fullname_volmean + hdf): continue - dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i)) + dir_volume = os.path.join(p.dir_output, "volumes", str(color_i)) os.makedirs(dir_volume, exist_ok=True) def initial_processing(tuple_fullname_volume_input): def make_output_volume(name_volume, volume): import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np # skip processing if output volume exists fullname_volume = os.path.join(dir_volume, name_volume) - if load_volume(fullname_volume+ori+nii) is not None \ - or load_volume(fullname_volume+ali+hdf) is not None: + if ( + load_volume(fullname_volume + ori + nii) is not None + or load_volume(fullname_volume + ali + hdf) is not None + ): return # fix dimensionality @@ -60,14 +63,14 @@ def make_output_volume(name_volume, volume): # downsample in the x-y if specified if p.ds > 1: if (lx % p.ds) or (ly % p.ds): - lx -= (lx % p.ds) - ly -= (ly % p.ds) + lx -= lx % p.ds + ly -= ly % p.ds volume = volume[:lx, :ly, :] # make grid for computing downsampled values sx_ds = np.arange(0.5, lx, p.ds) sy_ds = np.arange(0.5, ly, p.ds) - xy_grid_ds = np.dstack(np.meshgrid(sx_ds, sy_ds, indexing='ij')) + xy_grid_ds = np.dstack(np.meshgrid(sx_ds, sy_ds, indexing="ij")) # get downsampled volume volume_ds = np.zeros((len(sx_ds), len(sy_ds), lz)) @@ -75,7 +78,7 @@ def make_output_volume(name_volume, volume): interpolation_fx = interpolate.RegularGridInterpolator( (np.arange(lx), np.arange(ly)), volume[:, :, zi], - method='linear' + method="linear", ) volume_ds[:, :, zi] = interpolation_fx(xy_grid_ds) @@ -84,21 +87,24 @@ def make_output_volume(name_volume, volume): # pad planes as necessary if p.registration and p.planes_pad: volume = np.lib.pad( - volume, ((0, 0), (0, 0), (p.planes_pad, p.planes_pad)), - 'constant', constant_values=(np.percentile(volume, 1),) + volume, + ((0, 0), (0, 0), (p.planes_pad, p.planes_pad)), + "constant", + constant_values=(np.percentile(volume, 1),), ) # save volume in output directory if p.registration: - save_volume(fullname_volume+ori+nii, volume, p.affine_mat) + save_volume(fullname_volume + ori + nii, volume, p.affine_mat) else: volume = volume.T - save_volume(fullname_volume+ali+hdf, volume) + save_volume(fullname_volume + ali + hdf, volume) + # end make output volume # get full name of input volume, input data and list of planes fullname_volume_input = tuple_fullname_volume_input[1] - volume = load_volume(fullname_volume_input+p.ext) + volume = load_volume(fullname_volume_input + p.ext) if len(p.input_dirs) == 1: dir_prefix = None else: @@ -107,11 +113,14 @@ def make_output_volume(name_volume, volume): # process output volumes if p.planes_packed: for pi, volume_pi in enumerate(volume): - name_volume_pi = get_volume_name(fullname_volume_input, dir_prefix, pi) + name_volume_pi = get_volume_name( + fullname_volume_input, dir_prefix, pi + ) make_output_volume(name_volume_pi, volume_pi) else: name_volume = get_volume_name(fullname_volume_input, dir_prefix) make_output_volume(name_volume, volume) + # end initial_processing volume_fullname_inputRDD.foreach(initial_processing) diff --git a/voluseg/_steps/step2.py b/voluseg/_steps/step2.py index 6de9c8c..018be55 100755 --- a/voluseg/_steps/step2.py +++ b/voluseg/_steps/step2.py @@ -1,8 +1,8 @@ def align_volumes(parameters): - '''register volumes to a single middle volume''' + """register volumes to a single middle volume""" # do not run if registration is set to none - if not parameters['registration']: + if not parameters["registration"]: return import os @@ -22,24 +22,26 @@ def align_volumes(parameters): volume_nameRDD = evenly_parallelize(p.volume_names) for color_i in range(p.n_colors): - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - if os.path.isfile(fullname_volmean+hdf): + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + if os.path.isfile(fullname_volmean + hdf): continue - dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i)) - fullname_reference = os.path.join(dir_volume, 'reference') - if load_volume(fullname_reference+nii) is None: - fullname_median = os.path.join(dir_volume, p.volume_names[p.lt//2]) - shutil.copyfile(fullname_median+ori+nii, fullname_reference+nii) + dir_volume = os.path.join(p.dir_output, "volumes", str(color_i)) + fullname_reference = os.path.join(dir_volume, "reference") + if load_volume(fullname_reference + nii) is None: + fullname_median = os.path.join(dir_volume, p.volume_names[p.lt // 2]) + shutil.copyfile(fullname_median + ori + nii, fullname_reference + nii) if p.dir_transform: dir_transform = os.path.join(p.dir_transform, str(color_i)) else: - dir_transform = os.path.join(p.dir_output, 'transforms', str(color_i)) + dir_transform = os.path.join(p.dir_output, "transforms", str(color_i)) os.makedirs(dir_transform, exist_ok=True) def get_fullname_tform(name_volume): - return os.path.join(dir_transform, name_volume+'_tform_0GenericAffine.mat') + return os.path.join( + dir_transform, name_volume + "_tform_0GenericAffine.mat" + ) def load_transform(name_volume): try: @@ -52,77 +54,88 @@ def load_transform(name_volume): def register_volume(tuple_name_volume): import os + # disable ITK multithreading - os.environ['ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS'] = '1' + os.environ["ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS"] = "1" name_volume = tuple_name_volume[1] fullname_volume = os.path.join(dir_volume, name_volume) # skip processing if aligned volume exists - if load_volume(fullname_volume+ali+hdf) is not None: + if load_volume(fullname_volume + ali + hdf) is not None: tform_vector = load_transform(name_volume) return tform_vector # setup registration\ - if p.registration == 'transform': + if p.registration == "transform": cmd = ants_transformation( - dir_ants = p.dir_ants, - in_nii = fullname_volume+ori+nii, - ref_nii = fullname_reference+nii, - out_nii = fullname_volume+ali+nii, - in_tform = get_fullname_tform(name_volume), - interpolation='Linear' - ) + dir_ants=p.dir_ants, + in_nii=fullname_volume + ori + nii, + ref_nii=fullname_reference + nii, + out_nii=fullname_volume + ali + nii, + in_tform=get_fullname_tform(name_volume), + interpolation="Linear", + ) else: cmd = ants_registration( - dir_ants = p.dir_ants, - in_nii = fullname_volume+ori+nii, - ref_nii = fullname_reference+nii, - out_nii = fullname_volume+ali+nii, - prefix_out_tform = os.path.join(dir_transform, name_volume+'_tform_'), - restrict = p.registration_restrict, - typ = 'r', + dir_ants=p.dir_ants, + in_nii=fullname_volume + ori + nii, + ref_nii=fullname_reference + nii, + out_nii=fullname_volume + ali + nii, + prefix_out_tform=os.path.join( + dir_transform, name_volume + "_tform_" + ), + restrict=p.registration_restrict, + typ="r", ) - if p.registration == 'high': + if p.registration == "high": pass - elif p.registration == 'medium': - cmd = cmd.replace('[1000x500x250x125]', '[1000x500x250]')\ - .replace('12x8x4x2', '12x8x4')\ - .replace('4x3x2x1vox', '4x3x2vox') - elif p.registration == 'low': - cmd = cmd.replace('[1000x500x250x125]', '[1000x500]')\ - .replace('12x8x4x2', '12x8')\ - .replace('4x3x2x1vox', '4x3vox') + elif p.registration == "medium": + cmd = ( + cmd.replace("[1000x500x250x125]", "[1000x500x250]") + .replace("12x8x4x2", "12x8x4") + .replace("4x3x2x1vox", "4x3x2vox") + ) + elif p.registration == "low": + cmd = ( + cmd.replace("[1000x500x250x125]", "[1000x500]") + .replace("12x8x4x2", "12x8") + .replace("4x3x2x1vox", "4x3vox") + ) # run registration flag = os.system(cmd) if flag: # if breaks change initialization - flag = os.system(cmd.replace(nii+',1]', nii+',0]')) - if flag and load_volume(fullname_volume+ori+nii).shape[2] == 1: + flag = os.system(cmd.replace(nii + ",1]", nii + ",0]")) + if flag and load_volume(fullname_volume + ori + nii).shape[2] == 1: # if breaks change dimensionality - flag = os.system(cmd.replace('--dimensionality 3', '--dimensionality 2')) + flag = os.system( + cmd.replace("--dimensionality 3", "--dimensionality 2") + ) if not flag: - volume = load_volume(fullname_volume+ali+nii)[:, :, None] - save_volume(fullname_volume+ali+nii, volume, p.affine_mat) + volume = load_volume(fullname_volume + ali + nii)[:, :, None] + save_volume(fullname_volume + ali + nii, volume, p.affine_mat) if flag: - raise Exception('volume %s not registered: flag %d.'%(name_volume, flag)) + raise Exception( + "volume %s not registered: flag %d." % (name_volume, flag) + ) # load aligned volume - volume = load_volume(fullname_volume+ali+nii) + volume = load_volume(fullname_volume + ali + nii) # remove padding if p.planes_pad: - volume = volume[:, :, p.planes_pad:-p.planes_pad] + volume = volume[:, :, p.planes_pad : -p.planes_pad] # save as hdf5 volume = volume.T - save_volume(fullname_volume+ali+hdf, volume) + save_volume(fullname_volume + ali + hdf, volume) # remove nifti files - if load_volume(fullname_volume+ali+hdf) is not None: + if load_volume(fullname_volume + ali + hdf) is not None: try: - os.remove(fullname_volume+ori+nii) - os.remove(fullname_volume+ali+nii) + os.remove(fullname_volume + ori + nii) + os.remove(fullname_volume + ali + nii) except: pass @@ -132,9 +145,9 @@ def register_volume(tuple_name_volume): # run registration and save transforms as needed transforms = volume_nameRDD.map(register_volume).collect() - if not p.registration == 'transform': + if not p.registration == "transform": transforms = np.array(transforms).astype(dtype) - fullname_tforms = os.path.join(dir_transform, 'transforms%d'%(color_i)) - if not os.path.isfile(fullname_tforms+hdf): - with h5py.File(fullname_tforms+hdf, 'w') as file_handle: - file_handle['transforms'] = transforms + fullname_tforms = os.path.join(dir_transform, "transforms%d" % (color_i)) + if not os.path.isfile(fullname_tforms + hdf): + with h5py.File(fullname_tforms + hdf, "w") as file_handle: + file_handle["transforms"] = transforms diff --git a/voluseg/_steps/step3.py b/voluseg/_steps/step3.py index 64a1a0a..256d20c 100755 --- a/voluseg/_steps/step3.py +++ b/voluseg/_steps/step3.py @@ -1,5 +1,5 @@ def mask_volumes(parameters): - '''create intensity mask from the average registered volume''' + """create intensity mask from the average registered volume""" import os import h5py @@ -18,43 +18,46 @@ def mask_volumes(parameters): # set up spark from pyspark.sql.session import SparkSession + spark = SparkSession.builder.getOrCreate() sc = spark.sparkContext # set up matplotlib import warnings import matplotlib + with warnings.catch_warnings(): - warnings.simplefilter('ignore') - matplotlib.use('Agg') + warnings.simplefilter("ignore") + matplotlib.use("Agg") import matplotlib.pyplot as plt p = SimpleNamespace(**parameters) # compute mean timeseries and ranked dff - fullname_timemean = os.path.join(p.dir_output, 'mean_timeseries') + fullname_timemean = os.path.join(p.dir_output, "mean_timeseries") volume_nameRDD = evenly_parallelize(p.volume_names) - if not os.path.isfile(fullname_timemean+hdf): + if not os.path.isfile(fullname_timemean + hdf): dff_rank = np.zeros(p.lt) mean_timeseries_raw = np.zeros((p.n_colors, p.lt)) mean_timeseries = np.zeros((p.n_colors, p.lt)) mean_baseline = np.zeros((p.n_colors, p.lt)) for color_i in range(p.n_colors): - dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i)) + dir_volume = os.path.join(p.dir_output, "volumes", str(color_i)) def mean_volume(tuple_name_volume): import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np name_volume = tuple_name_volume[1] fullname_volume = os.path.join(dir_volume, name_volume) - return np.mean(load_volume(fullname_volume+ali+hdf), dtype='float') + return np.mean(load_volume(fullname_volume + ali + hdf), dtype="float") mean_timeseries_raw[color_i] = volume_nameRDD.map(mean_volume).collect() time, base = clean_signal(parameters, mean_timeseries_raw[color_i]) @@ -62,69 +65,71 @@ def mean_volume(tuple_name_volume): dff_rank += stats.rankdata((time - base) / time) # get high delta-f/f timepoints - if p.type_timepoints == 'custom': + if p.type_timepoints == "custom": timepoints = p.timepoints else: nt = p.timepoints if not nt: timepoints = np.arange(p.lt) else: - if p.type_timepoints == 'dff': + if p.type_timepoints == "dff": timepoints = np.sort(np.argsort(dff_rank)[::-1][:nt]) - elif p.type_timepoints == 'periodic': - timepoints = np.linspace(0, p.lt, nt, dtype='int', endpoint=False) + elif p.type_timepoints == "periodic": + timepoints = np.linspace(0, p.lt, nt, dtype="int", endpoint=False) - with h5py.File(fullname_timemean+hdf, 'w') as file_handle: - file_handle['mean_timeseries_raw'] = mean_timeseries_raw - file_handle['mean_timeseries'] = mean_timeseries - file_handle['mean_baseline'] = mean_baseline - file_handle['timepoints'] = timepoints + with h5py.File(fullname_timemean + hdf, "w") as file_handle: + file_handle["mean_timeseries_raw"] = mean_timeseries_raw + file_handle["mean_timeseries"] = mean_timeseries + file_handle["mean_baseline"] = mean_baseline + file_handle["timepoints"] = timepoints # load timepoints - with h5py.File(fullname_timemean+hdf, 'r') as file_handle: - timepoints = file_handle['timepoints'][()] + with h5py.File(fullname_timemean + hdf, "r") as file_handle: + timepoints = file_handle["timepoints"][()] for color_i in range(p.n_colors): - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - if os.path.isfile(fullname_volmean+hdf): + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + if os.path.isfile(fullname_volmean + hdf): continue - dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i)) - dir_plot = os.path.join(p.dir_output, 'mask_plots', str(color_i)) + dir_volume = os.path.join(p.dir_output, "volumes", str(color_i)) + dir_plot = os.path.join(p.dir_output, "mask_plots", str(color_i)) os.makedirs(dir_plot, exist_ok=True) fullname_volume = os.path.join(dir_volume, p.volume_names[0]) - lx, ly, lz = load_volume(fullname_volume+ali+hdf).T.shape + lx, ly, lz = load_volume(fullname_volume + ali + hdf).T.shape class accum_param(pyspark.accumulators.AccumulatorParam): - '''define accumulator class''' + """define accumulator class""" def zero(self, val0): import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np - return np.zeros(val0.shape, dtype='float') + return np.zeros(val0.shape, dtype="float") def addInPlace(self, val1, val2): import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np - if p.type_mask == 'max': - return np.maximum(val1, val2, dtype='float') + if p.type_mask == "max": + return np.maximum(val1, val2, dtype="float") else: - return np.add(val1, val2, dtype='float') + return np.add(val1, val2, dtype="float") # geometric mean volume_accum = sc.accumulator(np.zeros((lx, ly, lz)), accum_param()) @@ -132,8 +137,8 @@ def addInPlace(self, val1, val2): def add_volume(tuple_name_volume): name_volume = tuple_name_volume[1] fullname_volume = os.path.join(dir_volume, name_volume) - volume = load_volume(fullname_volume+ali+hdf).T - if p.type_mask == 'geomean': + volume = load_volume(fullname_volume + ali + hdf).T + if p.type_mask == "geomean": volume = np.log10(volume) volume_accum.add(volume) @@ -143,20 +148,26 @@ def add_volume(tuple_name_volume): for name_volume in p.volume_names[timepoints]: add_volume(([], name_volume)) volume_mean = volume_accum.value - if p.type_mask != 'max': + if p.type_mask != "max": volume_mean = volume_mean / len(timepoints) - if p.type_mask == 'geomean': - volume_mean = 10 ** volume_mean + if p.type_mask == "geomean": + volume_mean = 10**volume_mean # get peaks by comparing to a median-smoothed volume ball_radi = ball(0.5 * p.diam_cell, p.affine_mat)[0] volume_peak = volume_mean >= median_filter(volume_mean, footprint=ball_radi) # compute power and probability - voxel_intensity = np.percentile(volume_mean[volume_mean>0], np.r_[5:95:0.001])[:, None] - gmm = mixture.GaussianMixture(n_components=2, max_iter=100, n_init=100).fit(voxel_intensity) + voxel_intensity = np.percentile( + volume_mean[volume_mean > 0], np.r_[5:95:0.001] + )[:, None] + gmm = mixture.GaussianMixture(n_components=2, max_iter=100, n_init=100).fit( + voxel_intensity + ) voxel_probability = gmm.predict_proba(voxel_intensity) - voxel_probability = voxel_probability[:, np.argmax(voxel_intensity[np.argmax(voxel_probability, 0)])] + voxel_probability = voxel_probability[ + :, np.argmax(voxel_intensity[np.argmax(voxel_probability, 0)]) + ] # compute intensity threshold if (p.thr_mask > 0) and (p.thr_mask <= 1): @@ -168,34 +179,34 @@ def add_volume(tuple_name_volume): ix = np.argmin(np.abs(voxel_intensity - thr_intensity)) thr_probability = voxel_probability[ix] else: - thr_intensity = - np.inf + thr_intensity = -np.inf thr_probability = 0 - print('using probability threshold of %f.'%(thr_probability)) - print('using intensity threshold of %f.'%(thr_intensity)) + print("using probability threshold of %f." % (thr_probability)) + print("using intensity threshold of %f." % (thr_intensity)) # get and save brain mask fig = plt.figure(1, (18, 6)) plt.subplot(131) _ = plt.hist(voxel_intensity, 100) - plt.plot(thr_intensity, 0, '|', color='r', markersize=200) - plt.xlabel('voxel intensity') - plt.title('intensity histogram with threshold (red)') + plt.plot(thr_intensity, 0, "|", color="r", markersize=200) + plt.xlabel("voxel intensity") + plt.title("intensity histogram with threshold (red)") plt.subplot(132) _ = plt.hist(voxel_probability, 100) - plt.plot(thr_probability, 0, '|', color='r', markersize=200) - plt.xlabel('voxel probability') - plt.title('probability histogram with threshold (red)') + plt.plot(thr_probability, 0, "|", color="r", markersize=200) + plt.xlabel("voxel probability") + plt.title("probability histogram with threshold (red)") plt.subplot(133) plt.plot(voxel_intensity, voxel_probability, linewidth=3) - plt.plot(thr_intensity, thr_probability, 'x', color='r', markersize=10) - plt.xlabel('voxel intensity') - plt.ylabel('voxel probability') - plt.title('intensity-probability plot with threshold (red)') + plt.plot(thr_intensity, thr_probability, "x", color="r", markersize=10) + plt.xlabel("voxel intensity") + plt.ylabel("voxel probability") + plt.title("intensity-probability plot with threshold (red)") - plt.savefig(os.path.join(dir_plot, 'histogram.png')) + plt.savefig(os.path.join(dir_plot, "histogram.png")) plt.close(fig) # remove all disconnected components less than 5000 cubic microliters in size @@ -205,33 +216,42 @@ def add_volume(tuple_name_volume): volume_mask = morphology.remove_small_objects(volume_mask, thr_size) # compute background fluorescence - background = np.median(volume_mean[volume_mask==0]) + background = np.median(volume_mean[volume_mask == 0]) # save brain mask figures for i in range(lz): fig = plt.figure(1, (18, 6)) plt.subplot(131) - plt.imshow(volume_mean[:, :, i].T, vmin=voxel_intensity[0], vmax=voxel_intensity[-1]) - plt.title('volume intensity (plane %d)'%(i)) + plt.imshow( + volume_mean[:, :, i].T, + vmin=voxel_intensity[0], + vmax=voxel_intensity[-1], + ) + plt.title("volume intensity (plane %d)" % (i)) plt.subplot(132) plt.imshow(volume_mask[:, :, i].T) - plt.title('volume mask (plane %d)'%(i)) + plt.title("volume mask (plane %d)" % (i)) plt.subplot(133) - img = np.stack((volume_mean[:, :, i], volume_mask[:, :, i], volume_mask[:, :, i]), axis=2) - img[:, :, 0] = (img[:, :, 0] - voxel_intensity[0]) / (voxel_intensity[-1] - voxel_intensity[0]) + img = np.stack( + (volume_mean[:, :, i], volume_mask[:, :, i], volume_mask[:, :, i]), + axis=2, + ) + img[:, :, 0] = (img[:, :, 0] - voxel_intensity[0]) / ( + voxel_intensity[-1] - voxel_intensity[0] + ) img[:, :, 0] = np.minimum(np.maximum(img[:, :, 0], 0), 1) plt.imshow(np.transpose(img, [1, 0, 2])) - plt.title('volume mask/intensity overlay (plane %d)'%(i)) + plt.title("volume mask/intensity overlay (plane %d)" % (i)) - plt.savefig(os.path.join(dir_plot, 'mask_z%03d.png'%(i))) + plt.savefig(os.path.join(dir_plot, "mask_z%03d.png" % (i))) plt.close(fig) - with h5py.File(fullname_volmean+hdf, 'w') as file_handle: - file_handle['volume_mask'] = volume_mask.T - file_handle['volume_mean'] = volume_mean.T - file_handle['volume_peak'] = volume_peak.T - file_handle['thr_intensity'] = thr_intensity - file_handle['thr_probability'] = thr_probability - file_handle['background'] = background + with h5py.File(fullname_volmean + hdf, "w") as file_handle: + file_handle["volume_mask"] = volume_mask.T + file_handle["volume_mean"] = volume_mean.T + file_handle["volume_peak"] = volume_peak.T + file_handle["thr_intensity"] = thr_intensity + file_handle["thr_probability"] = thr_probability + file_handle["background"] = background diff --git a/voluseg/_steps/step4.py b/voluseg/_steps/step4.py index d7c4555..14715cf 100755 --- a/voluseg/_steps/step4.py +++ b/voluseg/_steps/step4.py @@ -1,5 +1,5 @@ def detect_cells(parameters): - '''detect cells in volumes''' + """detect cells in volumes""" import os import h5py @@ -16,6 +16,7 @@ def detect_cells(parameters): # set up spark from pyspark.sql.session import SparkSession + spark = SparkSession.builder.getOrCreate() sc = spark.sparkContext @@ -24,31 +25,31 @@ def detect_cells(parameters): ball_diam, ball_diam_xyz0 = ball(1.0 * p.diam_cell, p.affine_mat) # load timepoints - fullname_timemean = os.path.join(p.dir_output, 'mean_timeseries') - with h5py.File(fullname_timemean+hdf, 'r') as file_handle: - timepoints = file_handle['timepoints'][()] + fullname_timemean = os.path.join(p.dir_output, "mean_timeseries") + with h5py.File(fullname_timemean + hdf, "r") as file_handle: + timepoints = file_handle["timepoints"][()] # load plane filename for color_i in range(p.n_colors): - fullname_cells = os.path.join(p.dir_output, 'cells%s_clean'%(color_i)) - if os.path.isfile(fullname_cells+hdf): + fullname_cells = os.path.join(p.dir_output, "cells%s_clean" % (color_i)) + if os.path.isfile(fullname_cells + hdf): continue - dir_cell = os.path.join(p.dir_output, 'cells', str(color_i)) + dir_cell = os.path.join(p.dir_output, "cells", str(color_i)) os.makedirs(dir_cell, exist_ok=True) - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - with h5py.File(fullname_volmean+hdf, 'r') as file_handle: - volume_mean = file_handle['volume_mean'][()].T - volume_mask = file_handle['volume_mask'][()].T - volume_peak = file_handle['volume_peak'][()].T - if 'n_blocks' in file_handle.keys(): + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + with h5py.File(fullname_volmean + hdf, "r") as file_handle: + volume_mean = file_handle["volume_mean"][()].T + volume_mask = file_handle["volume_mask"][()].T + volume_peak = file_handle["volume_peak"][()].T + if "n_blocks" in file_handle.keys(): flag = 0 - n_voxels_cell = file_handle['n_voxels_cell'][()] - n_blocks = file_handle['n_blocks'][()] - block_valids = file_handle['block_valids'][()] - xyz0 = file_handle['block_xyz0'][()] - xyz1 = file_handle['block_xyz1'][()] + n_voxels_cell = file_handle["n_voxels_cell"][()] + n_blocks = file_handle["n_blocks"][()] + block_valids = file_handle["block_valids"][()] + xyz0 = file_handle["block_xyz0"][()] + xyz1 = file_handle["block_xyz1"][()] else: flag = 1 @@ -68,104 +69,162 @@ def detect_cells(parameters): # get number of voxels in cell if (lz == 1) or (rz >= p.diam_cell): # area of a circle - n_voxels_cell = np.pi * ((p.diam_cell / 2.0)**2) / (rx * ry) + n_voxels_cell = np.pi * ((p.diam_cell / 2.0) ** 2) / (rx * ry) else: # volume of a cylinder (change to sphere later) - n_voxels_cell = p.diam_cell * np.pi * ((p.diam_cell / 2.0)**2) / (rx * ry * rz) + n_voxels_cell = ( + p.diam_cell * np.pi * ((p.diam_cell / 2.0) ** 2) / (rx * ry * rz) + ) n_voxels_cell = np.round(n_voxels_cell).astype(int) # get number of voxels in each cell - n_blocks, block_valids, xyz0, xyz1 = \ - define_blocks(lx, ly, lz, p.n_cells_block, n_voxels_cell, volume_mask, volume_peak) + n_blocks, block_valids, xyz0, xyz1 = define_blocks( + lx, ly, lz, p.n_cells_block, n_voxels_cell, volume_mask, volume_peak + ) # save number and indices of blocks - with h5py.File(fullname_volmean+hdf, 'r+') as file_handle: - file_handle['n_voxels_cell'] = n_voxels_cell - file_handle['n_blocks'] = n_blocks - file_handle['block_valids'] = block_valids - file_handle['block_xyz0'] = xyz0 - file_handle['block_xyz1'] = xyz1 + with h5py.File(fullname_volmean + hdf, "r+") as file_handle: + file_handle["n_voxels_cell"] = n_voxels_cell + file_handle["n_blocks"] = n_blocks + file_handle["block_valids"] = block_valids + file_handle["block_xyz0"] = xyz0 + file_handle["block_xyz1"] = xyz1 - print('number of blocks, total: %d.'%(block_valids.sum())) + print("number of blocks, total: %d." % (block_valids.sum())) for ii in np.where(block_valids)[0]: try: - fullname_block = os.path.join(dir_cell, 'block%05d'%(ii)) - with h5py.File(fullname_block+hdf, 'r') as file_handle: - if ('completion' in file_handle.keys()) and file_handle['completion'][()]: + fullname_block = os.path.join(dir_cell, "block%05d" % (ii)) + with h5py.File(fullname_block + hdf, "r") as file_handle: + if ("completion" in file_handle.keys()) and file_handle[ + "completion" + ][()]: block_valids[ii] = 0 except (NameError, OSError): pass - print('number of blocks, remaining: %d.'%(block_valids.sum())) + print("number of blocks, remaining: %d." % (block_valids.sum())) ix = np.where(block_valids)[0] block_ixyz01 = list(zip(ix, xyz0[ix], xyz1[ix])) # detect individual cells with sparse nnmf algorithm def detect_cells_block(tuple_i_xyz0_xyz1): import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np ii, xyz0, xyz1 = tuple_i_xyz0_xyz1[1] - voxel_xyz, voxel_timeseries, peak_idx, voxel_similarity_peak = \ - process_block_data(xyz0, xyz1, parameters, color_i, lxyz, rxyz, - ball_diam, bvolume_mean, bvolume_peak, timepoints) - - n_voxels_block = len(voxel_xyz) # number of voxels in block - - voxel_fraction_peak = np.argsort(((voxel_timeseries[peak_idx])**2).mean(1)) / len(peak_idx) + voxel_xyz, voxel_timeseries, peak_idx, voxel_similarity_peak = ( + process_block_data( + xyz0, + xyz1, + parameters, + color_i, + lxyz, + rxyz, + ball_diam, + bvolume_mean, + bvolume_peak, + timepoints, + ) + ) + + n_voxels_block = len(voxel_xyz) # number of voxels in block + + voxel_fraction_peak = np.argsort( + ((voxel_timeseries[peak_idx]) ** 2).mean(1) + ) / len(peak_idx) for fraction in np.r_[1:0:-0.05]: try: - peak_valids = (voxel_fraction_peak >= (1 - fraction)) # valid voxel indices + peak_valids = voxel_fraction_peak >= ( + 1 - fraction + ) # valid voxel indices - n_cells = np.round(peak_valids.sum() / (0.5 * n_voxels_cell)).astype(int) + n_cells = np.round( + peak_valids.sum() / (0.5 * n_voxels_cell) + ).astype(int) print((fraction, n_cells)) tic = time.time() - voxel_timeseries_valid, voxel_xyz_valid, cell_weight_init_valid, \ - cell_neighborhood_valid, cell_sparseness = initialize_block_cells( - n_voxels_cell, n_voxels_block, n_cells, voxel_xyz, voxel_timeseries, peak_idx, - peak_valids, voxel_similarity_peak, lxyz, rxyz, ball_diam, ball_diam_xyz0) - print('cell initialization: %.1f minutes.\n'%((time.time() - tic) / 60)) + ( + voxel_timeseries_valid, + voxel_xyz_valid, + cell_weight_init_valid, + cell_neighborhood_valid, + cell_sparseness, + ) = initialize_block_cells( + n_voxels_cell, + n_voxels_block, + n_cells, + voxel_xyz, + voxel_timeseries, + peak_idx, + peak_valids, + voxel_similarity_peak, + lxyz, + rxyz, + ball_diam, + ball_diam_xyz0, + ) + print( + "cell initialization: %.1f minutes.\n" + % ((time.time() - tic) / 60) + ) tic = time.time() cell_weights_valid, cell_timeseries_valid, d = nnmf_sparse( - voxel_timeseries_valid, voxel_xyz_valid, cell_weight_init_valid, - cell_neighborhood_valid, cell_sparseness, timepoints=timepoints, - miniter=10, maxiter=100, tolfun=1e-3) + voxel_timeseries_valid, + voxel_xyz_valid, + cell_weight_init_valid, + cell_neighborhood_valid, + cell_sparseness, + timepoints=timepoints, + miniter=10, + maxiter=100, + tolfun=1e-3, + ) success = 1 - print('cell factorization: %.1f minutes.\n'%((time.time() - tic) / 60)) + print( + "cell factorization: %.1f minutes.\n" + % ((time.time() - tic) / 60) + ) break except ValueError as msg: - print('retrying factorization of block %d: %s'%(ii, msg)) + print("retrying factorization of block %d: %s" % (ii, msg)) success = 0 # get cell positions and timeseries, and save cell data - fullname_block = os.path.join(dir_cell, 'block%05d'%(ii)) - with h5py.File(fullname_block+hdf, 'w') as file_handle: + fullname_block = os.path.join(dir_cell, "block%05d" % (ii)) + with h5py.File(fullname_block + hdf, "w") as file_handle: if success: for ci in range(n_cells): ix = cell_weights_valid[:, ci] > 0 xyzi = voxel_xyz_valid[ix] wi = cell_weights_valid[ix, ci] - bi = np.sum(wi * bvolume_mean.value[tuple(zip(*xyzi))]) / np.sum(wi) - ti = bi * cell_timeseries_valid[ci] / np.mean(cell_timeseries_valid[ci]) - - file_handle['/cell/%05d/xyz'%(ci)] = xyzi - file_handle['/cell/%05d/weights'%(ci)] = wi - file_handle['/cell/%05d/timeseries'%(ci)] = ti - - file_handle['n_cells'] = n_cells - file_handle['completion'] = 1 + bi = np.sum( + wi * bvolume_mean.value[tuple(zip(*xyzi))] + ) / np.sum(wi) + ti = ( + bi + * cell_timeseries_valid[ci] + / np.mean(cell_timeseries_valid[ci]) + ) + + file_handle["/cell/%05d/xyz" % (ci)] = xyzi + file_handle["/cell/%05d/weights" % (ci)] = wi + file_handle["/cell/%05d/timeseries" % (ci)] = ti + + file_handle["n_cells"] = n_cells + file_handle["completion"] = 1 if block_valids.any(): evenly_parallelize(block_ixyz01).foreach(detect_cells_block) diff --git a/voluseg/_steps/step4a.py b/voluseg/_steps/step4a.py index 4aad1d0..c5676b2 100755 --- a/voluseg/_steps/step4a.py +++ b/voluseg/_steps/step4a.py @@ -1,13 +1,14 @@ def define_blocks(lx, ly, lz, n_cells_block, n_voxels_cell, volume_mask, volume_peak): - '''get coordinates of individual blocks''' + """get coordinates of individual blocks""" import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np # get initial estimate for the number of blocks @@ -19,10 +20,10 @@ def define_blocks(lx, ly, lz, n_cells_block, n_voxels_cell, volume_mask, volume_ z0_range = np.array([0, 1]) else: part_block = np.round(np.cbrt(n_blocks)).astype(int) - z0_range = np.unique(np.round(np.linspace(0, lz, part_block+1)).astype(int)) + z0_range = np.unique(np.round(np.linspace(0, lz, part_block + 1)).astype(int)) - x0_range = np.unique(np.round(np.linspace(0, lx, part_block+1)).astype(int)) - y0_range = np.unique(np.round(np.linspace(0, ly, part_block+1)).astype(int)) + x0_range = np.unique(np.round(np.linspace(0, lx, part_block + 1)).astype(int)) + y0_range = np.unique(np.round(np.linspace(0, ly, part_block + 1)).astype(int)) ijk, xyz = [], [] for i, x0 in enumerate(x0_range): @@ -32,15 +33,15 @@ def define_blocks(lx, ly, lz, n_cells_block, n_voxels_cell, volume_mask, volume_ xyz.append([x0, y0, z0]) dim = np.max(ijk, 0) - XYZ = np.zeros(np.r_[dim+1, 3], dtype=int) + XYZ = np.zeros(np.r_[dim + 1, 3], dtype=int) XYZ[tuple(zip(*ijk))] = xyz xyz0, xyz1 = [], [] for i in range(dim[0]): for j in range(dim[1]): for k in range(dim[2]): - xyz0.append(XYZ[i , j , k]) - xyz1.append(XYZ[i+1, j+1, k+1]) + xyz0.append(XYZ[i, j, k]) + xyz1.append(XYZ[i + 1, j + 1, k + 1]) # get final block number and coordinates xyz0 = np.array(xyz0) @@ -54,8 +55,8 @@ def define_blocks(lx, ly, lz, n_cells_block, n_voxels_cell, volume_mask, volume_ # get indices of masked blocks block_valids = np.ones(n_blocks, dtype=bool) for i in range(n_blocks): - mask_i = volume_mask[x0[i]:x1[i], y0[i]:y1[i], z0[i]:z1[i]] - peak_i = volume_peak[x0[i]:x1[i], y0[i]:y1[i], z0[i]:z1[i]] + mask_i = volume_mask[x0[i] : x1[i], y0[i] : y1[i], z0[i] : z1[i]] + peak_i = volume_peak[x0[i] : x1[i], y0[i] : y1[i], z0[i] : z1[i]] block_valids[i] = np.any(np.logical_and(mask_i, peak_i)) return n_blocks, block_valids, xyz0, xyz1 diff --git a/voluseg/_steps/step4b.py b/voluseg/_steps/step4b.py index 9f76203..0df23a9 100644 --- a/voluseg/_steps/step4b.py +++ b/voluseg/_steps/step4b.py @@ -1,14 +1,25 @@ -def process_block_data(xyz0, xyz1, parameters, color_i, lxyz, rxyz, - ball_diam, bvolume_mean, bvolume_peak, timepoints): - '''load timeseries in individual blocks, slice-time correct, and find similar timeseries''' +def process_block_data( + xyz0, + xyz1, + parameters, + color_i, + lxyz, + rxyz, + ball_diam, + bvolume_mean, + bvolume_peak, + timepoints, +): + """load timeseries in individual blocks, slice-time correct, and find similar timeseries""" import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np import h5py @@ -27,38 +38,41 @@ def process_block_data(xyz0, xyz1, parameters, color_i, lxyz, rxyz, voxel_peak = np.zeros_like(bvolume_peak.value) voxel_peak[x0_:x1_, y0_:y1_, z0_:z1_] = 1 voxel_peak = voxel_peak & bvolume_peak.value & (bvolume_mean.value > 0) - voxel_mask = morphology.binary_dilation(voxel_peak, ball_diam) & (bvolume_mean.value > 0) + voxel_mask = morphology.binary_dilation(voxel_peak, ball_diam) & ( + bvolume_mean.value > 0 + ) voxel_xyz = np.argwhere(voxel_mask) voxel_xyz_peak = np.argwhere(voxel_peak) peak_idx = np.argwhere(voxel_peak[voxel_mask]).T[0] tic = time.time() - dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i)) + dir_volume = os.path.join(p.dir_output, "volumes", str(color_i)) x0, y0, z0 = voxel_xyz.min(0) x1, y1, z1 = voxel_xyz.max(0) + 1 voxel_timeseries_block = [None] * p.lt for ti, name_volume in enumerate(p.volume_names): fullname_volume = os.path.join(dir_volume, name_volume) - with h5py.File(fullname_volume+ali+hdf, 'r') as file_handle: - voxel_timeseries_block[ti] = file_handle['volume'][z0:z1, y0:y1, x0:x1].T + with h5py.File(fullname_volume + ali + hdf, "r") as file_handle: + voxel_timeseries_block[ti] = file_handle["volume"][z0:z1, y0:y1, x0:x1].T voxel_timeseries_block = np.transpose(voxel_timeseries_block, (1, 2, 3, 0)) voxel_timeseries = voxel_timeseries_block[voxel_mask[x0:x1, y0:y1, z0:z1]] voxel_timeseries = voxel_timeseries.astype(dtype) del voxel_timeseries_block - print('data loading: %.1f minutes.\n'%((time.time() - tic) / 60)) + print("data loading: %.1f minutes.\n" % ((time.time() - tic) / 60)) # slice-time correct if more than one slice and t_section is positive if (lz > 1) and (p.t_section > 0): for i, zi in enumerate(voxel_xyz[:, 2]): # get timepoints of midpoint and zi plane for interpolation - timepoints_zi = np.arange(p.lt) / p.f_volume + zi * p.t_section + timepoints_zi = np.arange(p.lt) / p.f_volume + zi * p.t_section timepoints_zm = np.arange(p.lt) / p.f_volume + (lz / 2) * p.t_section # make spline interpolator and interpolate timeseries - spline_interpolator_xyzi = \ - interpolate.InterpolatedUnivariateSpline(timepoints_zi, voxel_timeseries[i]) + spline_interpolator_xyzi = interpolate.InterpolatedUnivariateSpline( + timepoints_zi, voxel_timeseries[i] + ) voxel_timeseries[i] = spline_interpolator_xyzi(timepoints_zm) def normalize(timeseries): @@ -68,19 +82,21 @@ def normalize(timeseries): # get voxel connectivity from proximities (distances) and similarities (correlations) voxel_xyz_phys_peak = voxel_xyz_peak * rxyz - voxel_timeseries_peak_nrm = normalize(voxel_timeseries[np.ix_(peak_idx, timepoints)]) + voxel_timeseries_peak_nrm = normalize( + voxel_timeseries[np.ix_(peak_idx, timepoints)] + ) # compute voxel peak similarity: combination of high proximity and high correlation tic = time.time() n_peaks = len(peak_idx) voxel_similarity_peak = np.zeros((n_peaks, n_peaks), dtype=bool) for i in range(n_peaks): - dist_i = (((voxel_xyz_phys_peak[i] - voxel_xyz_phys_peak)**2).sum(1))**0.5 + dist_i = (((voxel_xyz_phys_peak[i] - voxel_xyz_phys_peak) ** 2).sum(1)) ** 0.5 neib_i = dist_i < p.diam_cell corr_i = np.dot(voxel_timeseries_peak_nrm[i], voxel_timeseries_peak_nrm.T) voxel_similarity_peak[i] = neib_i & (corr_i > np.median(corr_i[neib_i])) voxel_similarity_peak = voxel_similarity_peak | voxel_similarity_peak.T - print('voxel similarity: %.1f minutes.\n'%((time.time() - tic) / 60)) + print("voxel similarity: %.1f minutes.\n" % ((time.time() - tic) / 60)) return (voxel_xyz, voxel_timeseries, peak_idx, voxel_similarity_peak) diff --git a/voluseg/_steps/step4c.py b/voluseg/_steps/step4c.py index 1710056..99323a4 100644 --- a/voluseg/_steps/step4c.py +++ b/voluseg/_steps/step4c.py @@ -1,15 +1,27 @@ -def initialize_block_cells(n_voxels_cell, n_voxels_block, n_cells, voxel_xyz, - voxel_timeseries, peak_idx, peak_valids, voxel_similarity_peak, - lxyz, rxyz, ball_diam, ball_diam_xyz0): - '''initialize cell positions in individual blocks''' +def initialize_block_cells( + n_voxels_cell, + n_voxels_block, + n_cells, + voxel_xyz, + voxel_timeseries, + peak_idx, + peak_valids, + voxel_similarity_peak, + lxyz, + rxyz, + ball_diam, + ball_diam_xyz0, +): + """initialize cell positions in individual blocks""" import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np from sklearn import cluster @@ -21,12 +33,13 @@ def initialize_block_cells(n_voxels_cell, n_voxels_block, n_cells, voxel_xyz, voxel_xyz_phys_peak_valid = voxel_xyz_peak_valid * rxyz # cluster valid voxels of peaks by distance and similarity - cell_clusters = \ - cluster.AgglomerativeClustering( - n_clusters=n_cells, - connectivity=voxel_similarity_peak[np.ix_(peak_valids,peak_valids)], - linkage='ward')\ - .fit(voxel_xyz_phys_peak_valid) # physical location of voxels + cell_clusters = cluster.AgglomerativeClustering( + n_clusters=n_cells, + connectivity=voxel_similarity_peak[np.ix_(peak_valids, peak_valids)], + linkage="ward", + ).fit( + voxel_xyz_phys_peak_valid + ) # physical location of voxels cell_labels_peak_valid = cell_clusters.labels_ # initialize weights for n_cells and background @@ -35,18 +48,20 @@ def initialize_block_cells(n_voxels_cell, n_voxels_block, n_cells, voxel_xyz, cell_sparseness = np.zeros(n_cells + 1) for cell_i in range(n_cells): # initialize cells with binary weights - cell_weight_init[peak_idx_valid, cell_i] = (cell_labels_peak_valid == cell_i) + cell_weight_init[peak_idx_valid, cell_i] = cell_labels_peak_valid == cell_i # compute cell centroid cell_idx = np.argwhere(cell_labels_peak_valid == cell_i).T[0] cell_xyz_phys = voxel_xyz_phys_peak_valid[cell_idx] cell_dist = np.zeros(len(cell_xyz_phys)) for j, voxel_xyz_phys in enumerate(cell_xyz_phys): - cell_dist[j] = ((((voxel_xyz_phys - cell_xyz_phys)**2).sum(1))**0.5).sum() + cell_dist[j] = ( + (((voxel_xyz_phys - cell_xyz_phys) ** 2).sum(1)) ** 0.5 + ).sum() cell_xyz0 = voxel_xyz_peak_valid[cell_idx[np.argmin(cell_dist)]] # find neighborhood voxels - for voxel_xyz_neib in (cell_xyz0 - ball_diam_xyz0 + np.argwhere(ball_diam)): + for voxel_xyz_neib in cell_xyz0 - ball_diam_xyz0 + np.argwhere(ball_diam): cell_neighborhood[(voxel_xyz == voxel_xyz_neib).all(1), cell_i] = 1 # make cell sparseness @@ -66,5 +81,10 @@ def initialize_block_cells(n_voxels_cell, n_voxels_block, n_cells, voxel_xyz, cell_neighborhood_valid[:, -1] = 1 cell_sparseness[-1] = 0 - return voxel_timeseries_valid, voxel_xyz_valid, \ - cell_weight_init_valid, cell_neighborhood_valid, cell_sparseness + return ( + voxel_timeseries_valid, + voxel_xyz_valid, + cell_weight_init_valid, + cell_neighborhood_valid, + cell_sparseness, + ) diff --git a/voluseg/_steps/step4d.py b/voluseg/_steps/step4d.py index bf90642..fe52e44 100644 --- a/voluseg/_steps/step4d.py +++ b/voluseg/_steps/step4d.py @@ -1,21 +1,33 @@ -def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, - timeseries_mean=1.0, timepoints=None, verbosity=1): - ''' +def nnmf_sparse( + V0, + XYZ0, + W0, + B0, + S0, + tolfun=1e-4, + miniter=10, + maxiter=100, + timeseries_mean=1.0, + timepoints=None, + verbosity=1, +): + """ cell detection via nonnegative matrix factorization with sparseness projection V0 = voxel_timeseries_valid XYZ0 = voxel_xyz_valid W0 = cell_weight_init_valid B0 = cell_neighborhood_valid S0 = cell_sparseness - ''' + """ import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np from scipy import stats @@ -24,12 +36,12 @@ def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, from voluseg._tools.sparseness_projection import sparseness_projection # CAUTION: variable is modified in-place to save memory - V0 *= (timeseries_mean / V0.mean(1)[:, None]) # normalize voxel timeseries + V0 *= timeseries_mean / V0.mean(1)[:, None] # normalize voxel timeseries if timepoints is not None: - V = V0[:, timepoints].astype(float) # copy input signal + V = V0[:, timepoints].astype(float) # copy input signal else: - V = V0.astype(float) # copy input signal + V = V0.astype(float) # copy input signal XYZ = XYZ0.astype(int) W = W0.astype(float) @@ -37,22 +49,22 @@ def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, S = S0.copy() # get dimensions - n, t = V.shape + n, t = V.shape n_, c = W.shape assert n_ == n - H = np.zeros((c, t)) # zero timeseries array - dnorm_prev = np.full(2, np.inf) # last two d-norms + H = np.zeros((c, t)) # zero timeseries array + dnorm_prev = np.full(2, np.inf) # last two d-norms for ii in range(maxiter): # save current states H_ = H.copy() # Alternate least squares with regularization H = np.maximum(linalg.lstsq(W, V)[0], 0) - H *= (timeseries_mean / H.mean(1)[:, None]) # normalize component timeseries + H *= timeseries_mean / H.mean(1)[:, None] # normalize component timeseries W = np.maximum(linalg.lstsq(V.T, H.T)[0], 0) - W[np.logical_not(B)] = 0 # restrict component boundaries + W[np.logical_not(B)] = 0 # restrict component boundaries for ci in range(c): W_ci = W[B[:, ci], ci] if np.any(W_ci) and (S[ci] > 0): @@ -66,7 +78,7 @@ def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, L_ci = np.zeros(np.ptp(XYZ_ci, 0) + 1, dtype=bool) L_ci[tuple(zip(*XYZ_ci))] = W_ci > 0 L_ci = measure.label(L_ci, connectivity=3) - lci_mode = stats.mode(L_ci[L_ci>0]).mode + lci_mode = stats.mode(L_ci[L_ci > 0]).mode # backwards compatibility with old scipy behavior if not np.isscalar(lci_mode): lci_mode = lci_mode[0] @@ -76,9 +88,9 @@ def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, # Get norm of difference and check for convergence dnorm = np.sqrt(np.mean(np.square(V - W.dot(H)))) / timeseries_mean - diffh = np.sqrt(np.mean(np.square(H - H_ ))) / timeseries_mean + diffh = np.sqrt(np.mean(np.square(H - H_))) / timeseries_mean if ((dnorm_prev.max(0) - dnorm) < tolfun) & (diffh < tolfun): - if (ii >= miniter): + if ii >= miniter: break dnorm_prev[1] = dnorm_prev[0] dnorm_prev[0] = dnorm @@ -88,6 +100,6 @@ def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, # Perform final regression on full input timeseries H = np.maximum(linalg.lstsq(W, V0)[0], 0) - H *= (timeseries_mean / H.mean(1)[:, None]) # normalize component timeseries + H *= timeseries_mean / H.mean(1)[:, None] # normalize component timeseries return (W, H, dnorm) diff --git a/voluseg/_steps/step4e.py b/voluseg/_steps/step4e.py index 9a576d4..34f03d5 100644 --- a/voluseg/_steps/step4e.py +++ b/voluseg/_steps/step4e.py @@ -1,5 +1,5 @@ def collect_blocks(color_i, parameters): - '''collect cells across all blocks''' + """collect cells across all blocks""" import os import h5py @@ -11,19 +11,20 @@ def collect_blocks(color_i, parameters): # set up spark import pyspark from pyspark.sql.session import SparkSession + spark = SparkSession.builder.getOrCreate() sc = spark.sparkContext p = SimpleNamespace(**parameters) - dir_cell = os.path.join(p.dir_output, 'cells', str(color_i)) + dir_cell = os.path.join(p.dir_output, "cells", str(color_i)) - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - with h5py.File(fullname_volmean+hdf, 'r') as file_handle: - block_valids = file_handle['block_valids'][()] + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + with h5py.File(fullname_volmean + hdf, "r") as file_handle: + block_valids = file_handle["block_valids"][()] class accum_data(pyspark.accumulators.AccumulatorParam): - '''define accumulator class''' + """define accumulator class""" def zero(self, val0): return [[]] * 4 @@ -43,18 +44,20 @@ def add_data(tuple_ii): cell_weights = [] cell_timeseries = [] try: - fullname_block = os.path.join(dir_cell, 'block%05d'%(ii)) - with h5py.File(fullname_block+hdf, 'r') as file_handle: - for ci in range(file_handle['n_cells'][()]): - cell_xyz.append(file_handle['/cell/%05d/xyz'%(ci)][()]) - cell_weights.append(file_handle['/cell/%05d/weights'%(ci)][()]) - cell_timeseries.append(file_handle['/cell/%05d/timeseries'%(ci)][()]) + fullname_block = os.path.join(dir_cell, "block%05d" % (ii)) + with h5py.File(fullname_block + hdf, "r") as file_handle: + for ci in range(file_handle["n_cells"][()]): + cell_xyz.append(file_handle["/cell/%05d/xyz" % (ci)][()]) + cell_weights.append(file_handle["/cell/%05d/weights" % (ci)][()]) + cell_timeseries.append( + file_handle["/cell/%05d/timeseries" % (ci)][()] + ) cell_block_id.append(ii) except KeyError: - print('block %d is empty.'%ii) + print("block %d is empty." % ii) except IOError: - print('block %d does not exist.'%ii) + print("block %d does not exist." % ii) if p.parallel_clean: cell_data.add([cell_block_id, cell_xyz, cell_weights, cell_timeseries]) @@ -66,8 +69,10 @@ def add_data(tuple_ii): cell_block_id, cell_xyz, cell_weights, cell_timeseries = cell_data.value else: idx_block_valids = np.argwhere(block_valids).T[0] - valids_tuple = zip([[]]*len(idx_block_valids), idx_block_valids) - cell_block_id, cell_xyz, cell_weights, cell_timeseries = list(zip(*map(add_data, valids_tuple))) + valids_tuple = zip([[]] * len(idx_block_valids), idx_block_valids) + cell_block_id, cell_xyz, cell_weights, cell_timeseries = list( + zip(*map(add_data, valids_tuple)) + ) cell_block_id = [ii for bi in cell_block_id for ii in bi] cell_xyz = [xyzi for ci in cell_xyz for xyzi in ci] cell_weights = [wi for ci in cell_weights for wi in ci] @@ -84,4 +89,10 @@ def add_data(tuple_ii): cell_weights_array[ci, :li] = cell_weights[ci] cell_timeseries_array = np.array(cell_timeseries) - return cell_block_id, cell_xyz_array, cell_weights_array, cell_timeseries_array, cell_lengths + return ( + cell_block_id, + cell_xyz_array, + cell_weights_array, + cell_timeseries_array, + cell_lengths, + ) diff --git a/voluseg/_steps/step5.py b/voluseg/_steps/step5.py index 8c38271..c0b997c 100755 --- a/voluseg/_steps/step5.py +++ b/voluseg/_steps/step5.py @@ -1,5 +1,5 @@ def clean_cells(parameters): - '''remove noise cells, detrend and detect baseline''' + """remove noise cells, detrend and detect baseline""" import os import h5py @@ -14,6 +14,7 @@ def clean_cells(parameters): # set up spark from pyspark.sql.session import SparkSession + spark = SparkSession.builder.getOrCreate() sc = spark.sparkContext @@ -22,17 +23,18 @@ def clean_cells(parameters): thr_similarity = 0.5 for color_i in range(p.n_colors): - fullname_cells = os.path.join(p.dir_output, 'cells%s_clean'%(color_i)) - if os.path.isfile(fullname_cells+hdf): + fullname_cells = os.path.join(p.dir_output, "cells%s_clean" % (color_i)) + if os.path.isfile(fullname_cells + hdf): continue - cell_block_id, cell_xyz, cell_weights, cell_timeseries, cell_lengths = \ + cell_block_id, cell_xyz, cell_weights, cell_timeseries, cell_lengths = ( collect_blocks(color_i, parameters) + ) - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - with h5py.File(fullname_volmean+hdf, 'r') as file_handle: - volume_mask = file_handle['volume_mask'][()].T - volume_mean = file_handle['volume_mean'][()].T + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + with h5py.File(fullname_volmean + hdf, "r") as file_handle: + volume_mask = file_handle["volume_mask"][()].T + volume_mean = file_handle["volume_mean"][()].T x, y, z = volume_mask.shape cell_x = cell_xyz[:, :, 0] @@ -42,27 +44,33 @@ def clean_cells(parameters): ix = np.any(np.isnan(cell_timeseries), 1) if np.any(ix): - print('nans (to be removed): %d'%np.count_nonzero(ix)) + print("nans (to be removed): %d" % np.count_nonzero(ix)) cell_timeseries[ix] = 0 # keep cells that exceed threshold cell_valids = np.zeros(len(cell_w), dtype=bool) for i, (li, xi, yi, zi) in enumerate(zip(cell_lengths, cell_x, cell_y, cell_z)): if (p.thr_mask > 0) and (p.thr_mask <= 1): - cell_valids[i] = np.mean(volume_mask[xi[:li], yi[:li], zi[:li]]) > p.thr_mask + cell_valids[i] = ( + np.mean(volume_mask[xi[:li], yi[:li], zi[:li]]) > p.thr_mask + ) elif p.thr_mask > 1: - cell_valids[i] = np.mean(volume_mean[xi[:li], yi[:li], zi[:li]]) > p.thr_mask + cell_valids[i] = ( + np.mean(volume_mean[xi[:li], yi[:li], zi[:li]]) > p.thr_mask + ) # brain mask array volume_list = [[[[] for zi in range(z)] for yi in range(y)] for xi in range(x)] - volume_cell_n = np.zeros((x, y, z), dtype='int') + volume_cell_n = np.zeros((x, y, z), dtype="int") for i, (li, vi) in enumerate(zip(cell_lengths, cell_valids)): for j in range(li if vi else 0): xij, yij, zij = cell_x[i, j], cell_y[i, j], cell_z[i, j] volume_list[xij][yij][zij].append(i) volume_cell_n[xij, yij, zij] += 1 - pair_cells = [pi for a in volume_list for b in a for c in b for pi in combinations(c, 2)] + pair_cells = [ + pi for a in volume_list for b in a for c in b for pi in combinations(c, 2) + ] assert len(pair_cells) == np.sum(volume_cell_n * (volume_cell_n - 1) / 2) # remove duplicate cells @@ -70,7 +78,7 @@ def clean_cells(parameters): for pi, fi in zip(pair_id, pair_count): pair_overlap = (fi / np.mean(cell_lengths[pi])) > thr_similarity pair_correlation = np.corrcoef(cell_timeseries[pi])[0, 1] > thr_similarity - if (pair_overlap and pair_correlation): + if pair_overlap and pair_correlation: cell_valids[pi[np.argmin(cell_w[pi])]] = 0 ## get valid version of cells @@ -89,14 +97,15 @@ def clean_cells(parameters): def get_timebase(timeseries_tuple): timeseries = timeseries_tuple[1] return clean_signal(bparameters.value, timeseries) + if p.parallel_clean: - print('Computing baseline in parallel mode... ', end='') + print("Computing baseline in parallel mode... ", end="") timebase = evenly_parallelize(cell_timeseries).map(get_timebase).collect() else: - print('Computing baseline in serial mode... ', end='') - timeseries_tuple = zip([[]]*len(cell_timeseries), cell_timeseries) + print("Computing baseline in serial mode... ", end="") + timeseries_tuple = zip([[]] * len(cell_timeseries), cell_timeseries) timebase = map(get_timebase, timeseries_tuple) - print('done.') + print("done.") cell_timeseries1, cell_baseline1 = list(zip(*timebase)) @@ -120,38 +129,38 @@ def get_timebase(timeseries_tuple): volume_id[xij, yij, zij] = i volume_weight[xij, yij, zij] = cell_weights[i, j] - with h5py.File(fullname_volmean+hdf, 'r') as file_handle: - background = file_handle['background'][()] - - with h5py.File(fullname_cells+hdf, 'w') as file_handle: - file_handle['n'] = n - file_handle['t'] = p.lt - file_handle['x'] = x - file_handle['y'] = y - file_handle['z'] = z - file_handle['cell_x'] = cell_x - file_handle['cell_y'] = cell_y - file_handle['cell_z'] = cell_z - file_handle['cell_block_id'] = cell_block_id - file_handle['volume_id'] = volume_id - file_handle['volume_weight'] = volume_weight - file_handle['cell_weights'] = cell_weights - file_handle['cell_timeseries_raw'] = cell_timeseries - file_handle['cell_timeseries'] = cell_timeseries1 - file_handle['cell_baseline'] = cell_baseline1 - file_handle['background'] = background + with h5py.File(fullname_volmean + hdf, "r") as file_handle: + background = file_handle["background"][()] + + with h5py.File(fullname_cells + hdf, "w") as file_handle: + file_handle["n"] = n + file_handle["t"] = p.lt + file_handle["x"] = x + file_handle["y"] = y + file_handle["z"] = z + file_handle["cell_x"] = cell_x + file_handle["cell_y"] = cell_y + file_handle["cell_z"] = cell_z + file_handle["cell_block_id"] = cell_block_id + file_handle["volume_id"] = volume_id + file_handle["volume_weight"] = volume_weight + file_handle["cell_weights"] = cell_weights + file_handle["cell_timeseries_raw"] = cell_timeseries + file_handle["cell_timeseries"] = cell_timeseries1 + file_handle["cell_baseline"] = cell_baseline1 + file_handle["background"] = background # clean up completion = 1 for color_i in range(p.n_colors): - fullname_cells = os.path.join(p.dir_output, 'cells%s_clean'%(color_i)) - if not os.path.isfile(fullname_cells+hdf): + fullname_cells = os.path.join(p.dir_output, "cells%s_clean" % (color_i)) + if not os.path.isfile(fullname_cells + hdf): completion = 0 if not p.save_volume: if completion: try: - shutil.rmtree(os.path.join(p.dir_output, 'cells')) - shutil.rmtree(os.path.join(p.dir_output, 'volumes')) + shutil.rmtree(os.path.join(p.dir_output, "cells")) + shutil.rmtree(os.path.join(p.dir_output, "volumes")) except: pass diff --git a/voluseg/_tools/__init__.py b/voluseg/_tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/voluseg/_tools/ants_registration.py b/voluseg/_tools/ants_registration.py index 6d0a886..2d567e2 100644 --- a/voluseg/_tools/ants_registration.py +++ b/voluseg/_tools/ants_registration.py @@ -1,38 +1,61 @@ -def ants_registration(dir_ants, in_nii, ref_nii, out_nii, prefix_out_tform, typ, in_tform=None, restrict=None): - '''ants registration''' +def ants_registration( + dir_ants, + in_nii, + ref_nii, + out_nii, + prefix_out_tform, + typ, + in_tform=None, + restrict=None, +): + """ants registration""" import os - lin_tform_params = ' '.join([ - '--metric MI[%s,%s,1,32,Regular,0.25]'%(ref_nii, in_nii), - '--convergence [1000x500x250x125]', - '--shrink-factors 12x8x4x2', - '--smoothing-sigmas 4x3x2x1vox', - ]) - syn_tform_params = ' '.join([ - '--metric CC[%s,%s,1,4]'%(ref_nii, in_nii), - '--convergence [100x100x70x50x20]', - '--shrink-factors 10x6x4x2x1', - '--smoothing-sigmas 5x3x2x1x0vox', - ]) + lin_tform_params = " ".join( + [ + "--metric MI[%s,%s,1,32,Regular,0.25]" % (ref_nii, in_nii), + "--convergence [1000x500x250x125]", + "--shrink-factors 12x8x4x2", + "--smoothing-sigmas 4x3x2x1vox", + ] + ) + syn_tform_params = " ".join( + [ + "--metric CC[%s,%s,1,4]" % (ref_nii, in_nii), + "--convergence [100x100x70x50x20]", + "--shrink-factors 10x6x4x2x1", + "--smoothing-sigmas 5x3x2x1x0vox", + ] + ) - initial_moving_transform = (in_tform if in_tform else '[%s,%s,1]'%(ref_nii, in_nii)) - antsRegistration_call = ' '.join([ - os.path.join(dir_ants, 'antsRegistration'), - ('--initial-moving-transform ' + initial_moving_transform if not restrict else ''), - '--output [%s,%s]'%(prefix_out_tform, out_nii), - '--dimensionality 3', - '--float 1', - '--interpolation Linear', - '--winsorize-image-intensities [0.005,0.995]', - '--use-histogram-matching 0', - ('--restrict-deformation ' + restrict if restrict else ''), - ('--transform Translation[0.1] ' + lin_tform_params if 't' in typ else ''), - ('--transform Rigid[0.1] ' + lin_tform_params if 'r' in typ else ''), - ('--transform Similarity[0.1] ' + lin_tform_params if 'i' in typ else ''), - ('--transform Affine[0.1] ' + lin_tform_params if 'a' in typ else ''), - ('--transform SyN[0.1,3,0] ' + syn_tform_params if 's' in typ else ''), - ('--transform BSplineSyN[0.1,26,0,3]' + syn_tform_params if 'b' in typ else '') - ]) + initial_moving_transform = in_tform if in_tform else "[%s,%s,1]" % (ref_nii, in_nii) + antsRegistration_call = " ".join( + [ + os.path.join(dir_ants, "antsRegistration"), + ( + "--initial-moving-transform " + initial_moving_transform + if not restrict + else "" + ), + "--output [%s,%s]" % (prefix_out_tform, out_nii), + "--dimensionality 3", + "--float 1", + "--interpolation Linear", + "--winsorize-image-intensities [0.005,0.995]", + "--use-histogram-matching 0", + ("--restrict-deformation " + restrict if restrict else ""), + ("--transform Translation[0.1] " + lin_tform_params if "t" in typ else ""), + ("--transform Rigid[0.1] " + lin_tform_params if "r" in typ else ""), + ("--transform Similarity[0.1] " + lin_tform_params if "i" in typ else ""), + ("--transform Affine[0.1] " + lin_tform_params if "a" in typ else ""), + ("--transform SyN[0.1,3,0] " + syn_tform_params if "s" in typ else ""), + ( + "--transform BSplineSyN[0.1,26,0,3]" + syn_tform_params + if "b" in typ + else "" + ), + ] + ) - return(antsRegistration_call) + return antsRegistration_call diff --git a/voluseg/_tools/ants_transformation.py b/voluseg/_tools/ants_transformation.py index ffde555..7772457 100644 --- a/voluseg/_tools/ants_transformation.py +++ b/voluseg/_tools/ants_transformation.py @@ -1,17 +1,26 @@ -def ants_transformation(dir_ants, in_nii, ref_nii, out_nii, in_tform, interpolation='Linear'): - '''application of ants transform''' - +def ants_transformation( + dir_ants, in_nii, ref_nii, out_nii, in_tform, interpolation="Linear" +): + """application of ants transform""" + import os - antsTransformation_call = ' '.join([ - os.path.join(dir_ants, 'antsApplyTransforms'), - '--dimensionality 3', - '--input', in_nii, - '--reference-image', ref_nii, - '--output', out_nii, - '--interpolation', interpolation, - '--transform', in_tform, - '--float' - ]) + antsTransformation_call = " ".join( + [ + os.path.join(dir_ants, "antsApplyTransforms"), + "--dimensionality 3", + "--input", + in_nii, + "--reference-image", + ref_nii, + "--output", + out_nii, + "--interpolation", + interpolation, + "--transform", + in_tform, + "--float", + ] + ) - return(antsTransformation_call) + return antsTransformation_call diff --git a/voluseg/_tools/ball.py b/voluseg/_tools/ball.py index af7df62..6d77734 100644 --- a/voluseg/_tools/ball.py +++ b/voluseg/_tools/ball.py @@ -1,13 +1,17 @@ def ball(radi, affine_mat): - '''morphological cell balls and midpoints''' + """morphological cell balls and midpoints""" import numpy as np rx, ry, rz, _ = np.diag(affine_mat) - ball = np.ones(( + ball = np.ones( + ( np.maximum(1, np.round(radi / rx).astype(int) * 2 + 1), np.maximum(1, np.round(radi / ry).astype(int) * 2 + 1), - np.maximum(1, np.round(radi / rz).astype(int) * 2 + 1)), dtype=int) + np.maximum(1, np.round(radi / rz).astype(int) * 2 + 1), + ), + dtype=int, + ) ball_xyzm = (np.array(ball.shape) - 1) / 2 for xi in range(ball.shape[0]): diff --git a/voluseg/_tools/clean_signal.py b/voluseg/_tools/clean_signal.py index d62b095..67ff758 100644 --- a/voluseg/_tools/clean_signal.py +++ b/voluseg/_tools/clean_signal.py @@ -1,13 +1,14 @@ def clean_signal(parameters, timeseries): - '''detrend, filter, and estimate dynamic baseline for input timeseries''' + """detrend, filter, and estimate dynamic baseline for input timeseries""" import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np import pandas as pd @@ -21,24 +22,26 @@ def clean_signal(parameters, timeseries): # p.t_baseline: timescale constant for baseline estimation (in seconds) # p.f_hipass: highpass cutoff frequency # p.f_volume: frequency of imaging a single stack (in Hz) - - if p.detrending == 'standard': + + if p.detrending == "standard": robustify = lambda timeseries: timeseries - elif p.detrending == 'robust': + elif p.detrending == "robust": robustify = lambda timeseries: winsorize(timeseries, [0.1, 0.1]) - + # compute dynamic baseline def compute_baseline(timeseries): timeseries_df = pd.DataFrame(timeseries) - baseline_df = timeseries_df.rolling(ltau, min_periods=1, center=True).quantile(0.1) + baseline_df = timeseries_df.rolling(ltau, min_periods=1, center=True).quantile( + 0.1 + ) baseline_df = baseline_df.rolling(ltau, min_periods=1, center=True).mean() baseline = np.ravel(baseline_df) baseline += np.percentile(timeseries - baseline, 1) - assert(np.allclose(np.percentile(timeseries - baseline, 1), 0)) + assert np.allclose(np.percentile(timeseries - baseline, 1), 0) return baseline # convert to double precision - timeseries = timeseries.astype('float64') + timeseries = timeseries.astype("float64") # timeseries mean timeseries_mean = timeseries.mean() @@ -51,7 +54,7 @@ def compute_baseline(timeseries): coefpoly = np.polyfit(xtime, robustify(timeseries), 2) timeseries -= np.polyval(coefpoly, xtime) timeseries = np.concatenate((timeseries[::-1], timeseries, timeseries[::-1])) - + # highpass filter nyquist = p.f_volume / 2 if (p.f_hipass > 1e-10) and (p.f_hipass < nyquist - 1e-10): @@ -62,9 +65,9 @@ def compute_baseline(timeseries): # restore mean timeseries = timeseries - timeseries.mean() + timeseries_mean baseline = compute_baseline(robustify(timeseries)) - + # slice and convert to single precision - timeseries = timeseries[p.lt:2*p.lt].astype(dtype) - baseline = baseline[p.lt:2*p.lt].astype(dtype) + timeseries = timeseries[p.lt : 2 * p.lt].astype(dtype) + baseline = baseline[p.lt : 2 * p.lt].astype(dtype) - return(timeseries, baseline) + return (timeseries, baseline) diff --git a/voluseg/_tools/constants.py b/voluseg/_tools/constants.py index 76ffff1..85cb301 100755 --- a/voluseg/_tools/constants.py +++ b/voluseg/_tools/constants.py @@ -1,5 +1,5 @@ -ori = '_original' -ali = '_aligned' -nii = '.nii.gz' -hdf = '.hdf5' -dtype = 'float32' +ori = "_original" +ali = "_aligned" +nii = ".nii.gz" +hdf = ".hdf5" +dtype = "float32" diff --git a/voluseg/_tools/evenly_parallelize.py b/voluseg/_tools/evenly_parallelize.py index 8b68b89..a79a94e 100644 --- a/voluseg/_tools/evenly_parallelize.py +++ b/voluseg/_tools/evenly_parallelize.py @@ -1,8 +1,9 @@ def evenly_parallelize(input_list): - '''return evenly partitioned spark resilient distributed dataset (RDD)''' + """return evenly partitioned spark resilient distributed dataset (RDD)""" import numpy as np from pyspark.sql.session import SparkSession + spark = SparkSession.builder.getOrCreate() sc = spark.sparkContext diff --git a/voluseg/_tools/get_volume_name.py b/voluseg/_tools/get_volume_name.py index b590146..17ca027 100644 --- a/voluseg/_tools/get_volume_name.py +++ b/voluseg/_tools/get_volume_name.py @@ -1,5 +1,5 @@ def get_volume_name(fullname_volume, dir_prefix=None, plane_i=None): - '''get name of volume (with directory prefix and plane suffix)''' + """get name of volume (with directory prefix and plane suffix)""" import os @@ -7,10 +7,10 @@ def get_volume_name(fullname_volume, dir_prefix=None, plane_i=None): # add prefix (multiple input directories) if dir_prefix is not None: - name_volume = dir_prefix+'_'+name_volume + name_volume = dir_prefix + "_" + name_volume # add suffix (packed planes) if plane_i is not None: - name_volume += '_PLN'+str(plane_i).zfill(3) + name_volume += "_PLN" + str(plane_i).zfill(3) return name_volume diff --git a/voluseg/_tools/load_metadata.py b/voluseg/_tools/load_metadata.py index f12a26e..0461ab8 100644 --- a/voluseg/_tools/load_metadata.py +++ b/voluseg/_tools/load_metadata.py @@ -1,41 +1,41 @@ def load_metadata(parameters, filename_channel, filename_stack): - '''fetch z-resolution, exposure time, and stack frequency''' + """fetch z-resolution, exposure time, and stack frequency""" import numpy as np from xml.etree import ElementTree -# try: - with open(filename_channel, 'r') as file_handle: + # try: + with open(filename_channel, "r") as file_handle: xml_str = file_handle.read() -# except: -# print('error: cannot load %s.'%(filename_channel)) + # except: + # print('error: cannot load %s.'%(filename_channel)) try: xml_tree = ElementTree.fromstring(xml_str) except ElementTree.ParseError: - xml_tree = ElementTree.fromstring(xml_str.replace('&', ' ')) + xml_tree = ElementTree.fromstring(xml_str.replace("&", " ")) - for info in xml_tree.findall('info'): - if list(info.attrib.keys())[0] == 'exposure_time': - t_section = float(info.attrib['exposure_time']) / 1000 - parameters['t_section'] = t_section - print('fetched t_section.') - if list(info.attrib.keys())[0] == 'z_step': - res_z = float(info.attrib['z_step']) - parameters['res_z'] = res_z - print('fetched res_z.') + for info in xml_tree.findall("info"): + if list(info.attrib.keys())[0] == "exposure_time": + t_section = float(info.attrib["exposure_time"]) / 1000 + parameters["t_section"] = t_section + print("fetched t_section.") + if list(info.attrib.keys())[0] == "z_step": + res_z = float(info.attrib["z_step"]) + parameters["res_z"] = res_z + print("fetched res_z.") -# try: - if 'Stack_frequency' in filename_stack: - f_volume = np.fromfile(filename_stack, sep='\n')[0] # Hz + # try: + if "Stack_frequency" in filename_stack: + f_volume = np.fromfile(filename_stack, sep="\n")[0] # Hz else: - with open(filename_stack, 'r') as file_handle: - times_stack = np.array(file_handle.read().split('\t'))[1:-1] + with open(filename_stack, "r") as file_handle: + times_stack = np.array(file_handle.read().split("\t"))[1:-1] f_volume = 1.0 / np.mean(np.diff(times_stack.astype(float))) - parameters['f_volume'] = f_volume - print('fetched f_volume.') -# except: -# print('error: cannot load %s.'%(filename_stack)) + parameters["f_volume"] = f_volume + print("fetched f_volume.") + # except: + # print('error: cannot load %s.'%(filename_stack)) return parameters diff --git a/voluseg/_tools/load_parameters.py b/voluseg/_tools/load_parameters.py index 13e0de2..a863be8 100644 --- a/voluseg/_tools/load_parameters.py +++ b/voluseg/_tools/load_parameters.py @@ -1,14 +1,14 @@ def load_parameters(filename): - ''' load previously saved parameters from filename ''' + """load previously saved parameters from filename""" import pickle try: - with open(filename, 'rb') as file_handle: + with open(filename, "rb") as file_handle: parameters = pickle.load(file_handle) - print('parameter file successfully loaded.') + print("parameter file successfully loaded.") return parameters except Exception as msg: - print('parameter file not loaded: %s.'%(msg)) + print("parameter file not loaded: %s." % (msg)) diff --git a/voluseg/_tools/load_volume.py b/voluseg/_tools/load_volume.py index b1c614c..f98c214 100644 --- a/voluseg/_tools/load_volume.py +++ b/voluseg/_tools/load_volume.py @@ -1,9 +1,10 @@ def load_volume(fullname_ext): - '''load volume based on input name and extension''' + """load volume based on input name and extension""" import h5py import nibabel import numpy as np + try: from skimage.external import tifffile except: @@ -16,9 +17,9 @@ def load_volume(fullname_ext): from voluseg._tools.constants import dtype try: - ext = '.'+fullname_ext.split('.', 1)[1] + ext = "." + fullname_ext.split(".", 1)[1] - if ('.tif' in ext) or ('.tiff' in ext): + if (".tif" in ext) or (".tiff" in ext): try: volume = tifffile.imread(fullname_ext) except: @@ -28,16 +29,16 @@ def load_volume(fullname_ext): img.seek(i) volume.append(np.array(img).T) volume = np.array(volume) - elif ('.h5' in ext) or ('.hdf5' in ext): - with h5py.File(fullname_ext, 'r') as file_handle: + elif (".h5" in ext) or (".hdf5" in ext): + with h5py.File(fullname_ext, "r") as file_handle: volume = file_handle[list(file_handle.keys())[0]][()] - elif ('.klb' in ext): + elif ".klb" in ext: volume = pyklb.readfull(fullname_ext) volume = volume.transpose(0, 2, 1) - elif ('.nii' in ext) or ('.nii.gz' in ext): + elif (".nii" in ext) or (".nii.gz" in ext): volume = nibabel.load(fullname_ext).get_fdata() else: - raise Exception('unknown extension.') + raise Exception("unknown extension.") return volume.astype(dtype) diff --git a/voluseg/_tools/parameter_dictionary.py b/voluseg/_tools/parameter_dictionary.py index d17aa9f..8a97261 100755 --- a/voluseg/_tools/parameter_dictionary.py +++ b/voluseg/_tools/parameter_dictionary.py @@ -1,32 +1,32 @@ def parameter_dictionary(): - '''parameter dictionary with specified defaults''' + """parameter dictionary with specified defaults""" - return { - 'detrending': 'standard', # type of detrending: 'standard', 'robust', or 'none' - 'registration': 'medium', # quality of registration: 'high', 'medium', 'low' or 'none' - 'registration_restrict': '', # restrict registration (e.g. 1x1x1x1x1x1x0x0x0x1x1x0) - 'diam_cell': 6.0, # cell diameter (microns) - 'dir_ants': '', # path to ANTs directory - 'dir_input': '', # path to input directory/ies (separate multiple directories by ';') - 'dir_output': '', # path to output directory - 'dir_transform': '', # path to transform directory - 'ds': 2, # spatial coarse-graining in x-y dimension - 'planes_pad': 0, # number of planes to pad the volume with (for robust registration) - 'planes_packed': False, # packed planes in each volume (for single plane imaging with packed planes) - 'parallel_clean': True, # parallelization of final cleaning (True is fast but memory intensive) - 'parallel_volume': True, # parallelization of mean-volume computation (True is fast but memory intensive) - 'save_volume': False, # save registered volumes after segmentation (True keeps a copy of the volumes) - 'type_timepoints': 'dff', # type of timepoints to use for cell detection: 'dff', 'periodic' or 'custom' - 'type_mask': 'geomean', # type of volume averaging for mask: 'mean', 'geomean' or 'max' - 'timepoints': 1000, # number ('dff'/'periodic') or vector ('custom') of timepoints for segmentation - 'f_hipass': 0, # frequency (Hz) for high-pass filtering of cell timeseries - 'f_volume': 2.0, # imaging frequency (Hz) - 'n_cells_block': 316, # number of cells in block (small n_cells is fast but can lead to blocky output) - 'n_colors': 1, # number of brain colors (2 in two-color volumes) - 'res_x': 0.40625, # x resolution (microns) - 'res_y': 0.40625, # y resolution (microns) - 'res_z': 5.0, # z resolution (microns) - 't_baseline': 300, # interval for baseline calculation (seconds) - 't_section': 0.01, # exposure time (seconds): time of slice acquisition - 'thr_mask': 0.5, # threshold for volume mask: 0 < thr <= 1 (probability) or thr > 1 (intensity) - } + return { + "detrending": "standard", # type of detrending: 'standard', 'robust', or 'none' + "registration": "medium", # quality of registration: 'high', 'medium', 'low' or 'none' + "registration_restrict": "", # restrict registration (e.g. 1x1x1x1x1x1x0x0x0x1x1x0) + "diam_cell": 6.0, # cell diameter (microns) + "dir_ants": "", # path to ANTs directory + "dir_input": "", # path to input directory/ies (separate multiple directories by ';') + "dir_output": "", # path to output directory + "dir_transform": "", # path to transform directory + "ds": 2, # spatial coarse-graining in x-y dimension + "planes_pad": 0, # number of planes to pad the volume with (for robust registration) + "planes_packed": False, # packed planes in each volume (for single plane imaging with packed planes) + "parallel_clean": True, # parallelization of final cleaning (True is fast but memory intensive) + "parallel_volume": True, # parallelization of mean-volume computation (True is fast but memory intensive) + "save_volume": False, # save registered volumes after segmentation (True keeps a copy of the volumes) + "type_timepoints": "dff", # type of timepoints to use for cell detection: 'dff', 'periodic' or 'custom' + "type_mask": "geomean", # type of volume averaging for mask: 'mean', 'geomean' or 'max' + "timepoints": 1000, # number ('dff'/'periodic') or vector ('custom') of timepoints for segmentation + "f_hipass": 0, # frequency (Hz) for high-pass filtering of cell timeseries + "f_volume": 2.0, # imaging frequency (Hz) + "n_cells_block": 316, # number of cells in block (small n_cells is fast but can lead to blocky output) + "n_colors": 1, # number of brain colors (2 in two-color volumes) + "res_x": 0.40625, # x resolution (microns) + "res_y": 0.40625, # y resolution (microns) + "res_z": 5.0, # z resolution (microns) + "t_baseline": 300, # interval for baseline calculation (seconds) + "t_section": 0.01, # exposure time (seconds): time of slice acquisition + "thr_mask": 0.5, # threshold for volume mask: 0 < thr <= 1 (probability) or thr > 1 (intensity) + } diff --git a/voluseg/_tools/save_volume.py b/voluseg/_tools/save_volume.py index 0ef7385..bb69c5b 100755 --- a/voluseg/_tools/save_volume.py +++ b/voluseg/_tools/save_volume.py @@ -1,5 +1,5 @@ def save_volume(fullname_ext, volume, affine_mat=None): - '''save volume in hdf5 or nifti format''' + """save volume in hdf5 or nifti format""" import h5py import nibabel @@ -7,18 +7,18 @@ def save_volume(fullname_ext, volume, affine_mat=None): try: volume = volume.astype(dtype) - ext = '.'+fullname_ext.split('.', 1)[1] + ext = "." + fullname_ext.split(".", 1)[1] - if ('.h5' in ext) or ('.hdf5' in ext): - with h5py.File(fullname_ext, 'w') as file_handle: - file_handle.create_dataset('volume', data=volume, compression='gzip') - elif ('.nii' in ext) or ('.nii.gz' in ext): + if (".h5" in ext) or (".hdf5" in ext): + with h5py.File(fullname_ext, "w") as file_handle: + file_handle.create_dataset("volume", data=volume, compression="gzip") + elif (".nii" in ext) or (".nii.gz" in ext): nii = nibabel.Nifti1Image(volume, affine_mat) - nii.header['qform_code'] = 2 # make codes consistent with ANTS - nii.header['sform_code'] = 1 # 1 scanner, 2 aligned, 3 tlrc, 4 mni. + nii.header["qform_code"] = 2 # make codes consistent with ANTS + nii.header["sform_code"] = 1 # 1 scanner, 2 aligned, 3 tlrc, 4 mni. nibabel.save(nii, fullname_ext) else: - raise Exception('unknown extension.') + raise Exception("unknown extension.") return True diff --git a/voluseg/_tools/sparseness.py b/voluseg/_tools/sparseness.py index a321108..9c7596b 100644 --- a/voluseg/_tools/sparseness.py +++ b/voluseg/_tools/sparseness.py @@ -1,8 +1,9 @@ def sparseness(W, dim=0): - '''vector sparseness along specified dimension''' + """vector sparseness along specified dimension""" import numpy as np + n = W.shape[dim] - l2 = np.sqrt(np.sum(np.square(W), dim)) # l2-norm - l1 = np.sum(np.abs( W), dim) # l1-norm + l2 = np.sqrt(np.sum(np.square(W), dim)) # l2-norm + l1 = np.sum(np.abs(W), dim) # l1-norm return (np.sqrt(n) - l1 / l2) / (np.sqrt(n) - 1) # sparseness diff --git a/voluseg/_tools/sparseness_projection.py b/voluseg/_tools/sparseness_projection.py index 6f240f8..ce90f39 100644 --- a/voluseg/_tools/sparseness_projection.py +++ b/voluseg/_tools/sparseness_projection.py @@ -1,18 +1,18 @@ def sparseness_projection(Si, s, at_least_as_sparse=False): - '''Hoyer sparseness projection''' + """Hoyer sparseness projection""" import numpy as np - assert(Si.ndim == 1) - S = np.copy(Si) # copy input signal + assert Si.ndim == 1 + S = np.copy(Si) # copy input signal if s <= 0: - return np.maximum(S, 0) # enforce nonnegativity + return np.maximum(S, 0) # enforce nonnegativity d = S.size - L2 = np.sqrt(np.sum(np.square(S))) # fixed l2-norm - L1 = L2 * (np.sqrt(d) * (1 - s) + s) # desired l1-norm + L2 = np.sqrt(np.sum(np.square(S))) # fixed l2-norm + L1 = L2 * (np.sqrt(d) * (1 - s) + s) # desired l1-norm # quit if at_least_sparse=True and original exceeds target sparseness if at_least_as_sparse: @@ -42,9 +42,9 @@ def sparseness_projection(Si, s, at_least_as_sparse=False): # For convenience, we square both sides and find the roots, # 0 = (l2[P*Alph + M])^2 - (L2)^2 # 0 = sum((P*Alph)^2) + sum(2*P*M*Alph) + sum(M^2) - L2^2 - A = np.sum(P * P) + A = np.sum(P * P) B = 2 * np.sum(P * M) - C = np.sum(M * M) - L2**2 + C = np.sum(M * M) - L2**2 Alph = (-B + np.real(np.sqrt(B**2 - 4 * A * C))) / (2 * A) diff --git a/voluseg/_update.py b/voluseg/_update.py index 72df05b..6ada62f 100644 --- a/voluseg/_update.py +++ b/voluseg/_update.py @@ -1,5 +1,8 @@ def voluseg_update(): - '''shortcut to update package''' + """shortcut to update package""" import os - os.system('pip install --upgrade --force-reinstall git+https://github.com/mikarubi/voluseg.git') + + os.system( + "pip install --upgrade --force-reinstall git+https://github.com/mikarubi/voluseg.git" + ) From 3bfefb13f70cabdaca8657bf49963adc5f10cacb Mon Sep 17 00:00:00 2001 From: luiz Date: Mon, 26 Aug 2024 10:34:14 +0200 Subject: [PATCH 3/3] docker basics --- .gitignore | 13 +++++++++++++ Dockerfile | 36 ++++++++++++++++++++++++++++++++++++ README-docker.md | 21 +++++++++++++++++++++ app/__init__.py | 0 app/app.py | 34 ++++++++++++++++++++++++++++++++++ requirements-docker.txt | 9 +++++++++ 6 files changed, 113 insertions(+) create mode 100644 Dockerfile create mode 100644 README-docker.md create mode 100644 app/__init__.py create mode 100644 app/app.py create mode 100644 requirements-docker.txt diff --git a/.gitignore b/.gitignore index 3fbb66b..2eb9b61 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,16 @@ __pycache__/ *.pyc *.egg-info +# Python coverage files +.coverage +.coverage.* +.cache +.cache/ +coverage.xml +coverage_html/ +htmlcov/ + +# Local data +sample_data/* +output/* +logs/* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6126d58 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +FROM apache/spark-py:v3.4.0 + +USER root + +WORKDIR /voluseg + +RUN apt-get update && apt-get install -y wget unzip && \ + apt-get update && apt-get install -y git && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN python3 -m pip install --upgrade pip + +# Download and install ANTs +RUN wget https://github.com/ANTsX/ANTs/releases/download/v2.5.3/ants-2.5.3-ubuntu-20.04-X64-gcc.zip +RUN unzip ants-2.5.3-ubuntu-20.04-X64-gcc.zip -d / +RUN rm ants-2.5.3-ubuntu-20.04-X64-gcc.zip + +# Install requirements +COPY requirements-docker.txt /voluseg/requirements-docker.txt +RUN pip install --no-cache-dir -r requirements-docker.txt + +# Install voluseg +COPY README.md /voluseg/README.md +COPY setup.py /voluseg/setup.py +COPY voluseg /voluseg/voluseg +COPY app /voluseg/app +RUN pip install --no-cache-dir -e . + +# Create directories +RUN mkdir /voluseg/data +RUN mkdir /voluseg/output +RUN mkdir /voluseg/logs + +# EXPOSE 80 + +CMD ["python3", "/voluseg/app/app.py"] \ No newline at end of file diff --git a/README-docker.md b/README-docker.md new file mode 100644 index 0000000..c50c4b5 --- /dev/null +++ b/README-docker.md @@ -0,0 +1,21 @@ +# Voluseg container + +Build the image: +```bash +DOCKER_BUILDKIT=1 docker build -t voluseg . +``` + +Run with local data volume mount: +```bash +docker run -v $(pwd)/data:/voluseg/data voluseg +``` + +Run with local data volume mount and hot reload for the voluseg package: +```bash +docker run \ +-v $(pwd)/sample_data:/voluseg/data \ +-v $(pwd)/output:/voluseg/output \ +-v $(pwd)/voluseg:/voluseg/voluseg \ +-v $(pwd)/app:/voluseg/app \ +voluseg +``` \ No newline at end of file diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/app.py b/app/app.py new file mode 100644 index 0000000..a3ff5a7 --- /dev/null +++ b/app/app.py @@ -0,0 +1,34 @@ +import os +import pprint +import voluseg + + +# set and save parameters +parameters0 = voluseg.parameter_dictionary() +parameters0['dir_ants'] = '/ants-2.5.3/bin/' +parameters0['dir_input'] = '/voluseg/data/' +parameters0['dir_output'] = '/voluseg/output/' +parameters0['registration'] = 'high' +parameters0['diam_cell'] = 5.0 +parameters0['f_volume'] = 2.0 +voluseg.step0_process_parameters(parameters0) + +# load and print parameters +filename_parameters = os.path.join(parameters0['dir_output'], 'parameters.pickle') +parameters = voluseg.load_parameters(filename_parameters) +pprint.pprint(parameters) + +print("process volumes.") +voluseg.step1_process_volumes(parameters) + +print("align volumes.") +voluseg.step2_align_volumes(parameters) + +print("mask volumes.") +voluseg.step3_mask_volumes(parameters) + +print("detect cells.") +voluseg.step4_detect_cells(parameters) + +print("clean cells.") +voluseg.step5_clean_cells(parameters) \ No newline at end of file diff --git a/requirements-docker.txt b/requirements-docker.txt new file mode 100644 index 0000000..379076f --- /dev/null +++ b/requirements-docker.txt @@ -0,0 +1,9 @@ +nibabel==5.2.1 +h5py==3.11.0 +scipy==1.14.0 +scikit-learn==1.5.1 +scikit-image==0.24.0 +matplotlib==3.9.2 +numpy==2.1.0 +pandas==2.2.2 +pyspark==3.4.0 \ No newline at end of file