diff --git a/.gitignore b/.gitignore index b37bc0e..2eb9b61 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ __pycache__/ *.pyc *.egg-info - # Python coverage files .coverage .coverage.* diff --git a/README-docker.md b/README-docker.md new file mode 100644 index 0000000..c50c4b5 --- /dev/null +++ b/README-docker.md @@ -0,0 +1,21 @@ +# Voluseg container + +Build the image: +```bash +DOCKER_BUILDKIT=1 docker build -t voluseg . +``` + +Run with local data volume mount: +```bash +docker run -v $(pwd)/data:/voluseg/data voluseg +``` + +Run with local data volume mount and hot reload for the voluseg package: +```bash +docker run \ +-v $(pwd)/sample_data:/voluseg/data \ +-v $(pwd)/output:/voluseg/output \ +-v $(pwd)/voluseg:/voluseg/voluseg \ +-v $(pwd)/app:/voluseg/app \ +voluseg +``` \ No newline at end of file diff --git a/README.md b/README.md index 0bbb9d0..10cc4a1 100755 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Contact: Mika Rubinov, `mika.rubinov at vanderbilt.edu` # example code -``` +```python # set up import os import pprint diff --git a/app/app.py b/app/app.py index 427c592..fa9fb54 100644 --- a/app/app.py +++ b/app/app.py @@ -1,23 +1,20 @@ -# set up import os import pprint import voluseg -# check for updates -# voluseg.update() # set and save parameters parameters0 = voluseg.parameter_dictionary() -parameters0['dir_ants'] = '/ants-2.5.3/bin/' -parameters0['dir_input'] = '/voluseg/data/' -parameters0['dir_output'] = '/voluseg/output/' -parameters0['registration'] = 'high' -parameters0['diam_cell'] = 5.0 -parameters0['f_volume'] = 2.0 +parameters0["dir_ants"] = "/ants-2.5.3/bin/" +parameters0["dir_input"] = "/voluseg/data/" +parameters0["dir_output"] = "/voluseg/output/" +parameters0["registration"] = "high" +parameters0["diam_cell"] = 5.0 +parameters0["f_volume"] = 2.0 voluseg.step0_process_parameters(parameters0) # load and print parameters -filename_parameters = os.path.join(parameters0['dir_output'], 'parameters.pickle') +filename_parameters = os.path.join(parameters0["dir_output"], "parameters.pickle") parameters = voluseg.load_parameters(filename_parameters) pprint.pprint(parameters) @@ -34,4 +31,4 @@ voluseg.step4_detect_cells(parameters) print("clean cells.") -voluseg.step5_clean_cells(parameters) \ No newline at end of file +voluseg.step5_clean_cells(parameters) diff --git a/requirements-docker.txt b/requirements-docker.txt index 3381cae..e3853c5 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -7,4 +7,4 @@ matplotlib==3.9.2 numpy==2.1.0 pandas==2.2.2 pyspark==3.4.0 -pynwb==2.8.1 \ No newline at end of file +pynwb==2.8.1 diff --git a/setup.py b/setup.py index 42626d8..02bc1b6 100644 --- a/setup.py +++ b/setup.py @@ -1,32 +1,28 @@ -'''volumetric segmentation pipeline''' +"""volumetric segmentation pipeline""" import setuptools -with open('README.md', 'r') as fh: +with open("README.md", "r") as fh: README = fh.read() # with open('requirements.txt', 'r') as fh: # install_requires = fh.read().split('\n') setuptools.setup( - name='voluseg', - version='2023.12', - author='Mika Rubinov', - author_email='mika.rubinov@vanderbilt.edu', - description='pipeline for volumetric segmentation of calcium imaging data', + name="voluseg", + version="2023.12", + author="Mika Rubinov", + author_email="mika.rubinov@vanderbilt.edu", + description="pipeline for volumetric segmentation of calcium imaging data", long_description=README, - long_description_content_type='text/markdown', - url='https://github.com/mikarubi/voluseg', - packages=[ - 'voluseg', - 
'voluseg._steps', - 'voluseg._tools' - ], + long_description_content_type="text/markdown", + url="https://github.com/mikarubi/voluseg", + packages=["voluseg", "voluseg._steps", "voluseg._tools"], classifiers=[ - 'Programming Language :: Python :: 3', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", ], - python_requires='>=3.5', + python_requires=">=3.5", # install_requires=install_requires ) diff --git a/tests/test_voluseg.py b/tests/test_voluseg.py index 85c77d6..c2204f5 100644 --- a/tests/test_voluseg.py +++ b/tests/test_voluseg.py @@ -9,14 +9,20 @@ def setup_parameters(tmp_path): # Define parameters and paths parameters0 = voluseg.parameter_dictionary() - parameters0['dir_ants'] = "/home/luiz/Downloads/ants-2.5.2/bin" # Change this to your actual ANTs bin path + parameters0["dir_ants"] = ( + "/home/luiz/Downloads/ants-2.5.2/bin" # Change this to your actual ANTs bin path + ) # parameters0['dir_input'] = str((Path(".").resolve().parent / "sample_data")) - parameters0['dir_input'] = "/mnt/shared_storage/taufferconsulting/client_catalystneuro/project_voluseg/sample_data" + parameters0["dir_input"] = ( + "/mnt/shared_storage/taufferconsulting/client_catalystneuro/project_voluseg/sample_data" + ) # parameters0['dir_output'] = str(tmp_path) # Use pytest's tmp_path fixture for output - parameters0['dir_output'] = "/mnt/shared_storage/taufferconsulting/client_catalystneuro/project_voluseg/output" - parameters0['registration'] = 'high' - parameters0['diam_cell'] = 5.0 - parameters0['f_volume'] = 2.0 + parameters0["dir_output"] = ( + "/mnt/shared_storage/taufferconsulting/client_catalystneuro/project_voluseg/output" + ) + parameters0["registration"] = "high" + parameters0["diam_cell"] = 5.0 + parameters0["f_volume"] = 2.0 # Save parameters voluseg.step0_process_parameters(parameters0) @@ -26,7 +32,9 @@ def setup_parameters(tmp_path): def test_voluseg_pipeline_h5_dir(setup_parameters): - filename_parameters = os.path.join(setup_parameters['dir_output'], 'parameters.pickle') + filename_parameters = os.path.join( + setup_parameters["dir_output"], "parameters.pickle" + ) parameters = voluseg.load_parameters(filename_parameters) pprint.pprint(parameters) @@ -57,15 +65,17 @@ def test_voluseg_pipeline_h5_dir(setup_parameters): def test_voluseg_pipeline_nwbfile(setup_parameters): from voluseg._tools.nwb import h5_dir_to_nwb_file - filename_parameters = os.path.join(setup_parameters['dir_output'], 'parameters.pickle') + filename_parameters = os.path.join( + setup_parameters["dir_output"], "parameters.pickle" + ) parameters = voluseg.load_parameters(filename_parameters) # Convert a directory of HDF5 files to a single NWB file - nwb_file_path = str(os.path.join(parameters['dir_output'], "output.nwb")) + nwb_file_path = str(os.path.join(parameters["dir_output"], "output.nwb")) h5_dir_to_nwb_file( - h5_dir=parameters['dir_input'], + h5_dir=parameters["dir_input"], acquisition_name="TwoPhotonSeries", - output_file_path=nwb_file_path + output_file_path=nwb_file_path, ) parameters["nwb_input_local_path"] = nwb_file_path parameters["nwb_input_acquisition_name"] = "TwoPhotonSeries" @@ -87,4 +97,4 @@ def test_voluseg_pipeline_nwbfile(setup_parameters): # voluseg.step4_detect_cells(parameters) # print("Clean cells.") - # voluseg.step5_clean_cells(parameters) \ No newline at end of file + # voluseg.step5_clean_cells(parameters) diff --git a/voluseg/__init__.py 
b/voluseg/__init__.py index b58a6d3..c9ca139 100644 --- a/voluseg/__init__.py +++ b/voluseg/__init__.py @@ -1,4 +1,4 @@ -'''initialization''' +"""initialization""" from voluseg._steps.step0 import process_parameters as step0_process_parameters from voluseg._steps.step1 import process_volumes as step1_process_volumes diff --git a/voluseg/_steps/step0.py b/voluseg/_steps/step0.py index f81ea30..361067d 100644 --- a/voluseg/_steps/step0.py +++ b/voluseg/_steps/step0.py @@ -148,8 +148,12 @@ def process_parameters(parameters0: dict) -> None: if not acquisition_name: raise Exception("nwb_input_acquisition_name is required to read nwb file") volume_fullnames_input = [parameters.get("nwb_input_local_path")] - volume_names = [parameters.get("nwb_input_local_path").split("/")[-1].split(".nwb")[0]] - with open_nwbfile_local(file_path=parameters["nwb_input_local_path"]) as nwbfile: + volume_names = [ + parameters.get("nwb_input_local_path").split("/")[-1].split(".nwb")[0] + ] + with open_nwbfile_local( + file_path=parameters["nwb_input_local_path"] + ) as nwbfile: lt = nwbfile.acquisition[acquisition_name].data.shape[0] if parameters["timepoints"]: lt = min(lt, parameters["timepoints"]) diff --git a/voluseg/_steps/step1.py b/voluseg/_steps/step1.py index b713236..93e08be 100755 --- a/voluseg/_steps/step1.py +++ b/voluseg/_steps/step1.py @@ -127,7 +127,8 @@ def make_output_volume(name_volume, volume): volume_index=tuple_fullname_volume_input[1], ) make_output_volume( - name_volume=p.volume_names[0] + f"_{tuple_fullname_volume_input[1]}", + name_volume=p.volume_names[0] + + f"_{tuple_fullname_volume_input[1]}", volume=volume, ) else: @@ -136,7 +137,9 @@ def make_output_volume(name_volume, volume): if len(p.input_dirs) == 1: dir_prefix = None else: - dir_prefix = os.path.basename(os.path.split(fullname_volume_input)[0]) + dir_prefix = os.path.basename( + os.path.split(fullname_volume_input)[0] + ) # process output volumes if p.planes_packed: diff --git a/voluseg/_steps/step2.py b/voluseg/_steps/step2.py index 6de9c8c..018be55 100755 --- a/voluseg/_steps/step2.py +++ b/voluseg/_steps/step2.py @@ -1,8 +1,8 @@ def align_volumes(parameters): - '''register volumes to a single middle volume''' + """register volumes to a single middle volume""" # do not run if registration is set to none - if not parameters['registration']: + if not parameters["registration"]: return import os @@ -22,24 +22,26 @@ def align_volumes(parameters): volume_nameRDD = evenly_parallelize(p.volume_names) for color_i in range(p.n_colors): - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - if os.path.isfile(fullname_volmean+hdf): + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + if os.path.isfile(fullname_volmean + hdf): continue - dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i)) - fullname_reference = os.path.join(dir_volume, 'reference') - if load_volume(fullname_reference+nii) is None: - fullname_median = os.path.join(dir_volume, p.volume_names[p.lt//2]) - shutil.copyfile(fullname_median+ori+nii, fullname_reference+nii) + dir_volume = os.path.join(p.dir_output, "volumes", str(color_i)) + fullname_reference = os.path.join(dir_volume, "reference") + if load_volume(fullname_reference + nii) is None: + fullname_median = os.path.join(dir_volume, p.volume_names[p.lt // 2]) + shutil.copyfile(fullname_median + ori + nii, fullname_reference + nii) if p.dir_transform: dir_transform = os.path.join(p.dir_transform, str(color_i)) else: - dir_transform = os.path.join(p.dir_output, 
'transforms', str(color_i)) + dir_transform = os.path.join(p.dir_output, "transforms", str(color_i)) os.makedirs(dir_transform, exist_ok=True) def get_fullname_tform(name_volume): - return os.path.join(dir_transform, name_volume+'_tform_0GenericAffine.mat') + return os.path.join( + dir_transform, name_volume + "_tform_0GenericAffine.mat" + ) def load_transform(name_volume): try: @@ -52,77 +54,88 @@ def load_transform(name_volume): def register_volume(tuple_name_volume): import os + # disable ITK multithreading - os.environ['ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS'] = '1' + os.environ["ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS"] = "1" name_volume = tuple_name_volume[1] fullname_volume = os.path.join(dir_volume, name_volume) # skip processing if aligned volume exists - if load_volume(fullname_volume+ali+hdf) is not None: + if load_volume(fullname_volume + ali + hdf) is not None: tform_vector = load_transform(name_volume) return tform_vector # setup registration\ - if p.registration == 'transform': + if p.registration == "transform": cmd = ants_transformation( - dir_ants = p.dir_ants, - in_nii = fullname_volume+ori+nii, - ref_nii = fullname_reference+nii, - out_nii = fullname_volume+ali+nii, - in_tform = get_fullname_tform(name_volume), - interpolation='Linear' - ) + dir_ants=p.dir_ants, + in_nii=fullname_volume + ori + nii, + ref_nii=fullname_reference + nii, + out_nii=fullname_volume + ali + nii, + in_tform=get_fullname_tform(name_volume), + interpolation="Linear", + ) else: cmd = ants_registration( - dir_ants = p.dir_ants, - in_nii = fullname_volume+ori+nii, - ref_nii = fullname_reference+nii, - out_nii = fullname_volume+ali+nii, - prefix_out_tform = os.path.join(dir_transform, name_volume+'_tform_'), - restrict = p.registration_restrict, - typ = 'r', + dir_ants=p.dir_ants, + in_nii=fullname_volume + ori + nii, + ref_nii=fullname_reference + nii, + out_nii=fullname_volume + ali + nii, + prefix_out_tform=os.path.join( + dir_transform, name_volume + "_tform_" + ), + restrict=p.registration_restrict, + typ="r", ) - if p.registration == 'high': + if p.registration == "high": pass - elif p.registration == 'medium': - cmd = cmd.replace('[1000x500x250x125]', '[1000x500x250]')\ - .replace('12x8x4x2', '12x8x4')\ - .replace('4x3x2x1vox', '4x3x2vox') - elif p.registration == 'low': - cmd = cmd.replace('[1000x500x250x125]', '[1000x500]')\ - .replace('12x8x4x2', '12x8')\ - .replace('4x3x2x1vox', '4x3vox') + elif p.registration == "medium": + cmd = ( + cmd.replace("[1000x500x250x125]", "[1000x500x250]") + .replace("12x8x4x2", "12x8x4") + .replace("4x3x2x1vox", "4x3x2vox") + ) + elif p.registration == "low": + cmd = ( + cmd.replace("[1000x500x250x125]", "[1000x500]") + .replace("12x8x4x2", "12x8") + .replace("4x3x2x1vox", "4x3vox") + ) # run registration flag = os.system(cmd) if flag: # if breaks change initialization - flag = os.system(cmd.replace(nii+',1]', nii+',0]')) - if flag and load_volume(fullname_volume+ori+nii).shape[2] == 1: + flag = os.system(cmd.replace(nii + ",1]", nii + ",0]")) + if flag and load_volume(fullname_volume + ori + nii).shape[2] == 1: # if breaks change dimensionality - flag = os.system(cmd.replace('--dimensionality 3', '--dimensionality 2')) + flag = os.system( + cmd.replace("--dimensionality 3", "--dimensionality 2") + ) if not flag: - volume = load_volume(fullname_volume+ali+nii)[:, :, None] - save_volume(fullname_volume+ali+nii, volume, p.affine_mat) + volume = load_volume(fullname_volume + ali + nii)[:, :, None] + save_volume(fullname_volume + ali + nii, volume, p.affine_mat) if 
flag: - raise Exception('volume %s not registered: flag %d.'%(name_volume, flag)) + raise Exception( + "volume %s not registered: flag %d." % (name_volume, flag) + ) # load aligned volume - volume = load_volume(fullname_volume+ali+nii) + volume = load_volume(fullname_volume + ali + nii) # remove padding if p.planes_pad: - volume = volume[:, :, p.planes_pad:-p.planes_pad] + volume = volume[:, :, p.planes_pad : -p.planes_pad] # save as hdf5 volume = volume.T - save_volume(fullname_volume+ali+hdf, volume) + save_volume(fullname_volume + ali + hdf, volume) # remove nifti files - if load_volume(fullname_volume+ali+hdf) is not None: + if load_volume(fullname_volume + ali + hdf) is not None: try: - os.remove(fullname_volume+ori+nii) - os.remove(fullname_volume+ali+nii) + os.remove(fullname_volume + ori + nii) + os.remove(fullname_volume + ali + nii) except: pass @@ -132,9 +145,9 @@ def register_volume(tuple_name_volume): # run registration and save transforms as needed transforms = volume_nameRDD.map(register_volume).collect() - if not p.registration == 'transform': + if not p.registration == "transform": transforms = np.array(transforms).astype(dtype) - fullname_tforms = os.path.join(dir_transform, 'transforms%d'%(color_i)) - if not os.path.isfile(fullname_tforms+hdf): - with h5py.File(fullname_tforms+hdf, 'w') as file_handle: - file_handle['transforms'] = transforms + fullname_tforms = os.path.join(dir_transform, "transforms%d" % (color_i)) + if not os.path.isfile(fullname_tforms + hdf): + with h5py.File(fullname_tforms + hdf, "w") as file_handle: + file_handle["transforms"] = transforms diff --git a/voluseg/_steps/step3.py b/voluseg/_steps/step3.py index 64a1a0a..256d20c 100755 --- a/voluseg/_steps/step3.py +++ b/voluseg/_steps/step3.py @@ -1,5 +1,5 @@ def mask_volumes(parameters): - '''create intensity mask from the average registered volume''' + """create intensity mask from the average registered volume""" import os import h5py @@ -18,43 +18,46 @@ def mask_volumes(parameters): # set up spark from pyspark.sql.session import SparkSession + spark = SparkSession.builder.getOrCreate() sc = spark.sparkContext # set up matplotlib import warnings import matplotlib + with warnings.catch_warnings(): - warnings.simplefilter('ignore') - matplotlib.use('Agg') + warnings.simplefilter("ignore") + matplotlib.use("Agg") import matplotlib.pyplot as plt p = SimpleNamespace(**parameters) # compute mean timeseries and ranked dff - fullname_timemean = os.path.join(p.dir_output, 'mean_timeseries') + fullname_timemean = os.path.join(p.dir_output, "mean_timeseries") volume_nameRDD = evenly_parallelize(p.volume_names) - if not os.path.isfile(fullname_timemean+hdf): + if not os.path.isfile(fullname_timemean + hdf): dff_rank = np.zeros(p.lt) mean_timeseries_raw = np.zeros((p.n_colors, p.lt)) mean_timeseries = np.zeros((p.n_colors, p.lt)) mean_baseline = np.zeros((p.n_colors, p.lt)) for color_i in range(p.n_colors): - dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i)) + dir_volume = os.path.join(p.dir_output, "volumes", str(color_i)) def mean_volume(tuple_name_volume): import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + 
os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np name_volume = tuple_name_volume[1] fullname_volume = os.path.join(dir_volume, name_volume) - return np.mean(load_volume(fullname_volume+ali+hdf), dtype='float') + return np.mean(load_volume(fullname_volume + ali + hdf), dtype="float") mean_timeseries_raw[color_i] = volume_nameRDD.map(mean_volume).collect() time, base = clean_signal(parameters, mean_timeseries_raw[color_i]) @@ -62,69 +65,71 @@ def mean_volume(tuple_name_volume): dff_rank += stats.rankdata((time - base) / time) # get high delta-f/f timepoints - if p.type_timepoints == 'custom': + if p.type_timepoints == "custom": timepoints = p.timepoints else: nt = p.timepoints if not nt: timepoints = np.arange(p.lt) else: - if p.type_timepoints == 'dff': + if p.type_timepoints == "dff": timepoints = np.sort(np.argsort(dff_rank)[::-1][:nt]) - elif p.type_timepoints == 'periodic': - timepoints = np.linspace(0, p.lt, nt, dtype='int', endpoint=False) + elif p.type_timepoints == "periodic": + timepoints = np.linspace(0, p.lt, nt, dtype="int", endpoint=False) - with h5py.File(fullname_timemean+hdf, 'w') as file_handle: - file_handle['mean_timeseries_raw'] = mean_timeseries_raw - file_handle['mean_timeseries'] = mean_timeseries - file_handle['mean_baseline'] = mean_baseline - file_handle['timepoints'] = timepoints + with h5py.File(fullname_timemean + hdf, "w") as file_handle: + file_handle["mean_timeseries_raw"] = mean_timeseries_raw + file_handle["mean_timeseries"] = mean_timeseries + file_handle["mean_baseline"] = mean_baseline + file_handle["timepoints"] = timepoints # load timepoints - with h5py.File(fullname_timemean+hdf, 'r') as file_handle: - timepoints = file_handle['timepoints'][()] + with h5py.File(fullname_timemean + hdf, "r") as file_handle: + timepoints = file_handle["timepoints"][()] for color_i in range(p.n_colors): - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - if os.path.isfile(fullname_volmean+hdf): + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + if os.path.isfile(fullname_volmean + hdf): continue - dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i)) - dir_plot = os.path.join(p.dir_output, 'mask_plots', str(color_i)) + dir_volume = os.path.join(p.dir_output, "volumes", str(color_i)) + dir_plot = os.path.join(p.dir_output, "mask_plots", str(color_i)) os.makedirs(dir_plot, exist_ok=True) fullname_volume = os.path.join(dir_volume, p.volume_names[0]) - lx, ly, lz = load_volume(fullname_volume+ali+hdf).T.shape + lx, ly, lz = load_volume(fullname_volume + ali + hdf).T.shape class accum_param(pyspark.accumulators.AccumulatorParam): - '''define accumulator class''' + """define accumulator class""" def zero(self, val0): import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np - return np.zeros(val0.shape, dtype='float') + return np.zeros(val0.shape, dtype="float") def addInPlace(self, val1, val2): import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - 
os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np - if p.type_mask == 'max': - return np.maximum(val1, val2, dtype='float') + if p.type_mask == "max": + return np.maximum(val1, val2, dtype="float") else: - return np.add(val1, val2, dtype='float') + return np.add(val1, val2, dtype="float") # geometric mean volume_accum = sc.accumulator(np.zeros((lx, ly, lz)), accum_param()) @@ -132,8 +137,8 @@ def addInPlace(self, val1, val2): def add_volume(tuple_name_volume): name_volume = tuple_name_volume[1] fullname_volume = os.path.join(dir_volume, name_volume) - volume = load_volume(fullname_volume+ali+hdf).T - if p.type_mask == 'geomean': + volume = load_volume(fullname_volume + ali + hdf).T + if p.type_mask == "geomean": volume = np.log10(volume) volume_accum.add(volume) @@ -143,20 +148,26 @@ def add_volume(tuple_name_volume): for name_volume in p.volume_names[timepoints]: add_volume(([], name_volume)) volume_mean = volume_accum.value - if p.type_mask != 'max': + if p.type_mask != "max": volume_mean = volume_mean / len(timepoints) - if p.type_mask == 'geomean': - volume_mean = 10 ** volume_mean + if p.type_mask == "geomean": + volume_mean = 10**volume_mean # get peaks by comparing to a median-smoothed volume ball_radi = ball(0.5 * p.diam_cell, p.affine_mat)[0] volume_peak = volume_mean >= median_filter(volume_mean, footprint=ball_radi) # compute power and probability - voxel_intensity = np.percentile(volume_mean[volume_mean>0], np.r_[5:95:0.001])[:, None] - gmm = mixture.GaussianMixture(n_components=2, max_iter=100, n_init=100).fit(voxel_intensity) + voxel_intensity = np.percentile( + volume_mean[volume_mean > 0], np.r_[5:95:0.001] + )[:, None] + gmm = mixture.GaussianMixture(n_components=2, max_iter=100, n_init=100).fit( + voxel_intensity + ) voxel_probability = gmm.predict_proba(voxel_intensity) - voxel_probability = voxel_probability[:, np.argmax(voxel_intensity[np.argmax(voxel_probability, 0)])] + voxel_probability = voxel_probability[ + :, np.argmax(voxel_intensity[np.argmax(voxel_probability, 0)]) + ] # compute intensity threshold if (p.thr_mask > 0) and (p.thr_mask <= 1): @@ -168,34 +179,34 @@ def add_volume(tuple_name_volume): ix = np.argmin(np.abs(voxel_intensity - thr_intensity)) thr_probability = voxel_probability[ix] else: - thr_intensity = - np.inf + thr_intensity = -np.inf thr_probability = 0 - print('using probability threshold of %f.'%(thr_probability)) - print('using intensity threshold of %f.'%(thr_intensity)) + print("using probability threshold of %f." % (thr_probability)) + print("using intensity threshold of %f." 
% (thr_intensity)) # get and save brain mask fig = plt.figure(1, (18, 6)) plt.subplot(131) _ = plt.hist(voxel_intensity, 100) - plt.plot(thr_intensity, 0, '|', color='r', markersize=200) - plt.xlabel('voxel intensity') - plt.title('intensity histogram with threshold (red)') + plt.plot(thr_intensity, 0, "|", color="r", markersize=200) + plt.xlabel("voxel intensity") + plt.title("intensity histogram with threshold (red)") plt.subplot(132) _ = plt.hist(voxel_probability, 100) - plt.plot(thr_probability, 0, '|', color='r', markersize=200) - plt.xlabel('voxel probability') - plt.title('probability histogram with threshold (red)') + plt.plot(thr_probability, 0, "|", color="r", markersize=200) + plt.xlabel("voxel probability") + plt.title("probability histogram with threshold (red)") plt.subplot(133) plt.plot(voxel_intensity, voxel_probability, linewidth=3) - plt.plot(thr_intensity, thr_probability, 'x', color='r', markersize=10) - plt.xlabel('voxel intensity') - plt.ylabel('voxel probability') - plt.title('intensity-probability plot with threshold (red)') + plt.plot(thr_intensity, thr_probability, "x", color="r", markersize=10) + plt.xlabel("voxel intensity") + plt.ylabel("voxel probability") + plt.title("intensity-probability plot with threshold (red)") - plt.savefig(os.path.join(dir_plot, 'histogram.png')) + plt.savefig(os.path.join(dir_plot, "histogram.png")) plt.close(fig) # remove all disconnected components less than 5000 cubic microliters in size @@ -205,33 +216,42 @@ def add_volume(tuple_name_volume): volume_mask = morphology.remove_small_objects(volume_mask, thr_size) # compute background fluorescence - background = np.median(volume_mean[volume_mask==0]) + background = np.median(volume_mean[volume_mask == 0]) # save brain mask figures for i in range(lz): fig = plt.figure(1, (18, 6)) plt.subplot(131) - plt.imshow(volume_mean[:, :, i].T, vmin=voxel_intensity[0], vmax=voxel_intensity[-1]) - plt.title('volume intensity (plane %d)'%(i)) + plt.imshow( + volume_mean[:, :, i].T, + vmin=voxel_intensity[0], + vmax=voxel_intensity[-1], + ) + plt.title("volume intensity (plane %d)" % (i)) plt.subplot(132) plt.imshow(volume_mask[:, :, i].T) - plt.title('volume mask (plane %d)'%(i)) + plt.title("volume mask (plane %d)" % (i)) plt.subplot(133) - img = np.stack((volume_mean[:, :, i], volume_mask[:, :, i], volume_mask[:, :, i]), axis=2) - img[:, :, 0] = (img[:, :, 0] - voxel_intensity[0]) / (voxel_intensity[-1] - voxel_intensity[0]) + img = np.stack( + (volume_mean[:, :, i], volume_mask[:, :, i], volume_mask[:, :, i]), + axis=2, + ) + img[:, :, 0] = (img[:, :, 0] - voxel_intensity[0]) / ( + voxel_intensity[-1] - voxel_intensity[0] + ) img[:, :, 0] = np.minimum(np.maximum(img[:, :, 0], 0), 1) plt.imshow(np.transpose(img, [1, 0, 2])) - plt.title('volume mask/intensity overlay (plane %d)'%(i)) + plt.title("volume mask/intensity overlay (plane %d)" % (i)) - plt.savefig(os.path.join(dir_plot, 'mask_z%03d.png'%(i))) + plt.savefig(os.path.join(dir_plot, "mask_z%03d.png" % (i))) plt.close(fig) - with h5py.File(fullname_volmean+hdf, 'w') as file_handle: - file_handle['volume_mask'] = volume_mask.T - file_handle['volume_mean'] = volume_mean.T - file_handle['volume_peak'] = volume_peak.T - file_handle['thr_intensity'] = thr_intensity - file_handle['thr_probability'] = thr_probability - file_handle['background'] = background + with h5py.File(fullname_volmean + hdf, "w") as file_handle: + file_handle["volume_mask"] = volume_mask.T + file_handle["volume_mean"] = volume_mean.T + file_handle["volume_peak"] = 
volume_peak.T + file_handle["thr_intensity"] = thr_intensity + file_handle["thr_probability"] = thr_probability + file_handle["background"] = background diff --git a/voluseg/_steps/step4.py b/voluseg/_steps/step4.py index d7c4555..14715cf 100755 --- a/voluseg/_steps/step4.py +++ b/voluseg/_steps/step4.py @@ -1,5 +1,5 @@ def detect_cells(parameters): - '''detect cells in volumes''' + """detect cells in volumes""" import os import h5py @@ -16,6 +16,7 @@ def detect_cells(parameters): # set up spark from pyspark.sql.session import SparkSession + spark = SparkSession.builder.getOrCreate() sc = spark.sparkContext @@ -24,31 +25,31 @@ def detect_cells(parameters): ball_diam, ball_diam_xyz0 = ball(1.0 * p.diam_cell, p.affine_mat) # load timepoints - fullname_timemean = os.path.join(p.dir_output, 'mean_timeseries') - with h5py.File(fullname_timemean+hdf, 'r') as file_handle: - timepoints = file_handle['timepoints'][()] + fullname_timemean = os.path.join(p.dir_output, "mean_timeseries") + with h5py.File(fullname_timemean + hdf, "r") as file_handle: + timepoints = file_handle["timepoints"][()] # load plane filename for color_i in range(p.n_colors): - fullname_cells = os.path.join(p.dir_output, 'cells%s_clean'%(color_i)) - if os.path.isfile(fullname_cells+hdf): + fullname_cells = os.path.join(p.dir_output, "cells%s_clean" % (color_i)) + if os.path.isfile(fullname_cells + hdf): continue - dir_cell = os.path.join(p.dir_output, 'cells', str(color_i)) + dir_cell = os.path.join(p.dir_output, "cells", str(color_i)) os.makedirs(dir_cell, exist_ok=True) - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - with h5py.File(fullname_volmean+hdf, 'r') as file_handle: - volume_mean = file_handle['volume_mean'][()].T - volume_mask = file_handle['volume_mask'][()].T - volume_peak = file_handle['volume_peak'][()].T - if 'n_blocks' in file_handle.keys(): + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + with h5py.File(fullname_volmean + hdf, "r") as file_handle: + volume_mean = file_handle["volume_mean"][()].T + volume_mask = file_handle["volume_mask"][()].T + volume_peak = file_handle["volume_peak"][()].T + if "n_blocks" in file_handle.keys(): flag = 0 - n_voxels_cell = file_handle['n_voxels_cell'][()] - n_blocks = file_handle['n_blocks'][()] - block_valids = file_handle['block_valids'][()] - xyz0 = file_handle['block_xyz0'][()] - xyz1 = file_handle['block_xyz1'][()] + n_voxels_cell = file_handle["n_voxels_cell"][()] + n_blocks = file_handle["n_blocks"][()] + block_valids = file_handle["block_valids"][()] + xyz0 = file_handle["block_xyz0"][()] + xyz1 = file_handle["block_xyz1"][()] else: flag = 1 @@ -68,104 +69,162 @@ def detect_cells(parameters): # get number of voxels in cell if (lz == 1) or (rz >= p.diam_cell): # area of a circle - n_voxels_cell = np.pi * ((p.diam_cell / 2.0)**2) / (rx * ry) + n_voxels_cell = np.pi * ((p.diam_cell / 2.0) ** 2) / (rx * ry) else: # volume of a cylinder (change to sphere later) - n_voxels_cell = p.diam_cell * np.pi * ((p.diam_cell / 2.0)**2) / (rx * ry * rz) + n_voxels_cell = ( + p.diam_cell * np.pi * ((p.diam_cell / 2.0) ** 2) / (rx * ry * rz) + ) n_voxels_cell = np.round(n_voxels_cell).astype(int) # get number of voxels in each cell - n_blocks, block_valids, xyz0, xyz1 = \ - define_blocks(lx, ly, lz, p.n_cells_block, n_voxels_cell, volume_mask, volume_peak) + n_blocks, block_valids, xyz0, xyz1 = define_blocks( + lx, ly, lz, p.n_cells_block, n_voxels_cell, volume_mask, volume_peak + ) # save number and indices of blocks - with 
h5py.File(fullname_volmean+hdf, 'r+') as file_handle: - file_handle['n_voxels_cell'] = n_voxels_cell - file_handle['n_blocks'] = n_blocks - file_handle['block_valids'] = block_valids - file_handle['block_xyz0'] = xyz0 - file_handle['block_xyz1'] = xyz1 + with h5py.File(fullname_volmean + hdf, "r+") as file_handle: + file_handle["n_voxels_cell"] = n_voxels_cell + file_handle["n_blocks"] = n_blocks + file_handle["block_valids"] = block_valids + file_handle["block_xyz0"] = xyz0 + file_handle["block_xyz1"] = xyz1 - print('number of blocks, total: %d.'%(block_valids.sum())) + print("number of blocks, total: %d." % (block_valids.sum())) for ii in np.where(block_valids)[0]: try: - fullname_block = os.path.join(dir_cell, 'block%05d'%(ii)) - with h5py.File(fullname_block+hdf, 'r') as file_handle: - if ('completion' in file_handle.keys()) and file_handle['completion'][()]: + fullname_block = os.path.join(dir_cell, "block%05d" % (ii)) + with h5py.File(fullname_block + hdf, "r") as file_handle: + if ("completion" in file_handle.keys()) and file_handle[ + "completion" + ][()]: block_valids[ii] = 0 except (NameError, OSError): pass - print('number of blocks, remaining: %d.'%(block_valids.sum())) + print("number of blocks, remaining: %d." % (block_valids.sum())) ix = np.where(block_valids)[0] block_ixyz01 = list(zip(ix, xyz0[ix], xyz1[ix])) # detect individual cells with sparse nnmf algorithm def detect_cells_block(tuple_i_xyz0_xyz1): import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np ii, xyz0, xyz1 = tuple_i_xyz0_xyz1[1] - voxel_xyz, voxel_timeseries, peak_idx, voxel_similarity_peak = \ - process_block_data(xyz0, xyz1, parameters, color_i, lxyz, rxyz, - ball_diam, bvolume_mean, bvolume_peak, timepoints) - - n_voxels_block = len(voxel_xyz) # number of voxels in block - - voxel_fraction_peak = np.argsort(((voxel_timeseries[peak_idx])**2).mean(1)) / len(peak_idx) + voxel_xyz, voxel_timeseries, peak_idx, voxel_similarity_peak = ( + process_block_data( + xyz0, + xyz1, + parameters, + color_i, + lxyz, + rxyz, + ball_diam, + bvolume_mean, + bvolume_peak, + timepoints, + ) + ) + + n_voxels_block = len(voxel_xyz) # number of voxels in block + + voxel_fraction_peak = np.argsort( + ((voxel_timeseries[peak_idx]) ** 2).mean(1) + ) / len(peak_idx) for fraction in np.r_[1:0:-0.05]: try: - peak_valids = (voxel_fraction_peak >= (1 - fraction)) # valid voxel indices + peak_valids = voxel_fraction_peak >= ( + 1 - fraction + ) # valid voxel indices - n_cells = np.round(peak_valids.sum() / (0.5 * n_voxels_cell)).astype(int) + n_cells = np.round( + peak_valids.sum() / (0.5 * n_voxels_cell) + ).astype(int) print((fraction, n_cells)) tic = time.time() - voxel_timeseries_valid, voxel_xyz_valid, cell_weight_init_valid, \ - cell_neighborhood_valid, cell_sparseness = initialize_block_cells( - n_voxels_cell, n_voxels_block, n_cells, voxel_xyz, voxel_timeseries, peak_idx, - peak_valids, voxel_similarity_peak, lxyz, rxyz, ball_diam, ball_diam_xyz0) - print('cell initialization: %.1f minutes.\n'%((time.time() - tic) / 60)) + ( + voxel_timeseries_valid, + voxel_xyz_valid, + cell_weight_init_valid, + 
cell_neighborhood_valid, + cell_sparseness, + ) = initialize_block_cells( + n_voxels_cell, + n_voxels_block, + n_cells, + voxel_xyz, + voxel_timeseries, + peak_idx, + peak_valids, + voxel_similarity_peak, + lxyz, + rxyz, + ball_diam, + ball_diam_xyz0, + ) + print( + "cell initialization: %.1f minutes.\n" + % ((time.time() - tic) / 60) + ) tic = time.time() cell_weights_valid, cell_timeseries_valid, d = nnmf_sparse( - voxel_timeseries_valid, voxel_xyz_valid, cell_weight_init_valid, - cell_neighborhood_valid, cell_sparseness, timepoints=timepoints, - miniter=10, maxiter=100, tolfun=1e-3) + voxel_timeseries_valid, + voxel_xyz_valid, + cell_weight_init_valid, + cell_neighborhood_valid, + cell_sparseness, + timepoints=timepoints, + miniter=10, + maxiter=100, + tolfun=1e-3, + ) success = 1 - print('cell factorization: %.1f minutes.\n'%((time.time() - tic) / 60)) + print( + "cell factorization: %.1f minutes.\n" + % ((time.time() - tic) / 60) + ) break except ValueError as msg: - print('retrying factorization of block %d: %s'%(ii, msg)) + print("retrying factorization of block %d: %s" % (ii, msg)) success = 0 # get cell positions and timeseries, and save cell data - fullname_block = os.path.join(dir_cell, 'block%05d'%(ii)) - with h5py.File(fullname_block+hdf, 'w') as file_handle: + fullname_block = os.path.join(dir_cell, "block%05d" % (ii)) + with h5py.File(fullname_block + hdf, "w") as file_handle: if success: for ci in range(n_cells): ix = cell_weights_valid[:, ci] > 0 xyzi = voxel_xyz_valid[ix] wi = cell_weights_valid[ix, ci] - bi = np.sum(wi * bvolume_mean.value[tuple(zip(*xyzi))]) / np.sum(wi) - ti = bi * cell_timeseries_valid[ci] / np.mean(cell_timeseries_valid[ci]) - - file_handle['/cell/%05d/xyz'%(ci)] = xyzi - file_handle['/cell/%05d/weights'%(ci)] = wi - file_handle['/cell/%05d/timeseries'%(ci)] = ti - - file_handle['n_cells'] = n_cells - file_handle['completion'] = 1 + bi = np.sum( + wi * bvolume_mean.value[tuple(zip(*xyzi))] + ) / np.sum(wi) + ti = ( + bi + * cell_timeseries_valid[ci] + / np.mean(cell_timeseries_valid[ci]) + ) + + file_handle["/cell/%05d/xyz" % (ci)] = xyzi + file_handle["/cell/%05d/weights" % (ci)] = wi + file_handle["/cell/%05d/timeseries" % (ci)] = ti + + file_handle["n_cells"] = n_cells + file_handle["completion"] = 1 if block_valids.any(): evenly_parallelize(block_ixyz01).foreach(detect_cells_block) diff --git a/voluseg/_steps/step4a.py b/voluseg/_steps/step4a.py index 4aad1d0..c5676b2 100755 --- a/voluseg/_steps/step4a.py +++ b/voluseg/_steps/step4a.py @@ -1,13 +1,14 @@ def define_blocks(lx, ly, lz, n_cells_block, n_voxels_cell, volume_mask, volume_peak): - '''get coordinates of individual blocks''' + """get coordinates of individual blocks""" import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np # get initial estimate for the number of blocks @@ -19,10 +20,10 @@ def define_blocks(lx, ly, lz, n_cells_block, n_voxels_cell, volume_mask, volume_ z0_range = np.array([0, 1]) else: part_block = np.round(np.cbrt(n_blocks)).astype(int) - z0_range = np.unique(np.round(np.linspace(0, lz, part_block+1)).astype(int)) + z0_range = np.unique(np.round(np.linspace(0, 
lz, part_block + 1)).astype(int)) - x0_range = np.unique(np.round(np.linspace(0, lx, part_block+1)).astype(int)) - y0_range = np.unique(np.round(np.linspace(0, ly, part_block+1)).astype(int)) + x0_range = np.unique(np.round(np.linspace(0, lx, part_block + 1)).astype(int)) + y0_range = np.unique(np.round(np.linspace(0, ly, part_block + 1)).astype(int)) ijk, xyz = [], [] for i, x0 in enumerate(x0_range): @@ -32,15 +33,15 @@ def define_blocks(lx, ly, lz, n_cells_block, n_voxels_cell, volume_mask, volume_ xyz.append([x0, y0, z0]) dim = np.max(ijk, 0) - XYZ = np.zeros(np.r_[dim+1, 3], dtype=int) + XYZ = np.zeros(np.r_[dim + 1, 3], dtype=int) XYZ[tuple(zip(*ijk))] = xyz xyz0, xyz1 = [], [] for i in range(dim[0]): for j in range(dim[1]): for k in range(dim[2]): - xyz0.append(XYZ[i , j , k]) - xyz1.append(XYZ[i+1, j+1, k+1]) + xyz0.append(XYZ[i, j, k]) + xyz1.append(XYZ[i + 1, j + 1, k + 1]) # get final block number and coordinates xyz0 = np.array(xyz0) @@ -54,8 +55,8 @@ def define_blocks(lx, ly, lz, n_cells_block, n_voxels_cell, volume_mask, volume_ # get indices of masked blocks block_valids = np.ones(n_blocks, dtype=bool) for i in range(n_blocks): - mask_i = volume_mask[x0[i]:x1[i], y0[i]:y1[i], z0[i]:z1[i]] - peak_i = volume_peak[x0[i]:x1[i], y0[i]:y1[i], z0[i]:z1[i]] + mask_i = volume_mask[x0[i] : x1[i], y0[i] : y1[i], z0[i] : z1[i]] + peak_i = volume_peak[x0[i] : x1[i], y0[i] : y1[i], z0[i] : z1[i]] block_valids[i] = np.any(np.logical_and(mask_i, peak_i)) return n_blocks, block_valids, xyz0, xyz1 diff --git a/voluseg/_steps/step4b.py b/voluseg/_steps/step4b.py index 9f76203..0df23a9 100644 --- a/voluseg/_steps/step4b.py +++ b/voluseg/_steps/step4b.py @@ -1,14 +1,25 @@ -def process_block_data(xyz0, xyz1, parameters, color_i, lxyz, rxyz, - ball_diam, bvolume_mean, bvolume_peak, timepoints): - '''load timeseries in individual blocks, slice-time correct, and find similar timeseries''' +def process_block_data( + xyz0, + xyz1, + parameters, + color_i, + lxyz, + rxyz, + ball_diam, + bvolume_mean, + bvolume_peak, + timepoints, +): + """load timeseries in individual blocks, slice-time correct, and find similar timeseries""" import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np import h5py @@ -27,38 +38,41 @@ def process_block_data(xyz0, xyz1, parameters, color_i, lxyz, rxyz, voxel_peak = np.zeros_like(bvolume_peak.value) voxel_peak[x0_:x1_, y0_:y1_, z0_:z1_] = 1 voxel_peak = voxel_peak & bvolume_peak.value & (bvolume_mean.value > 0) - voxel_mask = morphology.binary_dilation(voxel_peak, ball_diam) & (bvolume_mean.value > 0) + voxel_mask = morphology.binary_dilation(voxel_peak, ball_diam) & ( + bvolume_mean.value > 0 + ) voxel_xyz = np.argwhere(voxel_mask) voxel_xyz_peak = np.argwhere(voxel_peak) peak_idx = np.argwhere(voxel_peak[voxel_mask]).T[0] tic = time.time() - dir_volume = os.path.join(p.dir_output, 'volumes', str(color_i)) + dir_volume = os.path.join(p.dir_output, "volumes", str(color_i)) x0, y0, z0 = voxel_xyz.min(0) x1, y1, z1 = voxel_xyz.max(0) + 1 voxel_timeseries_block = [None] * p.lt for ti, name_volume in enumerate(p.volume_names): fullname_volume = os.path.join(dir_volume, 
name_volume) - with h5py.File(fullname_volume+ali+hdf, 'r') as file_handle: - voxel_timeseries_block[ti] = file_handle['volume'][z0:z1, y0:y1, x0:x1].T + with h5py.File(fullname_volume + ali + hdf, "r") as file_handle: + voxel_timeseries_block[ti] = file_handle["volume"][z0:z1, y0:y1, x0:x1].T voxel_timeseries_block = np.transpose(voxel_timeseries_block, (1, 2, 3, 0)) voxel_timeseries = voxel_timeseries_block[voxel_mask[x0:x1, y0:y1, z0:z1]] voxel_timeseries = voxel_timeseries.astype(dtype) del voxel_timeseries_block - print('data loading: %.1f minutes.\n'%((time.time() - tic) / 60)) + print("data loading: %.1f minutes.\n" % ((time.time() - tic) / 60)) # slice-time correct if more than one slice and t_section is positive if (lz > 1) and (p.t_section > 0): for i, zi in enumerate(voxel_xyz[:, 2]): # get timepoints of midpoint and zi plane for interpolation - timepoints_zi = np.arange(p.lt) / p.f_volume + zi * p.t_section + timepoints_zi = np.arange(p.lt) / p.f_volume + zi * p.t_section timepoints_zm = np.arange(p.lt) / p.f_volume + (lz / 2) * p.t_section # make spline interpolator and interpolate timeseries - spline_interpolator_xyzi = \ - interpolate.InterpolatedUnivariateSpline(timepoints_zi, voxel_timeseries[i]) + spline_interpolator_xyzi = interpolate.InterpolatedUnivariateSpline( + timepoints_zi, voxel_timeseries[i] + ) voxel_timeseries[i] = spline_interpolator_xyzi(timepoints_zm) def normalize(timeseries): @@ -68,19 +82,21 @@ def normalize(timeseries): # get voxel connectivity from proximities (distances) and similarities (correlations) voxel_xyz_phys_peak = voxel_xyz_peak * rxyz - voxel_timeseries_peak_nrm = normalize(voxel_timeseries[np.ix_(peak_idx, timepoints)]) + voxel_timeseries_peak_nrm = normalize( + voxel_timeseries[np.ix_(peak_idx, timepoints)] + ) # compute voxel peak similarity: combination of high proximity and high correlation tic = time.time() n_peaks = len(peak_idx) voxel_similarity_peak = np.zeros((n_peaks, n_peaks), dtype=bool) for i in range(n_peaks): - dist_i = (((voxel_xyz_phys_peak[i] - voxel_xyz_phys_peak)**2).sum(1))**0.5 + dist_i = (((voxel_xyz_phys_peak[i] - voxel_xyz_phys_peak) ** 2).sum(1)) ** 0.5 neib_i = dist_i < p.diam_cell corr_i = np.dot(voxel_timeseries_peak_nrm[i], voxel_timeseries_peak_nrm.T) voxel_similarity_peak[i] = neib_i & (corr_i > np.median(corr_i[neib_i])) voxel_similarity_peak = voxel_similarity_peak | voxel_similarity_peak.T - print('voxel similarity: %.1f minutes.\n'%((time.time() - tic) / 60)) + print("voxel similarity: %.1f minutes.\n" % ((time.time() - tic) / 60)) return (voxel_xyz, voxel_timeseries, peak_idx, voxel_similarity_peak) diff --git a/voluseg/_steps/step4c.py b/voluseg/_steps/step4c.py index ba383f7..1880f94 100644 --- a/voluseg/_steps/step4c.py +++ b/voluseg/_steps/step4c.py @@ -1,22 +1,31 @@ -import numpy as np - +import os from sklearn import cluster from voluseg._tools.sparseness import sparseness -def initialize_block_cells(n_voxels_cell, n_voxels_block, n_cells, voxel_xyz, - voxel_timeseries, peak_idx, peak_valids, voxel_similarity_peak, - lxyz, rxyz, ball_diam, ball_diam_xyz0): - '''initialize cell positions in individual blocks''' +def initialize_block_cells( + n_voxels_cell, + n_voxels_block, + n_cells, + voxel_xyz, + voxel_timeseries, + peak_idx, + peak_valids, + voxel_similarity_peak, + lxyz, + rxyz, + ball_diam, + ball_diam_xyz0, +): + """initialize cell positions in individual blocks""" - import os # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - 
os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' - + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" + import numpy as np # get valid voxels of peaks peak_idx_valid = peak_idx[peak_valids] @@ -24,12 +33,13 @@ def initialize_block_cells(n_voxels_cell, n_voxels_block, n_cells, voxel_xyz, voxel_xyz_phys_peak_valid = voxel_xyz_peak_valid * rxyz # cluster valid voxels of peaks by distance and similarity - cell_clusters = \ - cluster.AgglomerativeClustering( - n_clusters=n_cells, - connectivity=voxel_similarity_peak[np.ix_(peak_valids,peak_valids)], - linkage='ward')\ - .fit(voxel_xyz_phys_peak_valid) # physical location of voxels + cell_clusters = cluster.AgglomerativeClustering( + n_clusters=n_cells, + connectivity=voxel_similarity_peak[np.ix_(peak_valids, peak_valids)], + linkage="ward", + ).fit( + voxel_xyz_phys_peak_valid + ) # physical location of voxels cell_labels_peak_valid = cell_clusters.labels_ # initialize weights for n_cells and background @@ -38,18 +48,20 @@ def initialize_block_cells(n_voxels_cell, n_voxels_block, n_cells, voxel_xyz, cell_sparseness = np.zeros(n_cells + 1) for cell_i in range(n_cells): # initialize cells with binary weights - cell_weight_init[peak_idx_valid, cell_i] = (cell_labels_peak_valid == cell_i) + cell_weight_init[peak_idx_valid, cell_i] = cell_labels_peak_valid == cell_i # compute cell centroid cell_idx = np.argwhere(cell_labels_peak_valid == cell_i).T[0] cell_xyz_phys = voxel_xyz_phys_peak_valid[cell_idx] cell_dist = np.zeros(len(cell_xyz_phys)) for j, voxel_xyz_phys in enumerate(cell_xyz_phys): - cell_dist[j] = ((((voxel_xyz_phys - cell_xyz_phys)**2).sum(1))**0.5).sum() + cell_dist[j] = ( + (((voxel_xyz_phys - cell_xyz_phys) ** 2).sum(1)) ** 0.5 + ).sum() cell_xyz0 = voxel_xyz_peak_valid[cell_idx[np.argmin(cell_dist)]] # find neighborhood voxels - for voxel_xyz_neib in (cell_xyz0 - ball_diam_xyz0 + np.argwhere(ball_diam)): + for voxel_xyz_neib in cell_xyz0 - ball_diam_xyz0 + np.argwhere(ball_diam): cell_neighborhood[(voxel_xyz == voxel_xyz_neib).all(1), cell_i] = 1 # make cell sparseness @@ -69,5 +81,10 @@ def initialize_block_cells(n_voxels_cell, n_voxels_block, n_cells, voxel_xyz, cell_neighborhood_valid[:, -1] = 1 cell_sparseness[-1] = 0 - return voxel_timeseries_valid, voxel_xyz_valid, \ - cell_weight_init_valid, cell_neighborhood_valid, cell_sparseness + return ( + voxel_timeseries_valid, + voxel_xyz_valid, + cell_weight_init_valid, + cell_neighborhood_valid, + cell_sparseness, + ) diff --git a/voluseg/_steps/step4d.py b/voluseg/_steps/step4d.py index bf90642..fe52e44 100644 --- a/voluseg/_steps/step4d.py +++ b/voluseg/_steps/step4d.py @@ -1,21 +1,33 @@ -def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, - timeseries_mean=1.0, timepoints=None, verbosity=1): - ''' +def nnmf_sparse( + V0, + XYZ0, + W0, + B0, + S0, + tolfun=1e-4, + miniter=10, + maxiter=100, + timeseries_mean=1.0, + timepoints=None, + verbosity=1, +): + """ cell detection via nonnegative matrix factorization with sparseness projection V0 = voxel_timeseries_valid XYZ0 = voxel_xyz_valid W0 = cell_weight_init_valid B0 = cell_neighborhood_valid S0 = cell_sparseness - ''' + """ import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = 
'1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np from scipy import stats @@ -24,12 +36,12 @@ def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, from voluseg._tools.sparseness_projection import sparseness_projection # CAUTION: variable is modified in-place to save memory - V0 *= (timeseries_mean / V0.mean(1)[:, None]) # normalize voxel timeseries + V0 *= timeseries_mean / V0.mean(1)[:, None] # normalize voxel timeseries if timepoints is not None: - V = V0[:, timepoints].astype(float) # copy input signal + V = V0[:, timepoints].astype(float) # copy input signal else: - V = V0.astype(float) # copy input signal + V = V0.astype(float) # copy input signal XYZ = XYZ0.astype(int) W = W0.astype(float) @@ -37,22 +49,22 @@ def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, S = S0.copy() # get dimensions - n, t = V.shape + n, t = V.shape n_, c = W.shape assert n_ == n - H = np.zeros((c, t)) # zero timeseries array - dnorm_prev = np.full(2, np.inf) # last two d-norms + H = np.zeros((c, t)) # zero timeseries array + dnorm_prev = np.full(2, np.inf) # last two d-norms for ii in range(maxiter): # save current states H_ = H.copy() # Alternate least squares with regularization H = np.maximum(linalg.lstsq(W, V)[0], 0) - H *= (timeseries_mean / H.mean(1)[:, None]) # normalize component timeseries + H *= timeseries_mean / H.mean(1)[:, None] # normalize component timeseries W = np.maximum(linalg.lstsq(V.T, H.T)[0], 0) - W[np.logical_not(B)] = 0 # restrict component boundaries + W[np.logical_not(B)] = 0 # restrict component boundaries for ci in range(c): W_ci = W[B[:, ci], ci] if np.any(W_ci) and (S[ci] > 0): @@ -66,7 +78,7 @@ def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, L_ci = np.zeros(np.ptp(XYZ_ci, 0) + 1, dtype=bool) L_ci[tuple(zip(*XYZ_ci))] = W_ci > 0 L_ci = measure.label(L_ci, connectivity=3) - lci_mode = stats.mode(L_ci[L_ci>0]).mode + lci_mode = stats.mode(L_ci[L_ci > 0]).mode # backwards compatibility with old scipy behavior if not np.isscalar(lci_mode): lci_mode = lci_mode[0] @@ -76,9 +88,9 @@ def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, # Get norm of difference and check for convergence dnorm = np.sqrt(np.mean(np.square(V - W.dot(H)))) / timeseries_mean - diffh = np.sqrt(np.mean(np.square(H - H_ ))) / timeseries_mean + diffh = np.sqrt(np.mean(np.square(H - H_))) / timeseries_mean if ((dnorm_prev.max(0) - dnorm) < tolfun) & (diffh < tolfun): - if (ii >= miniter): + if ii >= miniter: break dnorm_prev[1] = dnorm_prev[0] dnorm_prev[0] = dnorm @@ -88,6 +100,6 @@ def nnmf_sparse(V0, XYZ0, W0, B0, S0, tolfun=1e-4, miniter=10, maxiter=100, # Perform final regression on full input timeseries H = np.maximum(linalg.lstsq(W, V0)[0], 0) - H *= (timeseries_mean / H.mean(1)[:, None]) # normalize component timeseries + H *= timeseries_mean / H.mean(1)[:, None] # normalize component timeseries return (W, H, dnorm) diff --git a/voluseg/_steps/step4e.py b/voluseg/_steps/step4e.py index 9a576d4..34f03d5 100644 --- a/voluseg/_steps/step4e.py +++ b/voluseg/_steps/step4e.py @@ -1,5 +1,5 @@ def collect_blocks(color_i, parameters): - '''collect cells across all blocks''' + """collect cells across all blocks""" import os import h5py @@ -11,19 +11,20 @@ 
def collect_blocks(color_i, parameters): # set up spark import pyspark from pyspark.sql.session import SparkSession + spark = SparkSession.builder.getOrCreate() sc = spark.sparkContext p = SimpleNamespace(**parameters) - dir_cell = os.path.join(p.dir_output, 'cells', str(color_i)) + dir_cell = os.path.join(p.dir_output, "cells", str(color_i)) - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - with h5py.File(fullname_volmean+hdf, 'r') as file_handle: - block_valids = file_handle['block_valids'][()] + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + with h5py.File(fullname_volmean + hdf, "r") as file_handle: + block_valids = file_handle["block_valids"][()] class accum_data(pyspark.accumulators.AccumulatorParam): - '''define accumulator class''' + """define accumulator class""" def zero(self, val0): return [[]] * 4 @@ -43,18 +44,20 @@ def add_data(tuple_ii): cell_weights = [] cell_timeseries = [] try: - fullname_block = os.path.join(dir_cell, 'block%05d'%(ii)) - with h5py.File(fullname_block+hdf, 'r') as file_handle: - for ci in range(file_handle['n_cells'][()]): - cell_xyz.append(file_handle['/cell/%05d/xyz'%(ci)][()]) - cell_weights.append(file_handle['/cell/%05d/weights'%(ci)][()]) - cell_timeseries.append(file_handle['/cell/%05d/timeseries'%(ci)][()]) + fullname_block = os.path.join(dir_cell, "block%05d" % (ii)) + with h5py.File(fullname_block + hdf, "r") as file_handle: + for ci in range(file_handle["n_cells"][()]): + cell_xyz.append(file_handle["/cell/%05d/xyz" % (ci)][()]) + cell_weights.append(file_handle["/cell/%05d/weights" % (ci)][()]) + cell_timeseries.append( + file_handle["/cell/%05d/timeseries" % (ci)][()] + ) cell_block_id.append(ii) except KeyError: - print('block %d is empty.'%ii) + print("block %d is empty." % ii) except IOError: - print('block %d does not exist.'%ii) + print("block %d does not exist." 
% ii) if p.parallel_clean: cell_data.add([cell_block_id, cell_xyz, cell_weights, cell_timeseries]) @@ -66,8 +69,10 @@ def add_data(tuple_ii): cell_block_id, cell_xyz, cell_weights, cell_timeseries = cell_data.value else: idx_block_valids = np.argwhere(block_valids).T[0] - valids_tuple = zip([[]]*len(idx_block_valids), idx_block_valids) - cell_block_id, cell_xyz, cell_weights, cell_timeseries = list(zip(*map(add_data, valids_tuple))) + valids_tuple = zip([[]] * len(idx_block_valids), idx_block_valids) + cell_block_id, cell_xyz, cell_weights, cell_timeseries = list( + zip(*map(add_data, valids_tuple)) + ) cell_block_id = [ii for bi in cell_block_id for ii in bi] cell_xyz = [xyzi for ci in cell_xyz for xyzi in ci] cell_weights = [wi for ci in cell_weights for wi in ci] @@ -84,4 +89,10 @@ def add_data(tuple_ii): cell_weights_array[ci, :li] = cell_weights[ci] cell_timeseries_array = np.array(cell_timeseries) - return cell_block_id, cell_xyz_array, cell_weights_array, cell_timeseries_array, cell_lengths + return ( + cell_block_id, + cell_xyz_array, + cell_weights_array, + cell_timeseries_array, + cell_lengths, + ) diff --git a/voluseg/_steps/step5.py b/voluseg/_steps/step5.py index 8c38271..c0b997c 100755 --- a/voluseg/_steps/step5.py +++ b/voluseg/_steps/step5.py @@ -1,5 +1,5 @@ def clean_cells(parameters): - '''remove noise cells, detrend and detect baseline''' + """remove noise cells, detrend and detect baseline""" import os import h5py @@ -14,6 +14,7 @@ def clean_cells(parameters): # set up spark from pyspark.sql.session import SparkSession + spark = SparkSession.builder.getOrCreate() sc = spark.sparkContext @@ -22,17 +23,18 @@ def clean_cells(parameters): thr_similarity = 0.5 for color_i in range(p.n_colors): - fullname_cells = os.path.join(p.dir_output, 'cells%s_clean'%(color_i)) - if os.path.isfile(fullname_cells+hdf): + fullname_cells = os.path.join(p.dir_output, "cells%s_clean" % (color_i)) + if os.path.isfile(fullname_cells + hdf): continue - cell_block_id, cell_xyz, cell_weights, cell_timeseries, cell_lengths = \ + cell_block_id, cell_xyz, cell_weights, cell_timeseries, cell_lengths = ( collect_blocks(color_i, parameters) + ) - fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i)) - with h5py.File(fullname_volmean+hdf, 'r') as file_handle: - volume_mask = file_handle['volume_mask'][()].T - volume_mean = file_handle['volume_mean'][()].T + fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i)) + with h5py.File(fullname_volmean + hdf, "r") as file_handle: + volume_mask = file_handle["volume_mask"][()].T + volume_mean = file_handle["volume_mean"][()].T x, y, z = volume_mask.shape cell_x = cell_xyz[:, :, 0] @@ -42,27 +44,33 @@ def clean_cells(parameters): ix = np.any(np.isnan(cell_timeseries), 1) if np.any(ix): - print('nans (to be removed): %d'%np.count_nonzero(ix)) + print("nans (to be removed): %d" % np.count_nonzero(ix)) cell_timeseries[ix] = 0 # keep cells that exceed threshold cell_valids = np.zeros(len(cell_w), dtype=bool) for i, (li, xi, yi, zi) in enumerate(zip(cell_lengths, cell_x, cell_y, cell_z)): if (p.thr_mask > 0) and (p.thr_mask <= 1): - cell_valids[i] = np.mean(volume_mask[xi[:li], yi[:li], zi[:li]]) > p.thr_mask + cell_valids[i] = ( + np.mean(volume_mask[xi[:li], yi[:li], zi[:li]]) > p.thr_mask + ) elif p.thr_mask > 1: - cell_valids[i] = np.mean(volume_mean[xi[:li], yi[:li], zi[:li]]) > p.thr_mask + cell_valids[i] = ( + np.mean(volume_mean[xi[:li], yi[:li], zi[:li]]) > p.thr_mask + ) # brain mask array volume_list = [[[[] for zi in range(z)] 
diff --git a/voluseg/_steps/step5.py b/voluseg/_steps/step5.py
index 8c38271..c0b997c 100755
--- a/voluseg/_steps/step5.py
+++ b/voluseg/_steps/step5.py
@@ -1,5 +1,5 @@
 def clean_cells(parameters):
-    '''remove noise cells, detrend and detect baseline'''
+    """remove noise cells, detrend and detect baseline"""
 
     import os
     import h5py
@@ -14,6 +14,7 @@
 
     # set up spark
     from pyspark.sql.session import SparkSession
+
     spark = SparkSession.builder.getOrCreate()
     sc = spark.sparkContext
 
@@ -22,17 +23,18 @@
     thr_similarity = 0.5
 
     for color_i in range(p.n_colors):
-        fullname_cells = os.path.join(p.dir_output, 'cells%s_clean'%(color_i))
-        if os.path.isfile(fullname_cells+hdf):
+        fullname_cells = os.path.join(p.dir_output, "cells%s_clean" % (color_i))
+        if os.path.isfile(fullname_cells + hdf):
             continue
 
-        cell_block_id, cell_xyz, cell_weights, cell_timeseries, cell_lengths = \
+        cell_block_id, cell_xyz, cell_weights, cell_timeseries, cell_lengths = (
             collect_blocks(color_i, parameters)
+        )
 
-        fullname_volmean = os.path.join(p.dir_output, 'volume%d'%(color_i))
-        with h5py.File(fullname_volmean+hdf, 'r') as file_handle:
-            volume_mask = file_handle['volume_mask'][()].T
-            volume_mean = file_handle['volume_mean'][()].T
+        fullname_volmean = os.path.join(p.dir_output, "volume%d" % (color_i))
+        with h5py.File(fullname_volmean + hdf, "r") as file_handle:
+            volume_mask = file_handle["volume_mask"][()].T
+            volume_mean = file_handle["volume_mean"][()].T
 
         x, y, z = volume_mask.shape
         cell_x = cell_xyz[:, :, 0]
@@ -42,27 +44,33 @@
 
         ix = np.any(np.isnan(cell_timeseries), 1)
         if np.any(ix):
-            print('nans (to be removed): %d'%np.count_nonzero(ix))
+            print("nans (to be removed): %d" % np.count_nonzero(ix))
             cell_timeseries[ix] = 0
 
         # keep cells that exceed threshold
         cell_valids = np.zeros(len(cell_w), dtype=bool)
         for i, (li, xi, yi, zi) in enumerate(zip(cell_lengths, cell_x, cell_y, cell_z)):
             if (p.thr_mask > 0) and (p.thr_mask <= 1):
-                cell_valids[i] = np.mean(volume_mask[xi[:li], yi[:li], zi[:li]]) > p.thr_mask
+                cell_valids[i] = (
+                    np.mean(volume_mask[xi[:li], yi[:li], zi[:li]]) > p.thr_mask
+                )
             elif p.thr_mask > 1:
-                cell_valids[i] = np.mean(volume_mean[xi[:li], yi[:li], zi[:li]]) > p.thr_mask
+                cell_valids[i] = (
+                    np.mean(volume_mean[xi[:li], yi[:li], zi[:li]]) > p.thr_mask
+                )
 
         # brain mask array
         volume_list = [[[[] for zi in range(z)] for yi in range(y)] for xi in range(x)]
-        volume_cell_n = np.zeros((x, y, z), dtype='int')
+        volume_cell_n = np.zeros((x, y, z), dtype="int")
         for i, (li, vi) in enumerate(zip(cell_lengths, cell_valids)):
             for j in range(li if vi else 0):
                 xij, yij, zij = cell_x[i, j], cell_y[i, j], cell_z[i, j]
                 volume_list[xij][yij][zij].append(i)
                 volume_cell_n[xij, yij, zij] += 1
-        pair_cells = [pi for a in volume_list for b in a for c in b for pi in combinations(c, 2)]
+        pair_cells = [
+            pi for a in volume_list for b in a for c in b for pi in combinations(c, 2)
+        ]
         assert len(pair_cells) == np.sum(volume_cell_n * (volume_cell_n - 1) / 2)
 
         # remove duplicate cells
@@ -70,7 +78,7 @@
         for pi, fi in zip(pair_id, pair_count):
             pair_overlap = (fi / np.mean(cell_lengths[pi])) > thr_similarity
             pair_correlation = np.corrcoef(cell_timeseries[pi])[0, 1] > thr_similarity
-            if (pair_overlap and pair_correlation):
+            if pair_overlap and pair_correlation:
                 cell_valids[pi[np.argmin(cell_w[pi])]] = 0
 
         ## get valid version of cells
@@ -89,14 +97,15 @@ def get_timebase(timeseries_tuple):
             timeseries = timeseries_tuple[1]
             return clean_signal(bparameters.value, timeseries)
+
        if p.parallel_clean:
-            print('Computing baseline in parallel mode... ', end='')
+            print("Computing baseline in parallel mode... ", end="")
             timebase = evenly_parallelize(cell_timeseries).map(get_timebase).collect()
         else:
-            print('Computing baseline in serial mode... ', end='')
-            timeseries_tuple = zip([[]]*len(cell_timeseries), cell_timeseries)
+            print("Computing baseline in serial mode... ", end="")
+            timeseries_tuple = zip([[]] * len(cell_timeseries), cell_timeseries)
             timebase = map(get_timebase, timeseries_tuple)
-        print('done.')
+        print("done.")
 
         cell_timeseries1, cell_baseline1 = list(zip(*timebase))
 
@@ -120,38 +129,38 @@ def get_timebase(timeseries_tuple):
             volume_id[xij, yij, zij] = i
             volume_weight[xij, yij, zij] = cell_weights[i, j]
 
-        with h5py.File(fullname_volmean+hdf, 'r') as file_handle:
-            background = file_handle['background'][()]
-
-        with h5py.File(fullname_cells+hdf, 'w') as file_handle:
-            file_handle['n'] = n
-            file_handle['t'] = p.lt
-            file_handle['x'] = x
-            file_handle['y'] = y
-            file_handle['z'] = z
-            file_handle['cell_x'] = cell_x
-            file_handle['cell_y'] = cell_y
-            file_handle['cell_z'] = cell_z
-            file_handle['cell_block_id'] = cell_block_id
-            file_handle['volume_id'] = volume_id
-            file_handle['volume_weight'] = volume_weight
-            file_handle['cell_weights'] = cell_weights
-            file_handle['cell_timeseries_raw'] = cell_timeseries
-            file_handle['cell_timeseries'] = cell_timeseries1
-            file_handle['cell_baseline'] = cell_baseline1
-            file_handle['background'] = background
+        with h5py.File(fullname_volmean + hdf, "r") as file_handle:
+            background = file_handle["background"][()]
+
+        with h5py.File(fullname_cells + hdf, "w") as file_handle:
+            file_handle["n"] = n
+            file_handle["t"] = p.lt
+            file_handle["x"] = x
+            file_handle["y"] = y
+            file_handle["z"] = z
+            file_handle["cell_x"] = cell_x
+            file_handle["cell_y"] = cell_y
+            file_handle["cell_z"] = cell_z
+            file_handle["cell_block_id"] = cell_block_id
+            file_handle["volume_id"] = volume_id
+            file_handle["volume_weight"] = volume_weight
+            file_handle["cell_weights"] = cell_weights
+            file_handle["cell_timeseries_raw"] = cell_timeseries
+            file_handle["cell_timeseries"] = cell_timeseries1
+            file_handle["cell_baseline"] = cell_baseline1
+            file_handle["background"] = background
 
     # clean up
     completion = 1
     for color_i in range(p.n_colors):
-        fullname_cells = os.path.join(p.dir_output, 'cells%s_clean'%(color_i))
-        if not os.path.isfile(fullname_cells+hdf):
+        fullname_cells = os.path.join(p.dir_output, "cells%s_clean" % (color_i))
+        if not os.path.isfile(fullname_cells + hdf):
             completion = 0
     if not p.save_volume:
         if completion:
             try:
-                shutil.rmtree(os.path.join(p.dir_output, 'cells'))
-                shutil.rmtree(os.path.join(p.dir_output, 'volumes'))
+                shutil.rmtree(os.path.join(p.dir_output, "cells"))
+                shutil.rmtree(os.path.join(p.dir_output, "volumes"))
             except:
                 pass
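The duplicate-removal hunk in `step5.py` treats two cells as duplicates when both their voxel-overlap fraction and their timeseries correlation exceed `thr_similarity`, and invalidates the lower-weight member of the pair. A self-contained sketch of that test on synthetic numbers (every value below is invented for illustration):

```python
import numpy as np

thr_similarity = 0.5
cell_lengths = np.array([4, 4])  # voxels per cell (synthetic)
overlap_count = 3  # voxels shared by the pair (0, 1)
t = np.linspace(0, 2 * np.pi, 100)
cell_timeseries = np.vstack([np.sin(t), np.sin(t) + 0.1])
cell_w = np.array([2.0, 1.5])  # cell weights; the weaker cell loses
cell_valids = np.ones(2, dtype=bool)

pi = np.array([0, 1])
pair_overlap = (overlap_count / np.mean(cell_lengths[pi])) > thr_similarity
pair_correlation = np.corrcoef(cell_timeseries[pi])[0, 1] > thr_similarity
if pair_overlap and pair_correlation:
    # drop the duplicate with the smaller weight
    cell_valids[pi[np.argmin(cell_w[pi])]] = 0

print(cell_valids)  # [ True False]
```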
typ else ""), + ("--transform Affine[0.1] " + lin_tform_params if "a" in typ else ""), + ("--transform SyN[0.1,3,0] " + syn_tform_params if "s" in typ else ""), + ( + "--transform BSplineSyN[0.1,26,0,3]" + syn_tform_params + if "b" in typ + else "" + ), + ] + ) - return(antsRegistration_call) + return antsRegistration_call diff --git a/voluseg/_tools/ants_transformation.py b/voluseg/_tools/ants_transformation.py index ffde555..7772457 100644 --- a/voluseg/_tools/ants_transformation.py +++ b/voluseg/_tools/ants_transformation.py @@ -1,17 +1,26 @@ -def ants_transformation(dir_ants, in_nii, ref_nii, out_nii, in_tform, interpolation='Linear'): - '''application of ants transform''' - +def ants_transformation( + dir_ants, in_nii, ref_nii, out_nii, in_tform, interpolation="Linear" +): + """application of ants transform""" + import os - antsTransformation_call = ' '.join([ - os.path.join(dir_ants, 'antsApplyTransforms'), - '--dimensionality 3', - '--input', in_nii, - '--reference-image', ref_nii, - '--output', out_nii, - '--interpolation', interpolation, - '--transform', in_tform, - '--float' - ]) + antsTransformation_call = " ".join( + [ + os.path.join(dir_ants, "antsApplyTransforms"), + "--dimensionality 3", + "--input", + in_nii, + "--reference-image", + ref_nii, + "--output", + out_nii, + "--interpolation", + interpolation, + "--transform", + in_tform, + "--float", + ] + ) - return(antsTransformation_call) + return antsTransformation_call diff --git a/voluseg/_tools/ball.py b/voluseg/_tools/ball.py index af7df62..6d77734 100644 --- a/voluseg/_tools/ball.py +++ b/voluseg/_tools/ball.py @@ -1,13 +1,17 @@ def ball(radi, affine_mat): - '''morphological cell balls and midpoints''' + """morphological cell balls and midpoints""" import numpy as np rx, ry, rz, _ = np.diag(affine_mat) - ball = np.ones(( + ball = np.ones( + ( np.maximum(1, np.round(radi / rx).astype(int) * 2 + 1), np.maximum(1, np.round(radi / ry).astype(int) * 2 + 1), - np.maximum(1, np.round(radi / rz).astype(int) * 2 + 1)), dtype=int) + np.maximum(1, np.round(radi / rz).astype(int) * 2 + 1), + ), + dtype=int, + ) ball_xyzm = (np.array(ball.shape) - 1) / 2 for xi in range(ball.shape[0]): diff --git a/voluseg/_tools/clean_signal.py b/voluseg/_tools/clean_signal.py index d62b095..67ff758 100644 --- a/voluseg/_tools/clean_signal.py +++ b/voluseg/_tools/clean_signal.py @@ -1,13 +1,14 @@ def clean_signal(parameters, timeseries): - '''detrend, filter, and estimate dynamic baseline for input timeseries''' + """detrend, filter, and estimate dynamic baseline for input timeseries""" import os + # disable numpy multithreading - os.environ['OMP_NUM_THREADS'] = '1' - os.environ['MKL_NUM_THREADS'] = '1' - os.environ['NUMEXPR_NUM_THREADS'] = '1' - os.environ['OPENBLAS_NUM_THREADS'] = '1' - os.environ['VECLIB_MAXIMUM_THREADS'] = '1' + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + os.environ["NUMEXPR_NUM_THREADS"] = "1" + os.environ["OPENBLAS_NUM_THREADS"] = "1" + os.environ["VECLIB_MAXIMUM_THREADS"] = "1" import numpy as np import pandas as pd @@ -21,24 +22,26 @@ def clean_signal(parameters, timeseries): # p.t_baseline: timescale constant for baseline estimation (in seconds) # p.f_hipass: highpass cutoff frequency # p.f_volume: frequency of imaging a single stack (in Hz) - - if p.detrending == 'standard': + + if p.detrending == "standard": robustify = lambda timeseries: timeseries - elif p.detrending == 'robust': + elif p.detrending == "robust": robustify = lambda timeseries: winsorize(timeseries, [0.1, 0.1]) - + # 
compute dynamic baseline def compute_baseline(timeseries): timeseries_df = pd.DataFrame(timeseries) - baseline_df = timeseries_df.rolling(ltau, min_periods=1, center=True).quantile(0.1) + baseline_df = timeseries_df.rolling(ltau, min_periods=1, center=True).quantile( + 0.1 + ) baseline_df = baseline_df.rolling(ltau, min_periods=1, center=True).mean() baseline = np.ravel(baseline_df) baseline += np.percentile(timeseries - baseline, 1) - assert(np.allclose(np.percentile(timeseries - baseline, 1), 0)) + assert np.allclose(np.percentile(timeseries - baseline, 1), 0) return baseline # convert to double precision - timeseries = timeseries.astype('float64') + timeseries = timeseries.astype("float64") # timeseries mean timeseries_mean = timeseries.mean() @@ -51,7 +54,7 @@ def compute_baseline(timeseries): coefpoly = np.polyfit(xtime, robustify(timeseries), 2) timeseries -= np.polyval(coefpoly, xtime) timeseries = np.concatenate((timeseries[::-1], timeseries, timeseries[::-1])) - + # highpass filter nyquist = p.f_volume / 2 if (p.f_hipass > 1e-10) and (p.f_hipass < nyquist - 1e-10): @@ -62,9 +65,9 @@ def compute_baseline(timeseries): # restore mean timeseries = timeseries - timeseries.mean() + timeseries_mean baseline = compute_baseline(robustify(timeseries)) - + # slice and convert to single precision - timeseries = timeseries[p.lt:2*p.lt].astype(dtype) - baseline = baseline[p.lt:2*p.lt].astype(dtype) + timeseries = timeseries[p.lt : 2 * p.lt].astype(dtype) + baseline = baseline[p.lt : 2 * p.lt].astype(dtype) - return(timeseries, baseline) + return (timeseries, baseline) diff --git a/voluseg/_tools/constants.py b/voluseg/_tools/constants.py index 76ffff1..85cb301 100755 --- a/voluseg/_tools/constants.py +++ b/voluseg/_tools/constants.py @@ -1,5 +1,5 @@ -ori = '_original' -ali = '_aligned' -nii = '.nii.gz' -hdf = '.hdf5' -dtype = 'float32' +ori = "_original" +ali = "_aligned" +nii = ".nii.gz" +hdf = ".hdf5" +dtype = "float32" diff --git a/voluseg/_tools/load_metadata.py b/voluseg/_tools/load_metadata.py index f12a26e..0461ab8 100644 --- a/voluseg/_tools/load_metadata.py +++ b/voluseg/_tools/load_metadata.py @@ -1,41 +1,41 @@ def load_metadata(parameters, filename_channel, filename_stack): - '''fetch z-resolution, exposure time, and stack frequency''' + """fetch z-resolution, exposure time, and stack frequency""" import numpy as np from xml.etree import ElementTree -# try: - with open(filename_channel, 'r') as file_handle: + # try: + with open(filename_channel, "r") as file_handle: xml_str = file_handle.read() -# except: -# print('error: cannot load %s.'%(filename_channel)) + # except: + # print('error: cannot load %s.'%(filename_channel)) try: xml_tree = ElementTree.fromstring(xml_str) except ElementTree.ParseError: - xml_tree = ElementTree.fromstring(xml_str.replace('&', ' ')) + xml_tree = ElementTree.fromstring(xml_str.replace("&", " ")) - for info in xml_tree.findall('info'): - if list(info.attrib.keys())[0] == 'exposure_time': - t_section = float(info.attrib['exposure_time']) / 1000 - parameters['t_section'] = t_section - print('fetched t_section.') - if list(info.attrib.keys())[0] == 'z_step': - res_z = float(info.attrib['z_step']) - parameters['res_z'] = res_z - print('fetched res_z.') + for info in xml_tree.findall("info"): + if list(info.attrib.keys())[0] == "exposure_time": + t_section = float(info.attrib["exposure_time"]) / 1000 + parameters["t_section"] = t_section + print("fetched t_section.") + if list(info.attrib.keys())[0] == "z_step": + res_z = float(info.attrib["z_step"]) + 
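The rewrapped `compute_baseline` in `clean_signal.py` is a centered rolling 10th percentile smoothed by a rolling mean, then shifted so the 1st percentile of the residual sits at zero (the condition its assertion checks). A runnable sketch with toy values for `ltau` and the signal, not the pipeline's actual parameters:

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
ltau = 60  # toy window length; stands in for the pipeline's baseline timescale
timeseries = 100 + np.sin(np.arange(600) / 50) + rng.normal(0, 1, 600)

timeseries_df = pd.DataFrame(timeseries)
baseline_df = timeseries_df.rolling(ltau, min_periods=1, center=True).quantile(0.1)
baseline_df = baseline_df.rolling(ltau, min_periods=1, center=True).mean()
baseline = np.ravel(baseline_df)
baseline += np.percentile(timeseries - baseline, 1)

# the shift pins the 1st percentile of the residual at ~0
assert np.allclose(np.percentile(timeseries - baseline, 1), 0)
```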
parameters["res_z"] = res_z + print("fetched res_z.") -# try: - if 'Stack_frequency' in filename_stack: - f_volume = np.fromfile(filename_stack, sep='\n')[0] # Hz + # try: + if "Stack_frequency" in filename_stack: + f_volume = np.fromfile(filename_stack, sep="\n")[0] # Hz else: - with open(filename_stack, 'r') as file_handle: - times_stack = np.array(file_handle.read().split('\t'))[1:-1] + with open(filename_stack, "r") as file_handle: + times_stack = np.array(file_handle.read().split("\t"))[1:-1] f_volume = 1.0 / np.mean(np.diff(times_stack.astype(float))) - parameters['f_volume'] = f_volume - print('fetched f_volume.') -# except: -# print('error: cannot load %s.'%(filename_stack)) + parameters["f_volume"] = f_volume + print("fetched f_volume.") + # except: + # print('error: cannot load %s.'%(filename_stack)) return parameters diff --git a/voluseg/_tools/load_parameters.py b/voluseg/_tools/load_parameters.py index 13e0de2..a863be8 100644 --- a/voluseg/_tools/load_parameters.py +++ b/voluseg/_tools/load_parameters.py @@ -1,14 +1,14 @@ def load_parameters(filename): - ''' load previously saved parameters from filename ''' + """load previously saved parameters from filename""" import pickle try: - with open(filename, 'rb') as file_handle: + with open(filename, "rb") as file_handle: parameters = pickle.load(file_handle) - print('parameter file successfully loaded.') + print("parameter file successfully loaded.") return parameters except Exception as msg: - print('parameter file not loaded: %s.'%(msg)) + print("parameter file not loaded: %s." % (msg)) diff --git a/voluseg/_tools/nwb.py b/voluseg/_tools/nwb.py index d143d02..c2704dd 100644 --- a/voluseg/_tools/nwb.py +++ b/voluseg/_tools/nwb.py @@ -23,7 +23,7 @@ def open_nwbfile_local(file_path: str): pynwb.NWBFile The opened NWB file. """ - io = pynwb.NWBHDF5IO(file_path, 'r') + io = pynwb.NWBHDF5IO(file_path, "r") try: nwbfile = io.read() yield nwbfile @@ -86,14 +86,16 @@ def h5_dir_to_nwb_file( if max_volumes is None: max_volumes = len(sorted_paths) for p in sorted_paths[:max_volumes]: - with h5py.File(p, 'r') as hdf: - dataset = hdf['default'][:] + with h5py.File(p, "r") as hdf: + dataset = hdf["default"][:] datasets.append(dataset) concatenated_dataset = np.concatenate(datasets, axis=0) nwbfile = pynwb.NWBFile( session_description="description", identifier=str(uuid4()), - session_start_time=datetime(2018, 4, 25, 2, 30, 3, tzinfo=tz.gettz("US/Pacific")), + session_start_time=datetime( + 2018, 4, 25, 2, 30, 3, tzinfo=tz.gettz("US/Pacific") + ), ) device = nwbfile.create_device( name="Microscope", diff --git a/voluseg/_tools/parameter_dictionary.py b/voluseg/_tools/parameter_dictionary.py index d17aa9f..8a97261 100755 --- a/voluseg/_tools/parameter_dictionary.py +++ b/voluseg/_tools/parameter_dictionary.py @@ -1,32 +1,32 @@ def parameter_dictionary(): - '''parameter dictionary with specified defaults''' + """parameter dictionary with specified defaults""" - return { - 'detrending': 'standard', # type of detrending: 'standard', 'robust', or 'none' - 'registration': 'medium', # quality of registration: 'high', 'medium', 'low' or 'none' - 'registration_restrict': '', # restrict registration (e.g. 
1x1x1x1x1x1x0x0x0x1x1x0) - 'diam_cell': 6.0, # cell diameter (microns) - 'dir_ants': '', # path to ANTs directory - 'dir_input': '', # path to input directory/ies (separate multiple directories by ';') - 'dir_output': '', # path to output directory - 'dir_transform': '', # path to transform directory - 'ds': 2, # spatial coarse-graining in x-y dimension - 'planes_pad': 0, # number of planes to pad the volume with (for robust registration) - 'planes_packed': False, # packed planes in each volume (for single plane imaging with packed planes) - 'parallel_clean': True, # parallelization of final cleaning (True is fast but memory intensive) - 'parallel_volume': True, # parallelization of mean-volume computation (True is fast but memory intensive) - 'save_volume': False, # save registered volumes after segmentation (True keeps a copy of the volumes) - 'type_timepoints': 'dff', # type of timepoints to use for cell detection: 'dff', 'periodic' or 'custom' - 'type_mask': 'geomean', # type of volume averaging for mask: 'mean', 'geomean' or 'max' - 'timepoints': 1000, # number ('dff'/'periodic') or vector ('custom') of timepoints for segmentation - 'f_hipass': 0, # frequency (Hz) for high-pass filtering of cell timeseries - 'f_volume': 2.0, # imaging frequency (Hz) - 'n_cells_block': 316, # number of cells in block (small n_cells is fast but can lead to blocky output) - 'n_colors': 1, # number of brain colors (2 in two-color volumes) - 'res_x': 0.40625, # x resolution (microns) - 'res_y': 0.40625, # y resolution (microns) - 'res_z': 5.0, # z resolution (microns) - 't_baseline': 300, # interval for baseline calculation (seconds) - 't_section': 0.01, # exposure time (seconds): time of slice acquisition - 'thr_mask': 0.5, # threshold for volume mask: 0 < thr <= 1 (probability) or thr > 1 (intensity) - } + return { + "detrending": "standard", # type of detrending: 'standard', 'robust', or 'none' + "registration": "medium", # quality of registration: 'high', 'medium', 'low' or 'none' + "registration_restrict": "", # restrict registration (e.g. 
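The `open_nwbfile_local` hunk in `nwb.py` reformats a context-manager wrapper around `pynwb.NWBHDF5IO`, so callers get `with` semantics and the IO handle is always closed. A condensed sketch of that pattern; the path in the usage comment is hypothetical:

```python
from contextlib import contextmanager

import pynwb


@contextmanager
def open_nwbfile_local(file_path: str):
    """Yield an open NWB file and close its IO handle afterwards."""
    io = pynwb.NWBHDF5IO(file_path, "r")
    try:
        yield io.read()
    finally:
        io.close()


# hypothetical usage (assumes an NWB file exists at this path):
# with open_nwbfile_local("/voluseg/output/output.nwb") as nwbfile:
#     print(list(nwbfile.acquisition))
```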
diff --git a/voluseg/_tools/parameter_dictionary.py b/voluseg/_tools/parameter_dictionary.py
index d17aa9f..8a97261 100755
--- a/voluseg/_tools/parameter_dictionary.py
+++ b/voluseg/_tools/parameter_dictionary.py
@@ -1,32 +1,32 @@
 def parameter_dictionary():
-    '''parameter dictionary with specified defaults'''
+    """parameter dictionary with specified defaults"""
 
-    return {
-        'detrending': 'standard',  # type of detrending: 'standard', 'robust', or 'none'
-        'registration': 'medium',  # quality of registration: 'high', 'medium', 'low' or 'none'
-        'registration_restrict': '',  # restrict registration (e.g. 1x1x1x1x1x1x0x0x0x1x1x0)
-        'diam_cell': 6.0,  # cell diameter (microns)
-        'dir_ants': '',  # path to ANTs directory
-        'dir_input': '',  # path to input directory/ies (separate multiple directories by ';')
-        'dir_output': '',  # path to output directory
-        'dir_transform': '',  # path to transform directory
-        'ds': 2,  # spatial coarse-graining in x-y dimension
-        'planes_pad': 0,  # number of planes to pad the volume with (for robust registration)
-        'planes_packed': False,  # packed planes in each volume (for single plane imaging with packed planes)
-        'parallel_clean': True,  # parallelization of final cleaning (True is fast but memory intensive)
-        'parallel_volume': True,  # parallelization of mean-volume computation (True is fast but memory intensive)
-        'save_volume': False,  # save registered volumes after segmentation (True keeps a copy of the volumes)
-        'type_timepoints': 'dff',  # type of timepoints to use for cell detection: 'dff', 'periodic' or 'custom'
-        'type_mask': 'geomean',  # type of volume averaging for mask: 'mean', 'geomean' or 'max'
-        'timepoints': 1000,  # number ('dff'/'periodic') or vector ('custom') of timepoints for segmentation
-        'f_hipass': 0,  # frequency (Hz) for high-pass filtering of cell timeseries
-        'f_volume': 2.0,  # imaging frequency (Hz)
-        'n_cells_block': 316,  # number of cells in block (small n_cells is fast but can lead to blocky output)
-        'n_colors': 1,  # number of brain colors (2 in two-color volumes)
-        'res_x': 0.40625,  # x resolution (microns)
-        'res_y': 0.40625,  # y resolution (microns)
-        'res_z': 5.0,  # z resolution (microns)
-        't_baseline': 300,  # interval for baseline calculation (seconds)
-        't_section': 0.01,  # exposure time (seconds): time of slice acquisition
-        'thr_mask': 0.5,  # threshold for volume mask: 0 < thr <= 1 (probability) or thr > 1 (intensity)
-    }
+    return {
+        "detrending": "standard",  # type of detrending: 'standard', 'robust', or 'none'
+        "registration": "medium",  # quality of registration: 'high', 'medium', 'low' or 'none'
+        "registration_restrict": "",  # restrict registration (e.g. 1x1x1x1x1x1x0x0x0x1x1x0)
+        "diam_cell": 6.0,  # cell diameter (microns)
+        "dir_ants": "",  # path to ANTs directory
+        "dir_input": "",  # path to input directory/ies (separate multiple directories by ';')
+        "dir_output": "",  # path to output directory
+        "dir_transform": "",  # path to transform directory
+        "ds": 2,  # spatial coarse-graining in x-y dimension
+        "planes_pad": 0,  # number of planes to pad the volume with (for robust registration)
+        "planes_packed": False,  # packed planes in each volume (for single plane imaging with packed planes)
+        "parallel_clean": True,  # parallelization of final cleaning (True is fast but memory intensive)
+        "parallel_volume": True,  # parallelization of mean-volume computation (True is fast but memory intensive)
+        "save_volume": False,  # save registered volumes after segmentation (True keeps a copy of the volumes)
+        "type_timepoints": "dff",  # type of timepoints to use for cell detection: 'dff', 'periodic' or 'custom'
+        "type_mask": "geomean",  # type of volume averaging for mask: 'mean', 'geomean' or 'max'
+        "timepoints": 1000,  # number ('dff'/'periodic') or vector ('custom') of timepoints for segmentation
+        "f_hipass": 0,  # frequency (Hz) for high-pass filtering of cell timeseries
+        "f_volume": 2.0,  # imaging frequency (Hz)
+        "n_cells_block": 316,  # number of cells in block (small n_cells is fast but can lead to blocky output)
+        "n_colors": 1,  # number of brain colors (2 in two-color volumes)
+        "res_x": 0.40625,  # x resolution (microns)
+        "res_y": 0.40625,  # y resolution (microns)
+        "res_z": 5.0,  # z resolution (microns)
+        "t_baseline": 300,  # interval for baseline calculation (seconds)
+        "t_section": 0.01,  # exposure time (seconds): time of slice acquisition
+        "thr_mask": 0.5,  # threshold for volume mask: 0 < thr <= 1 (probability) or thr > 1 (intensity)
+    }
diff --git a/voluseg/_tools/sparseness.py b/voluseg/_tools/sparseness.py
index a321108..9c7596b 100644
--- a/voluseg/_tools/sparseness.py
+++ b/voluseg/_tools/sparseness.py
@@ -1,8 +1,9 @@
 def sparseness(W, dim=0):
-    '''vector sparseness along specified dimension'''
+    """vector sparseness along specified dimension"""
 
     import numpy as np
+
     n = W.shape[dim]
-    l2 = np.sqrt(np.sum(np.square(W), dim))  # l2-norm
-    l1 = np.sum(np.abs( W), dim)  # l1-norm
+    l2 = np.sqrt(np.sum(np.square(W), dim))  # l2-norm
+    l1 = np.sum(np.abs(W), dim)  # l1-norm
     return (np.sqrt(n) - l1 / l2) / (np.sqrt(n) - 1)  # sparseness
diff --git a/voluseg/_tools/sparseness_projection.py b/voluseg/_tools/sparseness_projection.py
index 6f240f8..ce90f39 100644
--- a/voluseg/_tools/sparseness_projection.py
+++ b/voluseg/_tools/sparseness_projection.py
@@ -1,18 +1,18 @@
 def sparseness_projection(Si, s, at_least_as_sparse=False):
-    '''Hoyer sparseness projection'''
+    """Hoyer sparseness projection"""
 
     import numpy as np
 
-    assert(Si.ndim == 1)
-    S = np.copy(Si)  # copy input signal
+    assert Si.ndim == 1
+    S = np.copy(Si)  # copy input signal
 
     if s <= 0:
-        return np.maximum(S, 0)  # enforce nonnegativity
+        return np.maximum(S, 0)  # enforce nonnegativity
 
     d = S.size
-    L2 = np.sqrt(np.sum(np.square(S)))  # fixed l2-norm
-    L1 = L2 * (np.sqrt(d) * (1 - s) + s)  # desired l1-norm
+    L2 = np.sqrt(np.sum(np.square(S)))  # fixed l2-norm
+    L1 = L2 * (np.sqrt(d) * (1 - s) + s)  # desired l1-norm
 
     # quit if at_least_sparse=True and original exceeds target sparseness
     if at_least_as_sparse:
@@ -42,9 +42,9 @@ def sparseness_projection(Si, s, at_least_as_sparse=False):
         # For convenience, we square both sides and find the roots,
         # 0 = (l2[P*Alph + M])^2 - (L2)^2
         # 0 = sum((P*Alph)^2) + sum(2*P*M*Alph) + sum(M^2) - L2^2
-        A = np.sum(P * P)
+        A = np.sum(P * P)
         B = 2 * np.sum(P * M)
-        C = np.sum(M * M) - L2**2
+        C = np.sum(M * M) - L2**2
 
         Alph = (-B + np.real(np.sqrt(B**2 - 4 * A * C))) / (2 * A)
diff --git a/voluseg/_update.py b/voluseg/_update.py
index 72df05b..6ada62f 100644
--- a/voluseg/_update.py
+++ b/voluseg/_update.py
@@ -1,5 +1,8 @@
 def voluseg_update():
-    '''shortcut to update package'''
+    """shortcut to update package"""
 
     import os
-    os.system('pip install --upgrade --force-reinstall git+https://github.com/mikarubi/voluseg.git')
+
+    os.system(
+        "pip install --upgrade --force-reinstall git+https://github.com/mikarubi/voluseg.git"
+    )
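A closing consistency check on the two sparseness files above: `sparseness.py` measures Hoyer sparseness as `(sqrt(d) - l1/l2) / (sqrt(d) - 1)`, while `sparseness_projection.py` targets the l1-norm `L1 = L2 * (sqrt(d) * (1 - s) + s)`. The second formula inverts the first, which this small snippet verifies with illustrative values:

```python
import numpy as np

d, s = 100, 0.7  # dimensionality and target sparseness (illustrative values)
L2 = 1.0  # the projection keeps the l2-norm fixed
L1 = L2 * (np.sqrt(d) * (1 - s) + s)  # desired l1-norm from sparseness_projection

# plugging (L1, L2) back into the sparseness measure recovers s exactly
recovered = (np.sqrt(d) - L1 / L2) / (np.sqrt(d) - 1)
assert np.isclose(recovered, s)
print(recovered)  # 0.7
```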