Skip to content

Commit

Permalink
Merge pull request #185 from gwaygenomics/update-doc-style
Browse files Browse the repository at this point in the history
Update doc style
  • Loading branch information
gwaybio authored Feb 4, 2022
2 parents 1279cd2 + 442219e commit 48af26a
Show file tree
Hide file tree
Showing 18 changed files with 352 additions and 352 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = "alabaster"
html_theme = "sphinx_rtd_theme"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
Expand Down
72 changes: 34 additions & 38 deletions pycytominer/consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,42 @@ def consensus(
):
"""Form level 5 consensus profile data.
:param profiles: A file or pandas DataFrame of profile data
:type profiles: str
:param replicate_columns: Metadata columns indicating which replicates to collapse, defaults to ["Metadata_Plate", "Metadata_Well"]
:type replicate_columns: list
:param operation: The method used to form consensus profiles, defaults to "median"
:type operation: str
:param features: The features to collapse, defaults to "infer"
:type features: str, list
:param output_file: If specified, the location to write the file, defaults to "none"
:type output_file: str
:param modz_args: Additional custom arguments passed as kwargs if operation="modz". See pycytominer.cyto_utils.modz for more details.
:type modz_args: dict
:param compression_options: the method to compress output data, defaults to None. See pycytominer.cyto_utils.output.py for options
:type compression_options: str
:param float_format: decimal precision to use in writing output file, defaults to None. For example, use "%.3g" for 3 decimal precision.
Parameters
----------
profiles : pandas.core.frame.DataFrame or file
DataFrame or file of profiles.
replicate_columns : list, defaults to ["Metadata_Plate", "Metadata_Well"]
Metadata columns indicating which replicates to collapse
operation : str, defaults to "median"
The method used to form consensus profiles.
features : list
A list of strings corresponding to feature measurement column names in the
`profiles` DataFrame. All features listed must be found in `profiles`.
Defaults to "infer". If "infer", then assume cell painting features are those
prefixed with "Cells", "Nuclei", or "Cytoplasm".
output_file : str, optional
If provided, will write consensus profiles to file. If not specified, will
return the consensus profiles as output.
compression_options : str or dict, optional
Contains compression options as input to
pd.DataFrame.to_csv(compression=compression_options). Requires pandas version >= 1.2.
float_format : str, optional
Decimal precision to use in writing output file as input to
pd.DataFrame.to_csv(float_format=float_format). For example, use "%.3g" for 3
decimal precision.
modz_args : dict, optional
Additional custom arguments passed as kwargs if operation="modz".
See pycytominer.cyto_utils.modz for more details.
:Example:
Returns
-------
consensus_df : pandas.core.frame.DataFrame, optional
The consensus profile DataFrame. If output_file="none", then return the
DataFrame. If you specify output_file, then write to file and do not return
data.
Examples
--------
import pandas as pd
from pycytominer import consensus
Expand Down Expand Up @@ -107,25 +125,3 @@ def consensus(
)
else:
return consensus_df


data_df = pd.concat(
[
pd.DataFrame(
{
"Metadata_Plate": "X",
"Metadata_Well": "a",
"Cells_x": [0.1, 0.3, 0.8],
"Nuclei_y": [0.5, 0.3, 0.1],
}
),
pd.DataFrame(
{
"Metadata_Plate": "X",
"Metadata_Well": "b",
"Cells_x": [0.4, 0.2, -0.5],
"Nuclei_y": [-0.8, 1.2, -0.5],
}
),
]
).reset_index(drop=True)
32 changes: 28 additions & 4 deletions pycytominer/cyto_utils/annotate_custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,23 @@
def annotate_cmap(
annotated, annotate_join_on, cell_id="unknown", perturbation_mode="none"
):
"""
cell_id - [default: "unknown"] provide a string to annotate cell id column
"""Annotates data frame with custom options according to CMAP specifications
Parameters
----------
annotated : pandas.core.frame.DataFrame
DataFrame of profiles.
annotate_join_on : str
How to join the external data; typically the well metadata
cell_id : str, default "unknown"
provide a string to annotate cell id column
perturbation_mode : str, default "none"
How to annotate CMAP specific data (options = ["none", "chemical", "genetic"])
Returns
-------
annotated
CMAP annotated data
"""
pert_opts = ["none", "chemical", "genetic"]
assert perturbation_mode in pert_opts, "perturbation mode must be one of {}".format(
Expand Down Expand Up @@ -91,8 +106,17 @@ def annotate_cmap(


def cp_clean(profiles):
"""
Specifically clean certain column names derived from different CellProfiler versions
"""Specifically clean certain column names derived from different CellProfiler versions
Parameters
----------
profiles : pandas.core.frame.DataFrame
DataFrame of profiles.
Returns
-------
profiles
Renamed to standard metadata
"""

profiles = profiles.rename(
Expand Down
13 changes: 0 additions & 13 deletions pycytominer/cyto_utils/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,6 @@ def _check_subsampling(self):
-------
None
Nothing is returned.
"""

# Check that the user didn't specify both subset frac and subsample all
Expand All @@ -187,7 +186,6 @@ def set_output_file(self, output_file):
-------
None
Nothing is returned.
"""

self.output_file = output_file
Expand All @@ -204,7 +202,6 @@ def set_subsample_frac(self, subsample_frac):
-------
None
Nothing is returned.
"""

self.subsample_frac = subsample_frac
Expand All @@ -222,7 +219,6 @@ def set_subsample_n(self, subsample_n):
-------
None
Nothing is returned.
"""

try:
Expand All @@ -243,7 +239,6 @@ def set_subsample_random_state(self, random_state):
-------
None
Nothing is returned.
"""

self.subsampling_random_state = random_state
Expand All @@ -255,7 +250,6 @@ def load_image(self):
-------
None
Nothing is returned.
"""

image_query = "select * from image"
Expand Down Expand Up @@ -300,7 +294,6 @@ def count_cells(self, compartment="cells", count_subset=False):
-------
pandas.core.frame.DataFrame
DataFrame of cell counts in the experiment.
"""

check_compartments(compartment)
Expand Down Expand Up @@ -343,7 +336,6 @@ def subsample_profiles(self, df, rename_col=True):
-------
pandas.core.frame.DataFrame
A subsampled pandas dataframe of single cell profiles.
"""

if self.subsampling_random_state == "none":
Expand Down Expand Up @@ -384,7 +376,6 @@ def get_subsample(self, df=None, compartment="cells", rename_col=True):
-------
None
Nothing is returned.
"""

check_compartments(compartment)
Expand Down Expand Up @@ -418,7 +409,6 @@ def load_compartment(self, compartment):
-------
pandas.core.frame.DataFrame
Compartment dataframe.
"""
compartment_query = "select * from {}".format(compartment)
df = pd.read_sql(sql=compartment_query, con=self.conn)
Expand Down Expand Up @@ -456,7 +446,6 @@ def aggregate_compartment(
-------
pandas.core.frame.DataFrame
DataFrame of aggregated profiles.
"""

check_compartments(compartment)
Expand Down Expand Up @@ -642,7 +631,6 @@ def merge_single_cells(
-------
pandas.core.frame.DataFrame
Either a dataframe (if output_file="none") or will write to file.
"""

# Load the single cell dataframe by merging on the specific linking columns
Expand Down Expand Up @@ -783,7 +771,6 @@ def aggregate_profiles(
-------
pandas.core.frame.DataFrame
Either a dataframe (if output_file="none") or will write to file.
"""

if output_file != "none":
Expand Down
23 changes: 14 additions & 9 deletions pycytominer/cyto_utils/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ def infer_delim(file):
"""
Sniff the delimiter in the given file
Arguments
---------
Parameters
----------
file : str
File name
Expand All @@ -33,8 +33,8 @@ def load_profiles(profiles):
"""
Unless a dataframe is provided, load the given profile dataframe from path or string
Arguments
---------
Parameters
----------
profiles : {str, pandas.DataFrame}
file location or actual pandas dataframe of profiles
Expand All @@ -55,8 +55,8 @@ def load_platemap(platemap, add_metadata_id=True):
"""
Unless a dataframe is provided, load the given platemap dataframe from path or string
Arguments
---------
Parameters
----------
platemap : pandas dataframe
location or actual pandas dataframe of platemap file
Expand All @@ -65,7 +65,7 @@ def load_platemap(platemap, add_metadata_id=True):
Returns
-------
platemap : pandas DataFrame
platemap : pandas.core.frame.DataFrame
pandas DataFrame of the platemap
"""
if not isinstance(platemap, pd.DataFrame):
Expand Down Expand Up @@ -93,12 +93,17 @@ def load_npz(npz_file, fallback_feature_prefix="DP"):
If the npz file does not exist, this function returns an empty dataframe.
Arguments
---------
Parameters
----------
npz_file : str
file path to the compressed output (typically DeepProfiler output)
fallback_feature_prefix : str
a string to prefix all features [default: "DP"].
Returns
-------
df : pandas.core.frame.DataFrame
pandas DataFrame of profiles
"""
try:
npz = np.load(npz_file, allow_pickle=True)
Expand Down
72 changes: 43 additions & 29 deletions pycytominer/cyto_utils/modz.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,29 @@


def modz_base(population_df, method="spearman", min_weight=0.01, precision=4):
"""
Perform a modified z score transformation. This code is modified from cmapPy.
"""Perform a modified z score transformation.
This code is modified from cmapPy.
(see https://github.com/cytomining/pycytominer/issues/52). Note that this will
apply the transformation to the FULL population_df.
See modz() for replicate level procedures.
Arguments:
population_df - pandas DataFrame that includes metadata and observation features.
rows are samples and columns are features
method - string indicating which correlation metric to use [default: "spearman"]
min_weight - the minimum correlation to clip all non-negative values lower to
precision - how many significant digits to round weights to
Return:
modz transformed dataframe - a consensus signature of the input population_df
weighted by replicate correlation
Parameters
----------
population_df : pandas.core.frame.DataFrame
DataFrame that includes metadata and observation features.
method : str, default "spearman"
indicating which correlation metric to use.
min_weight : float, default 0.01
the minimum correlation to clip all non-negative values lower to
precision : int, default 4
how many significant digits to round weights to
Returns
-------
modz_df : pandas.core.frame.DataFrame
modz transformed dataframe - a consensus signature of the input data
weighted by replicate correlation
"""
assert population_df.shape[0] > 0, "population_df must include at least one sample"

Expand Down Expand Up @@ -73,23 +80,30 @@ def modz(
min_weight=0.01,
precision=4,
):
"""
Collapse replicates into a consensus signature using a weighted transformation
Arguments:
population_df - pandas DataFrame that includes metadata and observation features.
rows are samples and columns are features
replicate_columns - a string or list of column(s) in the population dataframe that
indicate replicate level information
features - a list of features present in the population dataframe [default: "infer"]
if "infer", then assume cell painting features are those that start with
"Cells_", "Nuclei_", or "Cytoplasm_"
method - string indicating which correlation metric to use [default: "spearman"]
min_weight - the minimum correlation to clip all non-negative values lower to
precision - how many significant digits to round weights to
Return:
Consensus signatures for all replicates in the given DataFrame
"""Collapse replicates into a consensus signature using a weighted transformation
Parameters
----------
population_df : pandas.core.frame.DataFrame
DataFrame that includes metadata and observation features.
replicate_columns : str, list
a string or list of column(s) in the population dataframe that
indicate replicate level information
features : list, default "infer"
List of features present in the population dataframe.
If "infer", then assume cell painting features are those that start with
"Cells_", "Nuclei_", or "Cytoplasm_".
method : str, default "spearman"
indicating which correlation metric to use.
min_weight : float, default 0.01
the minimum correlation to clip all non-negative values lower to
precision : int, default 4
how many significant digits to round weights to
Returns
-------
modz_df : pandas.core.frame.DataFrame
Consensus signatures with metadata for all replicates in the given DataFrame
"""
population_features = population_df.columns.tolist()
assert_error = "{} not in input dataframe".format(replicate_columns)
Expand Down
Loading

0 comments on commit 48af26a

Please sign in to comment.