From 548b5b340eaf6658c786c9aa7a69821ce8e0cd32 Mon Sep 17 00:00:00 2001 From: Jean-Francois Pombert Date: Fri, 3 May 2024 08:32:42 -0400 Subject: [PATCH] + heatmaps hauto, hmin and hmax options --- Plots/paf_to_heatmap.py | 71 ++++++++++++++------ Plots/protein_cluster_hm.py | 67 ++++++++++++++----- README.md | 5 +- run_syny.pl | 128 +++++++++++++++++++++--------------- 4 files changed, 178 insertions(+), 93 deletions(-) diff --git a/Plots/paf_to_heatmap.py b/Plots/paf_to_heatmap.py index d55b0da..22e3e46 100755 --- a/Plots/paf_to_heatmap.py +++ b/Plots/paf_to_heatmap.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## Pombert lab, 2024 -version = '0.1b' -updated = '2024-05-01' +version = '0.2' +updated = '2024-05-03' name = 'paf_to_heatmap.py' import sys @@ -35,11 +35,14 @@ -o (--outdir) Output directory [Default: ./] -h (--height) Figure height in inches [Default: 10] -w (--width) Figure width in inches [Default: 10] +-x (--matrix) Matrix output file [Default: matrix.tsv] -c (--palette) Seaborn color palette [Default: winter_r] # See https://www.practicalpythonfordatascience.com/ap_seaborn_palette # for a list of color palettes --fontsize Font size [Default: 8] --x (--matrix) Matrix output file [Default: matrix.tsv] +--vmax Set maximum color bar value [Default: 100] +--vmin Set minimum color bar value [Default: 0] +--vauto Set color bar values automatically instead """ # Print custom message if argv is empty @@ -60,6 +63,9 @@ cmd.add_argument("-c", "--palette", default='winter_r') cmd.add_argument("-x", "--matrix", default='matrix.tsv') cmd.add_argument("--fontsize", default=8) +cmd.add_argument("--vmin", default=0) +cmd.add_argument("--vmax", default=100) +cmd.add_argument("--vauto", action='store_true') args = cmd.parse_args() paf_files = args.paf @@ -67,9 +73,12 @@ outdir = args.outdir height = args.height width = args.width +matrix_file = args.matrix color_palette = args.palette fontsize = int(args.fontsize) -matrix_file = args.matrix +vmin = int(args.vmin) +vmax = int(args.vmax) +vauto = args.vauto ################################################################################ ## Working on output directory @@ -208,14 +217,25 @@ heatmap_svg = svgdir + '/' + 'colinear_bases.mmap.heatmap.svg' ## Clustered heatmaps - cm = sns.clustermap( - data[0:], - cmap=color_palette, - annot=True, - fmt='.1f', - vmin=0, - vmax=100 - ) + cm = None + + if vauto: + cm = sns.clustermap( + data[0:], + cmap=color_palette, + annot=True, + fmt='.1f' + ) + + else: + cm = sns.clustermap( + data[0:], + cmap=color_palette, + annot=True, + fmt='.1f', + vmin=vmin, + vmax=vmax + ) cm.fig.suptitle(f"% of total bases in pairwise alignments", x=0.5, y=0.95) print(f"1 / 4 - Plotting {clustered_png}") @@ -227,14 +247,25 @@ plt.close('all') ## Normal heatmaps - hm = sns.heatmap( - data[0:], - cmap=color_palette, - annot=True, - fmt='.1f', - vmin=0, - vmax=100 - ) + hm = None + + if vauto: + hm = sns.heatmap( + data[0:], + cmap=color_palette, + annot=True, + fmt='.1f' + ) + + else: + hm = sns.heatmap( + data[0:], + cmap=color_palette, + annot=True, + fmt='.1f', + vmin=vmin, + vmax=vmax + ) hm.figure.suptitle(f"% of colinear bases in pairwise alignments", x=0.5, y=0.95) print(f"3 / 4 - Plotting {heatmap_png}") diff --git a/Plots/protein_cluster_hm.py b/Plots/protein_cluster_hm.py index 268fba6..3de2fc7 100755 --- a/Plots/protein_cluster_hm.py +++ b/Plots/protein_cluster_hm.py @@ -2,8 +2,8 @@ ## Pombert lab, 2024 name = 'protein_cluster_hm.py' -version = '0.3c' -updated = '2024-05-01' +version = '0.3d' +updated = '2024-05-03' import sys import os @@ -37,6 +37,9 @@ -h (--height) Figure height in inches [Default: 10] -w (--width) Figure width in inches [Default: 10] --fontsize Font size [Default: 8] +--vmax Set maximum color bar value [Default: 100] +--vmin Set minimum color bar value [Default: 0] +--vauto Set color bar values automatically instead --threads Number of threads to use [Default: 16] """ @@ -56,6 +59,9 @@ cmd.add_argument("-w", "--width", default=10) cmd.add_argument("-p", "--palette", default='winter_r') cmd.add_argument("--fontsize", default=8) +cmd.add_argument("--vmin", default=0) +cmd.add_argument("--vmax", default=100) +cmd.add_argument("--vauto", action='store_true') cmd.add_argument("--threads", default=16) args = cmd.parse_args() @@ -65,6 +71,9 @@ width = args.width color_palette = args.palette fontsize = int(args.fontsize) +vmin = int(args.vmin) +vmax = int(args.vmax) +vauto = args.vauto threads = int(args.threads) ################################################################################ @@ -111,14 +120,25 @@ def heatmap(tsv_file): heatmap_svg = svgdir + '/' + 'proteins_in_clusters.gap_' + gap + '.heatmap.svg' ## Clustered heatmaps - cm = sns.clustermap( - data[0:], - cmap=color_palette, - annot=True, - fmt='.1f', - vmin=0, - vmax=100, - ) + cm = None + + if vauto: + cm = sns.clustermap( + data[0:], + cmap=color_palette, + annot=True, + fmt='.1f' + ) + + else: + cm = sns.clustermap( + data[0:], + cmap=color_palette, + annot=True, + fmt='.1f', + vmin=vmin, + vmax=vmax + ) cm.fig.suptitle(f"% of protein-coding genes in clusters (gap = {gap})", x=0.5, y=0.95) with counter.get_lock(): @@ -133,14 +153,25 @@ def heatmap(tsv_file): plt.close('all') ## Normal heatmaps - hm = sns.heatmap( - data[0:], - cmap=color_palette, - annot=True, - fmt='.1f', - vmin=0, - vmax=100 - ) + hm = None + + if vauto: + hm = sns.heatmap( + data[0:], + cmap=color_palette, + annot=True, + fmt='.1f' + ) + + else: + hm = sns.heatmap( + data[0:], + cmap=color_palette, + annot=True, + fmt='.1f', + vmin=vmin, + vmax=vmax + ) hm.figure.suptitle(f"% of proteins found in clusters (gap = {gap})", x=0.5, y=0.95) with counter.get_lock(): diff --git a/README.md b/README.md index 7fb2754..c7932c3 100644 --- a/README.md +++ b/README.md @@ -286,6 +286,9 @@ Options for run_SYNY.pl are: -hw (--hwidth) Heatmap figure width in inches [Default: 10] --hfsize Heatmap font size [Default: 8] --hmpalette Heatmap color palette [Default: winter_r] +--hmax Set maximum color bar value [Default: 100] +--hmin Set minimum color bar value [Default: 0] +--hauto Set color bar values automatically instead --no_heatmap Turn off heatmaps ``` The output directory will be structured as follows: @@ -535,7 +538,7 @@ In the above example, small heatmaps with 25 datapoints (5 * 5 genomes) will be

-Heatmap dimensions (default: 10 x 10) can be modified with the `--hheight` and `--hwidth` command line switches. The color palette (default: winter_r) can be modified with the `--hmpalette` command line switch (see this [URL](https://github.com/PombertLab/SYNY/blob/main/Images/python_color_palettes.png) for a list of color palettes). Color palettes available on the operating system can be listed and/or plotted with [check_mp_colors.py](https://github.com/PombertLab/SYNY/blob/main/Utils/check_mp_colors.py). +Heatmap dimensions (default: 10 x 10) can be modified with the `--hheight` and `--hwidth` command line switches. Color bar min/max values (defaults: 0/100) can be changed with the `--hmin` and `--hmax` command line switches or calculated automatically with `--hauto`. The heatmap color palette (default: winter_r) can be changed with the `--hmpalette` command line switch (see this [URL](https://github.com/PombertLab/SYNY/blob/main/Images/python_color_palettes.png) for a list of color palettes). Color palettes available on the operating system can be listed and/or plotted with [check_mp_colors.py](https://github.com/PombertLab/SYNY/blob/main/Utils/check_mp_colors.py). #### Circos plots diff --git a/run_syny.pl b/run_syny.pl index d3b95d7..4dd82d5 100755 --- a/run_syny.pl +++ b/run_syny.pl @@ -2,7 +2,7 @@ # Pombert lab, 2022 my $name = 'run_syny.pl'; -my $version = '0.6.5c'; +my $version = '0.6.6'; my $updated = '2024-05-02'; use strict; @@ -39,69 +39,72 @@ --circos pair OPTIONS: --h (--help) Display all command line options --t (--threads) Number of threads to use [Default: 16] --p (--pthreads) Number of graphs to plot in parralel; defaults to --threads if unspecified --a (--annot) GenBank GBF/GBFF Annotation files (GZIP files are supported) --o (--outdir) Output directory [Default = SYNY] --e (--evalue) DIAMOND BLASTP evalue cutoff [Default = 1e-10] --g (--gaps) Allowable number of gaps between gene pairs [Default = 0] ---asm Specify minimap2 max divergence preset (--asm 5, 10 or 20) [Default: off] ---resume Resume minimap2 computations (skip completed alignments) ---no_map Skip minimap2 pairwise genome alignments ---no_clus Skip gene cluster reconstructions +-h (--help) Display all command line options +-t (--threads) Number of threads to use [Default: 16] +-p (--pthreads) Number of graphs to plot in parralel; defaults to --threads if unspecified +-a (--annot) GenBank GBF/GBFF Annotation files (GZIP files are supported) +-o (--outdir) Output directory [Default = SYNY] +-e (--evalue) DIAMOND BLASTP evalue cutoff [Default = 1e-10] +-g (--gaps) Allowable number of gaps between gene pairs [Default = 0] +--asm Specify minimap2 max divergence preset (--asm 5, 10 or 20) [Default: off] +--resume Resume minimap2 computations (skip completed alignments) +--no_map Skip minimap2 pairwise genome alignments +--no_clus Skip gene cluster reconstructions EXIT my $plot_options = <<"PLOT_OPTIONS"; ### Circos plots --c (--circos) Circos plot mode: pair (pairwise), cat (concatenated), all (cat + pair) [Default: pair] ---orientation Karyotype orientation: normal, inverted or both [Default: normal] ---circos_prefix Prefix for concatenated plots [Default: circos] --r (--ref) Reference to use for concatenated plots; uses first genome (alphabetically) if ommitted --u (--unit) Size unit (Kb or Mb) [Default: Mb] ---winsize Sliding windows size (nucleotide biases) [Default: 10000] ---stepsize Sliding windows step (nucleotide biases) [Default: 5000] ---labels Contig label type: mixed (arabic + roman numbers), arabic, roman, or names [Default: mixed] ---label_size Contig label size [Default: 36] ---label_font Contig label font [Default: bold] ---custom_file Load custom colors from file ---list_preset List available custom color presets ---custom_preset Use a custom color preset, e.g.: --custom_preset chloropicon ---max_ticks Set max number of ticks [Default: 5000] ---max_ideograms Set max number of ideograms [Default: 200] ---max_links Set max number of links [Default: 75000] ---max_points_per_track Set max number of points per track [Default: 75000] ---clusters Color by cluster instead of contig/chromosome [Default: off] ---no_ntbiases Turn off nucleotide biases subplots ---no_cticks Turn off ticks in Circos plots ---no_circos Turn off Circos plots +-c (--circos) Circos plot mode: pair (pairwise), cat (concatenated), all (cat + pair) [Default: pair] +--orientation Karyotype orientation: normal, inverted or both [Default: normal] +--circos_prefix Prefix for concatenated plots [Default: circos] +-r (--ref) Reference to use for concatenated plots; uses first genome (alphabetically) if ommitted +-u (--unit) Size unit (Kb or Mb) [Default: Mb] +--winsize Sliding windows size (nucleotide biases) [Default: 10000] +--stepsize Sliding windows step (nucleotide biases) [Default: 5000] +--labels Contig label type: mixed (arabic + roman numbers), arabic, roman, or names [Default: mixed] +--label_size Contig label size [Default: 36] +--label_font Contig label font [Default: bold] +--custom_file Load custom colors from file +--list_preset List available custom color presets +--custom_preset Use a custom color preset, e.g.: --custom_preset chloropicon +--max_ticks Set max number of ticks [Default: 5000] +--max_ideograms Set max number of ideograms [Default: 200] +--max_links Set max number of links [Default: 75000] +--max_points_per_track Set max number of points per track [Default: 75000] +--clusters Color by cluster instead of contig/chromosome [Default: off] +--no_ntbiases Turn off nucleotide biases subplots +--no_cticks Turn off ticks in Circos plots +--no_circos Turn off Circos plots ### Barplots --bh (--bheight) Barplot figure height in inches [Default: 10.8] --bw (--bwidth) Barplot figure width in inches [Default: 19.2] ---bfsize Barplot font size [Default: 8] ---palette Barplot color palette [Default: Spectral] ---monobar Use a monochrome barplot color instead: e.g. --monobar blue ---no_barplot Turn off barplots +-bh (--bheight) Barplot figure height in inches [Default: 10.8] +-bw (--bwidth) Barplot figure width in inches [Default: 19.2] +--bfsize Barplot font size [Default: 8] +--palette Barplot color palette [Default: Spectral] +--monobar Use a monochrome barplot color instead: e.g. --monobar blue +--no_barplot Turn off barplots ### Dotplots --dh (--dheight) Dotplot figure height in inches [Default: 10.8] --dw (--dwidth) Dotplot figure width in inches [Default: 19.2] ---dfsize Dotplot font size [Default: 8] --m (--multi) Axes units multiplier (for dotplots) [Default: 1e5] ---color Dotplot color [Default: blue] ---dotpalette Use a color palette instead: e.g. --dotpalette inferno ---noticks Turn off ticks on x and y axes ---wdis Horizontal distance (width) between subplots [Default: 0.05] ---hdis Vertical distance (height) between subplots [Default: 0.1] ---no_dotplot Turn off dotplots +-dh (--dheight) Dotplot figure height in inches [Default: 10.8] +-dw (--dwidth) Dotplot figure width in inches [Default: 19.2] +--dfsize Dotplot font size [Default: 8] +-m (--multi) Axes units multiplier (for dotplots) [Default: 1e5] +--color Dotplot color [Default: blue] +--dotpalette Use a color palette instead: e.g. --dotpalette inferno +--noticks Turn off ticks on x and y axes +--wdis Horizontal distance (width) between subplots [Default: 0.05] +--hdis Vertical distance (height) between subplots [Default: 0.1] +--no_dotplot Turn off dotplots ### Heatmaps --hh (--hheight) Heatmap figure height in inches [Default: 10] --hw (--hwidth) Heatmap figure width in inches [Default: 10] ---hfsize Heatmap font size [Default: 8] ---hmpalette Heatmap color palette [Default: winter_r] ---no_heatmap Turn off heatmaps +-hh (--hheight) Heatmap figure height in inches [Default: 10] +-hw (--hwidth) Heatmap figure width in inches [Default: 10] +--hfsize Heatmap font size [Default: 8] +--hmpalette Heatmap color palette [Default: winter_r] +--hmax Set maximum color bar value [Default: 100] +--hmin Set minimum color bar value [Default: 0] +--hauto Set color bar values automatically instead +--no_heatmap Turn off heatmaps PLOT_OPTIONS die ("\n$usage\n") unless (@ARGV); @@ -170,6 +173,9 @@ my $hwidth = 10; my $hfsize = 8; my $hmpalette = 'winter_r'; +my $hmax = 100; +my $hmin = 0; +my $hauto; my $no_heatmap; GetOptions( @@ -231,6 +237,9 @@ 'hw|hwidth=s' => \$hwidth, 'hfsize=i' => \$hfsize, 'hmpalette=s' => \$hmpalette, + 'hmax=i' => \$hmax, + 'hmin=i' => \$hmin, + 'hauto' => \$hauto, 'no_heatmap' => \$no_heatmap, ); @@ -485,6 +494,11 @@ $resume_flag = '--resume'; } +my $hm_vauto_flag = ''; +if ($hauto){ + $hm_vauto_flag = '--vauto'; +} + ################################################################################################### ## Get PAF files with minimap2 ################################################################################################### @@ -632,6 +646,9 @@ --palette $hmpalette \\ --matrix $minimap2_dir/paf_matrix.tsv \\ --fontsize $hfsize \\ + --vmax $hmax \\ + --vmin $hmin \\ + $hm_vauto_flag \\ 2>> $log_err ") == 0 or checksig(); @@ -1015,6 +1032,9 @@ --width $hwidth \\ --palette $hmpalette \\ --fontsize $hfsize \\ + --vmax $hmax \\ + --vmin $hmin \\ + $hm_vauto_flag \\ 2>> $log_err ") == 0 or checksig();