From 1ab442dd6f8084b322638bed434cbb4adf17bcaa Mon Sep 17 00:00:00 2001 From: mdbarnesUCSD Date: Mon, 20 Nov 2023 15:35:37 -0800 Subject: [PATCH] Update to use the latest SigProfilerAssignment with COSMIC v3.4 signatures. --- .gitignore | 5 +-- README.md | 2 +- .../data/TextInput/Samples_SV.txt | 33 +++++++++++++++++++ SigProfilerExtractor/sigpro.py | 4 ++- SigProfilerExtractor/subroutines.py | 4 +-- setup.py | 6 ++-- test.py | 7 ++++ 7 files changed, 50 insertions(+), 11 deletions(-) create mode 100644 SigProfilerExtractor/data/TextInput/Samples_SV.txt diff --git a/.gitignore b/.gitignore index 666ff32..8ae1e3d 100644 --- a/.gitignore +++ b/.gitignore @@ -136,10 +136,7 @@ test_matobj_output/ test_segCNV_output/ test_text_output/ test_vcf_output/ -test_matrix_48_output/ -test_matrix_78_output/ -test_matrix_83_output/ -test_matrix_96_output/ +test_matrix_*_output/ SigProfilerExtractor/data/VCFInput/logs/ SigProfilerExtractor/data/VCFInput/input/ SigProfilerExtractor/data/VCFInput/output/ \ No newline at end of file diff --git a/README.md b/README.md index d20eefe..c6cda00 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37", | | **combined_stability** | Float | Default is 1.0. The cutoff thresh-hold of the combined stability (sum of average and minimum stability). Solutions with combined stabilities below this thresh-hold will not be considered. | | | **allow_stability_drop** | Boolean | Default is False. Defines if solutions with a drop in stability with respect to the highest stable number of signatures will be considered. | | **Decomposition** | | | | -| | **cosmic_version** | Float | Takes a positive float among 1, 2, 3, 3.1, 3.2 and 3.3. Default is 3.3. Defines the version of the COSMIC reference signatures. | +| | **cosmic_version** | Float | Takes a positive float among 1, 2, 3, 3.1, 3.2, 3.3, and 3.4. Default is 3.4. Defines the version of the COSMIC reference signatures. | | | **make_decomposition_plots** | Boolean | Defualt is True. If True, Denovo to Cosmic sigantures decompostion plots will be created as a part the results. | | | **collapse_to_SBS96** | Boolean | Defualt is True. If True, SBS288 and SBS1536 Denovo signatures will be mapped to SBS96 reference signatures. If False, those will be mapped to reference signatures of the same context. | **Others** | | | | diff --git a/SigProfilerExtractor/data/TextInput/Samples_SV.txt b/SigProfilerExtractor/data/TextInput/Samples_SV.txt new file mode 100644 index 0000000..fe7b83e --- /dev/null +++ b/SigProfilerExtractor/data/TextInput/Samples_SV.txt @@ -0,0 +1,33 @@ +Mutation Types PD8969a PD18031a PD4103a PD14472a PD9572a +clustered_del_1-10Kb 0 0 5 0 0 +clustered_del_10-100Kb 0 1 4 0 0 +clustered_del_100Kb-1Mb 0 2 9 0 0 +clustered_del_1Mb-10Mb 0 2 16 0 1 +clustered_del_>10Mb 0 0 13 0 0 +clustered_tds_1-10Kb 0 0 1 0 0 +clustered_tds_10-100Kb 0 0 3 0 1 +clustered_tds_100Kb-1Mb 0 4 5 0 0 +clustered_tds_1Mb-10Mb 0 1 11 0 0 +clustered_tds_>10Mb 0 2 8 0 0 +clustered_inv_1-10Kb 0 7 7 0 0 +clustered_inv_10-100Kb 0 7 7 0 0 +clustered_inv_100Kb-1Mb 0 7 14 0 3 +clustered_inv_1Mb-10Mb 0 3 33 0 0 +clustered_inv_>10Mb 0 3 19 0 1 +clustered_trans 0 11 186 0 1 +non-clustered_del_1-10Kb 14 2 14 0 1 +non-clustered_del_10-100Kb 10 8 11 0 0 +non-clustered_del_100Kb-1Mb 4 2 23 0 0 +non-clustered_del_1Mb-10Mb 2 1 18 1 0 +non-clustered_del_>10Mb 3 0 11 1 0 +non-clustered_tds_1-10Kb 1 0 1 0 0 +non-clustered_tds_10-100Kb 2 3 1 0 0 +non-clustered_tds_100Kb-1Mb 4 1 7 0 1 +non-clustered_tds_1Mb-10Mb 2 2 11 0 0 +non-clustered_tds_>10Mb 4 0 10 1 0 +non-clustered_inv_1-10Kb 5 6 13 0 1 +non-clustered_inv_10-100Kb 2 3 5 0 0 +non-clustered_inv_100Kb-1Mb 4 1 18 0 1 +non-clustered_inv_1Mb-10Mb 7 2 19 0 0 +non-clustered_inv_>10Mb 11 1 27 1 0 +non-clustered_trans 51 11 83 3 6 diff --git a/SigProfilerExtractor/sigpro.py b/SigProfilerExtractor/sigpro.py index 4e2e0b6..71fac19 100644 --- a/SigProfilerExtractor/sigpro.py +++ b/SigProfilerExtractor/sigpro.py @@ -101,6 +101,8 @@ def importdata(datatype="matrix"): data = paths+"/data/CSVInput/csv_example.csv" elif datatype=="seg:BATTENBERG": data = paths+"/data/CNVInput/Battenberg_test.tsv" + elif datatype=="matrix_SV": + data = paths+"/data/TextInput/Samples_SV.txt" elif datatype=="vcf": data = paths+"/data/VCFInput/" return data @@ -165,7 +167,7 @@ def sigProfilerExtractor(input_type, input_data, reference_genome="GRCh37", opportunity_genome = "GRCh37", - cosmic_version=3.3, + cosmic_version=3.4, context_type = "default", exome = False, minimum_signatures=1, diff --git a/SigProfilerExtractor/subroutines.py b/SigProfilerExtractor/subroutines.py index 38a8d57..a8d29e9 100644 --- a/SigProfilerExtractor/subroutines.py +++ b/SigProfilerExtractor/subroutines.py @@ -1099,9 +1099,9 @@ def export_information(loopResults, mutation_type, output, index, colnames, sequ elif m=="ID83": plot.plotID(signature_subdirectory+"/"+mutation_type+"_S"+str(i)+"_Signatures"+".txt", signature_subdirectory+"/Signature_plot/" , "S"+str(i), "83", True, custom_text_upper=stability_list, custom_text_middle=total_mutation_list) elif m=="CNV48": - plot.plotCNV(signature_subdirectory+"/"+mutation_type+"_S"+str(i)+"_Signatures"+".txt", signature_subdirectory+"/Signature_plot/" , "S"+str(i), "pdf", percentage=True, aggregate=False) + plot.plotCNV(signature_subdirectory+"/"+mutation_type+"_S"+str(i)+"_Signatures"+".txt", signature_subdirectory+"/Signature_plot/" , "S"+str(i), percentage=True, aggregate=False) elif m=="SV32": - plot.plotSV(signature_subdirectory+"/"+mutation_type+"_S"+str(i)+"_Signatures"+".txt", signature_subdirectory+"/Signature_plot/" , "S"+str(i), "pdf", percentage=True, aggregate=False) + plot.plotSV(signature_subdirectory+"/"+mutation_type+"_S"+str(i)+"_Signatures"+".txt", signature_subdirectory+"/Signature_plot/" , "S"+str(i), percentage=True, aggregate=False) elif m=="SBS96" or m=="SBS288" or m=="SBS384" or m=="SBS1536" or m=="SBS4608": # parse 'm' to be accepted by the plotSBS function tmp_m = m diff --git a/setup.py b/setup.py index a817fd4..b78f188 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ if os.path.exists("dist"): shutil.rmtree("dist") -VERSION = '1.1.22' +VERSION = '1.1.23' with open('README.md') as f: @@ -20,7 +20,7 @@ def write_version_py(filename='SigProfilerExtractor/version.py'): # THIS FILE IS GENERATED FROM SIGPROFILEREXTRACTOR SETUP.PY short_version = '%(version)s' version = '%(version)s' -Update = 'Upgrade v1.1.22: Resolve issue with 4608 plotting as custom' +Update = 'Upgrade v1.1.23: Update to use the latest SigProfilerAssignment with COSMIC v3.4 signatures.' """ fh = open(filename, 'w') @@ -34,7 +34,7 @@ def write_version_py(filename='SigProfilerExtractor/version.py'): 'nimfa>=1.1.0', 'SigProfilerMatrixGenerator>=1.2.17', 'sigProfilerPlotting>=1.3.16', - 'SigProfilerAssignment>=0.0.32', + 'SigProfilerAssignment>=0.1.0', 'pillow', 'statsmodels>=0.9.0', 'scikit-learn>=0.24.2', diff --git a/test.py b/test.py index fa302c3..2fbb941 100755 --- a/test.py +++ b/test.py @@ -45,6 +45,12 @@ def run_seg_48(): minimum_signatures=3, maximum_signatures=3, nmf_replicates=5, min_nmf_iterations=100, max_nmf_iterations=1000, nmf_test_conv=100) +def run_matrix_32(): + data = sig.importdata("matrix_SV") + sig.sigProfilerExtractor("matrix", "test_matrix_32_output", data, + minimum_signatures=3, maximum_signatures=3, nmf_replicates=5, + min_nmf_iterations=100, max_nmf_iterations=1000, nmf_test_conv=100) + def run_matobj(): data = sig.importdata("matobj") sig.sigProfilerExtractor("matobj", "test_matobj_output", data, @@ -63,6 +69,7 @@ def run_csv(): run_matrix_78() run_matrix_83() run_matrix_48() + run_matrix_32() run_seg_48() run_vcf() # run_matobj()