Skip to content

Commit

Permalink
code changes to (1) move key field to end of line because it was mess…
Browse files Browse the repository at this point in the history
…ing up the filter step; (2) change the identified reference data to match the code changes; and (3) change the filter reference data to add the additional key that tagged along from the identify step
  • Loading branch information
sowmyaiyer committed Apr 10, 2018
1 parent d98432e commit 12609a0
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 15 deletions.
6 changes: 3 additions & 3 deletions guideseq/identifyOfftargetSites.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,15 +329,15 @@ def analyze(sam_filename, reference_genome, outfile, annotations, search_radius,

with open(outfile, 'w') as f, open(outfile_unmerged, 'w') as f_unmerged:
# Write header
print('Window.key','Chromosome', 'Min.Position', 'Max.Position', 'Name', 'Filename', 'Position', 'WindowSequence', # 0:6
print('Chromosome', 'Min.Position', 'Max.Position', 'Name', 'Filename', 'Position', 'WindowSequence', # 0:6
'+.mi', '-.mi', 'bi.sum.mi', 'bi.geometric_mean.mi', '+.total', '-.total', 'total.sum', 'total.geometric_mean', # 7:14
'primer1.mi', 'primer2.mi', 'primer.geometric_mean', 'position.stdev', # 15:18
'Site_SubstitutionsOnly.Sequence', 'Site_SubstitutionsOnly.NumSubstitutions', # 19:20
'Site_SubstitutionsOnly.Strand', 'Site_SubstitutionsOnly.Start', 'Site_SubstitutionsOnly.End', # 21:23
'Site_GapsAllowed.Sequence', 'Site_GapsAllowed.Length', 'Site_GapsAllowed.Score', # 24:26
'Site_GapsAllowed.Substitutions', 'Site_GapsAllowed.Insertions', 'Site_GapsAllowed.Deletions', # 27:29
'Site_GapsAllowed.Strand', 'Site_GapsAllowed.Start', 'Site_GapsAllowed.End', # 30:32
'Cell', 'Targetsite', 'TargetSequence', 'RealignedTargetSequence', sep='\t', file=f) # 33:36
'Cell', 'Targetsite', 'TargetSequence', 'RealignedTargetSequence', 'Window.key',sep='\t', file=f) # 33:36

print('Window.key','Chromosome', 'Min.Position', 'Max.Position', 'Name', 'Filename', 'Position', 'WindowSequence', # 0:6
'+.mi', '-.mi', 'bi.sum.mi', 'bi.geometric_mean.mi', '+.total', '-.total', 'total.sum', 'total.geometric_mean', # 7:14
Expand Down Expand Up @@ -418,7 +418,7 @@ def analyze(sam_filename, reference_genome, outfile, annotations, search_radius,
print(*output_row_with_key, sep='\t', file=f_unmerged)

for key in sorted(output_dict.keys()):
output_dict[key].insert(0, key)
output_dict[key].append(key)
print(*output_dict[key], sep='\t', file=f)

def assignPrimerstoReads(read_sequence, sam_flag):
Expand Down
2 changes: 1 addition & 1 deletion test/data/filtered/EMX1_backgroundFiltered.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1:236259170-236261754 1473 1486 chr1:236259170-236261754_1486_7 EMX1.sam 1486 ATGGAGCAGGCGACCAGGGGTGACTCAGAATGGAGCAGGTGACCAGGGGT 7 0 7 0.0 33 0 33 0.0 2 5 3.16227766017 7.116178749862878 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none
1:236259170-236261754 1473 1486 chr1:236259170-236261754_1486_7 EMX1.sam 1486 ATGGAGCAGGCGACCAGGGGTGACTCAGAATGGAGCAGGTGACCAGGGGT 7 0 7 0.0 33 0 33 0.0 2 5 3.16227766017 7.116178749862878 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none chr1:236259170-236261754_1465_1486
14 changes: 7 additions & 7 deletions test/data/identified/EMX1_identifiedOfftargets.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Window.key Chromosome Min.Position Max.Position Name Filename Position WindowSequence +.mi -.mi bi.sum.mi bi.geometric_mean.mi +.total -.total total.sum total.geometric_mean primer1.mi primer2.mi primer.geometric_mean position.stdev Site_SubstitutionsOnly.Sequence Site_SubstitutionsOnly.NumSubstitutions Site_SubstitutionsOnly.Strand Site_SubstitutionsOnly.Start Site_SubstitutionsOnly.End Site_GapsAllowed.Sequence Site_GapsAllowed.Length Site_GapsAllowed.Score Site_GapsAllowed.Substitutions Site_GapsAllowed.Insertions Site_GapsAllowed.Deletions Site_GapsAllowed.Strand Site_GapsAllowed.Start Site_GapsAllowed.End Cell Targetsite TargetSequence RealignedTargetSequence
chr15:44108746-44110769_1000_1023 15:44108746-44110769 1007 1025 chr15:44108746-44110769_1017_189 EMX1.sam 1017 GTAGACAAGAGTCTAAGCAGAAGAAGAAGAGAGCCACTACCCAACCATCT 116 73 189 92.0217365626 258 148 406 195.407267009 96 80 87.6356092008 4.931631338038255 GAGTCTAAGCAGAAGAAGAAGAG 3 + 1000 1023 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none
chr1:236259170-236261754_1465_1486 1:236259170-236261754 1465 1486 chr1:236259170-236261754_1486_7 EMX1.sam 1486 ATGGAGCAGGCGACCAGGGGTGACTCAGAATGGAGCAGGTGACCAGGGGT 7 0 7 0.0 33 0 33 0.0 2 5 3.16227766017 7.116178749862878 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none
chr1:236259170-236261754_1531_1539 1:236259170-236261754 1531 1539 chr1:236259170-236261754_1531_5 EMX1.sam 1531 GGGGTGACTCAGAATGGAGCAGGTGACCAGGGGAATAGACGTTAACTACT 0 5 5 0.0 0 5 5 0.0 1 2 1.41421356237 2.947456530637899 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none
chr2:73159981-73162004_1000_1023 2:73159981-73162004 1008 1024 chr2:73159981-73162004_1017_489 EMX1.sam 1017 AAGGGCCTGAGTCCGAGCAGAAGAAGAAGGGCTCCCATCACATCAACCGG 243 246 489 244.49539873 619 541 1160 578.68730762 236 231 233.486616319 4.710360920354193 GAGTCCGAGCAGAAGAAGAAGGG 0 + 1000 1023 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none
chr3:197899267-197901348_1075_1081 3:197899267-197901348 1075 1081 chr3:197899267-197901348_1080_10 EMX1.sam 1080 TTAGGGTTAGGGTTAGGGTTAGGGTTCGGGTTTAGGGTTCAGGTTTATGG 0 10 10 0.0 0 32 32 0.0 9 1 3.0 2.5495097567963922 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none
chr6:9117792-9119815_1000_1023 6:9117792-9119815 1007 1007 chr6:9117792-9119815_1007_4 EMX1.sam 1007 ATGTCCTCAGAGTTCTGTCCATTCTTCTTCTGCTCAGACGTTTTGTCTGA 1 3 4 1.73205080757 1 9 10 3.0 2 2 2.0 0.0 ACGTCTGAGCAGAAGAAGAATGG 3 - 1000 1023 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none
Chromosome Min.Position Max.Position Name Filename Position WindowSequence +.mi -.mi bi.sum.mi bi.geometric_mean.mi +.total -.total total.sum total.geometric_mean primer1.mi primer2.mi primer.geometric_mean position.stdev Site_SubstitutionsOnly.Sequence Site_SubstitutionsOnly.NumSubstitutions Site_SubstitutionsOnly.Strand Site_SubstitutionsOnly.Start Site_SubstitutionsOnly.End Site_GapsAllowed.Sequence Site_GapsAllowed.Length Site_GapsAllowed.Score Site_GapsAllowed.Substitutions Site_GapsAllowed.Insertions Site_GapsAllowed.Deletions Site_GapsAllowed.Strand Site_GapsAllowed.Start Site_GapsAllowed.End Cell Targetsite TargetSequence RealignedTargetSequence Window.key
15:44108746-44110769 1007 1025 chr15:44108746-44110769_1017_189 EMX1.sam 1017 GTAGACAAGAGTCTAAGCAGAAGAAGAAGAGAGCCACTACCCAACCATCT 116 73 189 92.0217365626 258 148 406 195.407267009 96 80 87.6356092008 4.931631338038255 GAGTCTAAGCAGAAGAAGAAGAG 3 + 1000 1023 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none chr15:44108746-44110769_1000_1023
1:236259170-236261754 1465 1486 chr1:236259170-236261754_1486_7 EMX1.sam 1486 ATGGAGCAGGCGACCAGGGGTGACTCAGAATGGAGCAGGTGACCAGGGGT 7 0 7 0.0 33 0 33 0.0 2 5 3.16227766017 7.116178749862878 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none chr1:236259170-236261754_1465_1486
1:236259170-236261754 1531 1539 chr1:236259170-236261754_1531_5 EMX1.sam 1531 GGGGTGACTCAGAATGGAGCAGGTGACCAGGGGAATAGACGTTAACTACT 0 5 5 0.0 0 5 5 0.0 1 2 1.41421356237 2.947456530637899 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none chr1:236259170-236261754_1531_1539
2:73159981-73162004 1008 1024 chr2:73159981-73162004_1017_489 EMX1.sam 1017 AAGGGCCTGAGTCCGAGCAGAAGAAGAAGGGCTCCCATCACATCAACCGG 243 246 489 244.49539873 619 541 1160 578.68730762 236 231 233.486616319 4.710360920354193 GAGTCCGAGCAGAAGAAGAAGGG 0 + 1000 1023 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none chr2:73159981-73162004_1000_1023
3:197899267-197901348 1075 1081 chr3:197899267-197901348_1080_10 EMX1.sam 1080 TTAGGGTTAGGGTTAGGGTTAGGGTTCGGGTTTAGGGTTCAGGTTTATGG 0 10 10 0.0 0 32 32 0.0 9 1 3.0 2.5495097567963922 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none chr3:197899267-197901348_1075_1081
6:9117792-9119815 1007 1007 chr6:9117792-9119815_1007_4 EMX1.sam 1007 ATGTCCTCAGAGTTCTGTCCATTCTTCTTCTGCTCAGACGTTTTGTCTGA 1 3 4 1.73205080757 1 9 10 3.0 2 2 2.0 0.0 ACGTCTGAGCAGAAGAAGAATGG 3 - 1000 1023 EMX_site1 EMX1 GAGTCCGAGCAGAAGAAGAANGG none chr6:9117792-9119815_1000_1023
8 changes: 4 additions & 4 deletions test/data/identified/control_identifiedOfftargets.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Window.key Chromosome Min.Position Max.Position Name Filename Position WindowSequence +.mi -.mi bi.sum.mi bi.geometric_mean.mi +.total -.total total.sum total.geometric_mean primer1.mi primer2.mi primer.geometric_mean position.stdev Site_SubstitutionsOnly.Sequence Site_SubstitutionsOnly.NumSubstitutions Site_SubstitutionsOnly.Strand Site_SubstitutionsOnly.Start Site_SubstitutionsOnly.End Site_GapsAllowed.Sequence Site_GapsAllowed.Length Site_GapsAllowed.Score Site_GapsAllowed.Substitutions Site_GapsAllowed.Insertions Site_GapsAllowed.Deletions Site_GapsAllowed.Strand Site_GapsAllowed.Start Site_GapsAllowed.End Cell Targetsite TargetSequence RealignedTargetSequence
chr1:236259170-236261754_1473_1490 1:236259170-236261754 1473 1490 chr1:236259170-236261754_1481_7 control.sam 1481 TCAGAATGGAGCAGGCGACCAGGGGTGACTCAGAATGGAGCAGGTGACCA 1 6 7 2.44948974278 1 9 10 3.0 2 5 3.16227766017 5.535341001239219 Control control None none
chr1:236259170-236261754_1521_1531 1:236259170-236261754 1521 1531 chr1:236259170-236261754_1523_14 control.sam 1523 GGTGACCAGGGGTGACTCAGAATGGAGCAGGTGACCAGGGGAATAGACGT 0 14 14 0.0 0 18 18 0.0 7 7 7.0 3.7094473981982814 Control control None none
chr3:197899267-197901348_1035_1040 3:197899267-197901348 1035 1040 chr3:197899267-197901348_1040_3 control.sam 1040 TAGGGTTGGGTTAGGGTTAGGGTTCGGGTTAGGGTTAGGGTTAGGGTTAG 3 0 3 0.0 5 0 5 0.0 1 1 1.0 2.0548046676563256 Control control None none
Chromosome Min.Position Max.Position Name Filename Position WindowSequence +.mi -.mi bi.sum.mi bi.geometric_mean.mi +.total -.total total.sum total.geometric_mean primer1.mi primer2.mi primer.geometric_mean position.stdev Site_SubstitutionsOnly.Sequence Site_SubstitutionsOnly.NumSubstitutions Site_SubstitutionsOnly.Strand Site_SubstitutionsOnly.Start Site_SubstitutionsOnly.End Site_GapsAllowed.Sequence Site_GapsAllowed.Length Site_GapsAllowed.Score Site_GapsAllowed.Substitutions Site_GapsAllowed.Insertions Site_GapsAllowed.Deletions Site_GapsAllowed.Strand Site_GapsAllowed.Start Site_GapsAllowed.End Cell Targetsite TargetSequence RealignedTargetSequence Window.key
1:236259170-236261754 1473 1490 chr1:236259170-236261754_1481_7 control.sam 1481 TCAGAATGGAGCAGGCGACCAGGGGTGACTCAGAATGGAGCAGGTGACCA 1 6 7 2.44948974278 1 9 10 3.0 2 5 3.16227766017 5.535341001239219 Control control None none chr1:236259170-236261754_1473_1490
1:236259170-236261754 1521 1531 chr1:236259170-236261754_1523_14 control.sam 1523 GGTGACCAGGGGTGACTCAGAATGGAGCAGGTGACCAGGGGAATAGACGT 0 14 14 0.0 0 18 18 0.0 7 7 7.0 3.7094473981982814 Control control None none chr1:236259170-236261754_1521_1531
3:197899267-197901348 1035 1040 chr3:197899267-197901348_1040_3 control.sam 1040 TAGGGTTGGGTTAGGGTTAGGGTTCGGGTTAGGGTTAGGGTTAGGGTTAG 3 0 3 0.0 5 0 5 0.0 1 1 1.0 2.0548046676563256 Control control None none chr3:197899267-197901348_1035_1040

0 comments on commit 12609a0

Please sign in to comment.