forked from fjruizruano/satminer
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfastq_paired_combine_id.py
executable file
·54 lines (39 loc) · 1.34 KB
/
fastq_paired_combine_id.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/python
import sys
from subprocess import call
print "Usage: fastq_paired_combine_id.py file_1.fastq file_2.fastq"
try:
one = sys.argv[1]
except:
one = raw_input("Introduce FASTQ file 1: ")
try:
two = sys.argv[2]
except:
two = raw_input("Introduce FASTQ file 2: ")
get_reads = """awk 'NR == 1 || NR % 4 == 1' """
trim_reads = """awk '{print substr($0,2, length($0))}' """
#trim_reads = """awk '{print $0}' """
files = sys.argv[1:3]
print "Getting read names"
for file in files:
call(get_reads + "%s > %s" % (file, file+".r"), shell=True)
call(trim_reads + "%s > %s" % (file+".r", file+".t"), shell=True)
call("rm %s" % (file+".r"), shell=True)
one_data = open(files[0]+".t").readlines()
two_data = open(files[1]+".t").readlines()
print "Getting common reads"
commun = set(one_data) & set(two_data)
call("rm %s.t %s.t" % (files[0], files[1]), shell=True)
name = files[0]
name = name.split(".")
name = ".".join(name[:-1])
name = name[:-2]
out = open(name+".list", "w")
for el in commun:
out.write("%s/1\n%s/2\n" % (el[:-1], el[:-1]))
out.close()
print "Extracting reads from original files"
call("seqtk subseq %s %s.list > %s_paired_1.fastq" % (one,name,name), shell=True)
call("seqtk subseq %s %s.list > %s_paired_2.fastq" % (two,name,name), shell=True)
call("rm %s.list" % (name), shell=True)
print "We're done!"