-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathrun_ABUNDANCE.sh
executable file
·119 lines (93 loc) · 3.5 KB
/
run_ABUNDANCE.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/bin/bash
###******************************************************************###
###******** WEVOTE ********###
###******** WEighted VOting Taxonomic IdEntification ********###
###******** ********###
###******** Copyright by Ahmed A. Metwally (C) 2016 ********###
###******** Finn-Perkins' Lab & Dai's Lab, UIC ********###
###******** ********###
###******** This code is released under GNU GPLv3 License ********###
###******** ********###
###******** Please report bugs & suggestions to: ********###
###******** <ametwa2@uic.edu> ********###
###******************************************************************###
## parse commandline arguments
# Help text printed for -h/--help and for invalid invocations. It embeds
# backslash escapes (\n, \t), so it has to be printed with `echo -e`.
Usage="\nUsage: $0 -i <input-file> -p <output-prefix> --db <path-to-taxonomy-DB> <options>
Implemented options:
-h|--help \t help flag
-i|--input <input-file> \t input query
-p|--prefix <output-prefix> \t Output prefix
--db <taxonomy_db> \t taxonomy database path
--threads <num-threads> \t Number of threads
--seqcount <contig-reads-count-file> \t File that contains how many reads are used to assemble each contig
"
# Directory containing this script; downloadTaxonomyDB.sh and bin/ABUNDANCE
# are looked up relative to it. "$0" is quoted so that an install path
# containing whitespace is not split into several words.
my_dir=$(dirname -- "$0")
# Option values; they stay empty until the matching option is parsed.
query=""             # -i / --input
prefix=""            # -p / --prefix
taxonomyDB=""        # --db
seqID_count_file=""  # --seqcount
# The script does not run with fewer than two arguments: it prints the usage
# text and stops. An explicit help request is answered on stdout with status 0;
# any other short invocation is a usage error, reported on stderr with a
# non-zero status so that calling scripts can detect it.
if (( $# < 2 )); then
  case "${1-}" in
    -h|--help)
      echo -e "$Usage"
      exit 0
      ;;
    *)
      echo -e "$Usage" >&2
      exit 1
      ;;
  esac
fi
#######################################
# Parse the command-line options into the script's global variables.
# Globals:
#   Usage             (read)    help text printed for -h and on errors
#   query             (written) absolute path of the -i input file; the
#                               directory part is resolved, the file name
#                               is kept exactly as given
#   prefix            (written) -p value if given, otherwise the input file
#                               name without its last extension
#   taxonomyDB        (written) --db value
#   threads           (written) --threads value (only stored and echoed by
#                               this script)
#   seqID_count_file  (written) --seqcount value
#   queryPath, queryName, dirPath (written) helper values derived from -i
# Arguments:
#   the script's command-line arguments
# Outputs:
#   echoes each accepted option to stdout; diagnostics go to stderr
# Exits:
#   0 after printing the help text; 1 on an unknown option, an option
#   without a value, or an input directory that cannot be resolved
#######################################
parse_args() {
  local key
  local prefix_given=""   # set once -p is seen, so -i never overrides it

  while (( $# > 0 )); do
    key=$1

    # Every option except -h consumes a value; refuse a dangling option
    # instead of silently storing an empty value.
    case "$key" in
      -i|--input|-p|--prefix|--db|--threads|--seqcount)
        if (( $# < 2 )); then
          echo "Sorry, option $key requires a value." >&2
          echo -e "$Usage" >&2
          exit 1
        fi
        ;;
    esac

    case "$key" in
      -h|--help)
        echo -e "$Usage"
        # Asking for help must not go on to run the pipeline.
        exit 0
        ;;
      -i|--input)
        query=$2
        queryName=$(basename -- "$query")
        # Resolve only the directory: resolving the file itself and then
        # re-attaching the original name would point at a non-existent path
        # whenever the input is a symlink into another directory.
        if ! queryPath=$(readlink -f -- "$(dirname -- "$query")"); then
          echo "Sorry, cannot resolve the directory of input file: $query" >&2
          exit 1
        fi
        query=${queryPath%/}/$queryName
        if [[ -z "$prefix_given" ]]; then
          # Strip the extension from the file name only, so a dot in a
          # directory name cannot truncate the default prefix.
          prefix=${queryName%.*}
          [[ -n "$prefix" ]] || prefix=$queryName
        fi
        dirPath=$(pwd)   # not referenced elsewhere in this script
        echo "Query= $query"
        shift
        ;;
      -p|--prefix)
        prefix=$2
        prefix_given=1
        echo "Output prefix= $prefix"
        shift
        ;;
      --db)
        taxonomyDB=$2
        echo "Taxonomy Database located at= $taxonomyDB"
        shift
        ;;
      --threads)
        threads=$2
        echo "Num of threads= $threads"
        shift
        ;;
      --seqcount)
        seqID_count_file=$2
        echo "Contig-reads-map file located at = $seqID_count_file"
        shift
        ;;
      *)
        echo "Sorry " "$1" " is an invalid option was received." >&2
        echo -e "$Usage" >&2
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
#### Preprocessing the WEVOTE output for read or contigs abundance
# Writes "<prefix>_WEVOTE_taxID_count.csv" with one "<value>,<count>" line per
# record, where <value> is the last field of each input line (presumably the
# taxID, going by the output file name).

# Without a readable input file awk would silently wait on stdin, so stop here.
if [[ -z "$query" || ! -r "$query" ]]; then
  echo "Error: input file (-i) is missing or not readable: '$query'" >&2
  exit 1
fi

taxid_count_csv="${prefix}_WEVOTE_taxID_count.csv"

if [[ -z "$seqID_count_file" ]]; then
  echo "read abundance"
  # No --seqcount file: every input line counts once.
  awk '{ print $NF ",1" }' "$query" > "$taxid_count_csv"
else
  if [[ ! -r "$seqID_count_file" ]]; then
    echo "Error: --seqcount file is not readable: '$seqID_count_file'" >&2
    exit 1
  fi
  echo "contig abundance"
  seqid_taxid_txt="${prefix}_WEVOTE_seqID_taxID.txt"
  # Keep the first and last field of each line, then reduce the first field
  # to its second '_'-separated piece, and sort on that key for join.
  awk '{ print $1 "\t" $NF}' "$query" \
    | awk -F '[_\t]' '{print $2 "\t" $NF}' \
    | sort -k1,1 > "$seqid_taxid_txt"
  # join needs BOTH inputs sorted on the join field, so the count file is
  # sorted the same way on the fly instead of being trusted to be in order.
  # Columns 2 and 3 of the joined line are then emitted comma-separated.
  join "$seqid_taxid_txt" <(sort -k1,1 "$seqID_count_file") \
    | cut -f2,3 -d " " \
    | sed "s/ /,/g" > "$taxid_count_csv"
fi
### Manipulate the NCBI taxonomy database
# An empty --db would make the checks below probe "/names_wevote.dmp" and call
# the download helper without a target directory, so require it explicitly.
if [[ -z "$taxonomyDB" ]]; then
  echo "Error: no taxonomy database path given (--db)." >&2
  exit 1
fi

# Run the download helper unless both WEVOTE taxonomy dump files are present.
if [[ ! -f "$taxonomyDB/names_wevote.dmp" || ! -f "$taxonomyDB/nodes_wevote.dmp" ]]; then
  echo "Downloading NCBI Taxonomy Database in progress ........"
  # Surface a failing helper instead of ignoring it; this is only a warning
  # because the helper's exit-status contract is not visible from this script.
  if ! "$my_dir/downloadTaxonomyDB.sh" "$taxonomyDB"; then
    echo "Warning: $my_dir/downloadTaxonomyDB.sh reported a failure for '$taxonomyDB'." >&2
  fi
fi
### Get the abundance and full taxonomy
# Runs the ABUNDANCE binary shipped next to this script on the taxID/count CSV.
# Every argument is quoted so that a prefix or path containing whitespace is
# passed as a single argument. As the last command, its exit status becomes
# the script's exit status.
"$my_dir/bin/ABUNDANCE" -i "${prefix}_WEVOTE_taxID_count.csv" -p "$prefix" -d "$taxonomyDB"