-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_star_genomes.qsub
117 lines (109 loc) · 3.65 KB
/
generate_star_genomes.qsub
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/bin/bash -l
#$ -S /bin/bash
#$ -cwd
#$ -j y
#$ -m eas
#$ -pe omp 8
#$ -N generate_star_genomes
# Build a set of STAR genomes for a given reference
# Adam Gower, based on scripts by Josh Campbell
# Parse command-line arguments
#
# parse_args [options...]
#   Reads the job's options and stores their values in the global variables
#   'reference', 'fasta_path', 'output_path', and 'star_version'.
#   The two path options are canonicalized with 'readlink --canonicalize'.
#   Options that are not supplied leave their variables untouched.
# Returns:
#   0 on success; 1 if getopt rejects the command line (unknown option or
#   missing option argument) or an unexpected token is encountered.
parse_args() {
  local parsed_options

  # Run getopt on its own (rather than directly inside 'eval set --') so that
  # its exit status can be checked; getopt prints its own diagnostic on failure
  if ! parsed_options="$(
    getopt --options=r:f:o:s: \
      --longoptions=reference:,fasta-path:,output-path:,star-version: \
      --name "$0" -- "$@"
  )"; then
    return 1
  fi

  # getopt emits a shell-quoted argument list; 'eval set --' installs it as the
  # positional parameters of this function
  eval set -- "${parsed_options}"

  while true; do
    case "$1" in
      -r | --reference)
        reference="$2"
        shift 2
        ;;
      -f | --fasta-path)
        fasta_path="$(readlink --canonicalize "$2")"
        shift 2
        ;;
      -o | --output-path)
        output_path="$(readlink --canonicalize "$2")"
        shift 2
        ;;
      -s | --star-version)
        star_version="$2"
        shift 2
        ;;
      --)
        # End of options
        shift
        break
        ;;
      *)
        echo "Internal error" >&2
        return 1
        ;;
    esac
  done
}

# Abort the job if the command line could not be parsed
parse_args "$@" || exit 1
# Print the usage message to standard output
print_usage() {
  echo "Usage:"
  echo " qsub [qsub flags] generate_star_genomes.qsub [options]"
  echo " -r|--reference [reference name]"
  echo " -f|--fasta-path [Path to FASTA files]"
  echo "Options:"
  echo " -r, --reference Reference name"
  echo " (e.g., hg38, mm10, bosTau9, MesAur1.0, etc.)"
  echo " -f, --fasta-path Path to FASTA input files"
  echo " (i.e., path containing 'base' folder, etc.)"
  echo -n " -o, --output-path "
  echo -n "Path where output will be written "
  echo "(will be created if it does not exist)"
  echo -n " "
  echo "(Default: ./[STAR version]/[reference name]/)"
  echo " -s, --star-version Version of STAR to use"
  echo " (if omitted, default STAR version is used)"
}

# Build one STAR genome per subdirectory of the FASTA path
#
# Reads the global variables 'reference', 'fasta_path', 'output_path', and
# 'star_version', plus 'TMPDIR' and 'NSLOTS' from the environment.
# Overwrites 'star_version' with the version reported by the loaded STAR and
# fills in 'output_path' when it is empty. Changes the working directory to
# the FASTA path.
# Returns:
#   1 (after printing the usage message) if the reference name or FASTA path
#   is missing, or if the FASTA path cannot be entered; otherwise the status
#   of the final chmod.
build_star_genomes() {
  # Both the reference name and the FASTA path are required: with an empty
  # FASTA path, the 'cd' below would resolve to '/' and every entry of the
  # filesystem root would be treated as a genome type
  if [[ -z "${reference}" || -z "${fasta_path}" ]]; then
    print_usage
    return 1
  fi

  # Load the requested STAR module if a version was specified and
  # 'module avail' produces any output for it; otherwise load the default
  if [[ -n "${star_version}" ]] \
    && [[ $(module avail "star/${star_version}" |& wc -l) != 0 ]]; then
    module load "star/${star_version}"
  else
    module load star
  fi

  # Get version of STAR module that was loaded
  star_version="$(STAR --version | sed 's/STAR_//')"

  # If output path is not specified, a directory named with the STAR version
  # and reference name will be created within the current working directory,
  # and output will be written there
  if [[ -z "${output_path}" ]]; then
    output_path="$(pwd)/${star_version}/${reference}"
  fi

  # Create the output path if it does not already exist
  [[ -d "${output_path}" ]] || mkdir --verbose -p "${output_path}"/
  # Restrict the directory to its owner (to keep others out until ready)
  chmod 2700 "${output_path}"/

  echo "Reference name: ${reference}"
  echo "FASTA input files located in: ${fasta_path}/"
  echo "STAR version: ${star_version}"
  echo "STAR genomes will be written to: ${output_path}/"

  cd "${fasta_path}"/ || return 1

  # Run STAR in 'genomeGenerate' mode for each subdirectory of the FASTA path
  local genome_type
  for genome_type in *; do
    # Skip plain files, and the literal '*' left behind by an unmatched glob
    [[ -d "${genome_type}" ]] || continue

    # Create genome-level directory restricted to its owner
    # (to keep users out until it's ready)
    mkdir --verbose --mode=2700 "${output_path}/${genome_type}/"

    # Generate genome
    # Note: '--outTmpDir $TMPDIR' throws an error;
    #       STAR needs to make the temporary directory itself
    STAR \
      --runMode genomeGenerate \
      --genomeDir "${output_path}/${genome_type}" \
      --genomeFastaFiles "${fasta_path}/${genome_type}/${reference}.fa" \
      --outFileNamePrefix "${output_path}/${genome_type}/" \
      --outTmpDir "${TMPDIR}/${genome_type}" \
      --runThreadN "${NSLOTS}"
  done

  # Make all output files and directories world-readable and read-only
  chmod -Rc ugo-w,ugo+rX "${output_path}"/
}

# When this call is the last command in the script, its return status becomes
# the exit status of the job
build_star_genomes