-
Notifications
You must be signed in to change notification settings - Fork 0
/
sf_search_user_files_all_volumes.sh
executable file
·143 lines (126 loc) · 5.62 KB
/
sf_search_user_files_all_volumes.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/bin/bash
#
# Search every Starfish volume for files owned by one user.
#
# Usage: sf_search_user_files_all_volumes.sh <username>
#
# This first stage runs `sf query` once per volume, rewrites the leading
# "volume:" prefix of each result into the matching filesystem mount path,
# drops header lines, and leaves the de-duplicated result sorted by size in
# /tmp/<username>_files. Each result line is comma separated:
#   path,username,uid,groupname,size,ct

## check if running as root
if [[ "$EUID" -ne 0 ]]; then
  echo "Error: Must be run as root." >&2
  exit 1
fi

user="${1:-}"

# The username becomes part of a file name under /tmp, so refuse an empty
# value (would produce /tmp/_files) and anything containing a slash (would
# let the output land outside /tmp).
if [[ -z "$user" || "$user" == */* ]]; then
  echo "Usage: ${0##*/} <username>" >&2
  exit 1
fi

if ! command -v sf >/dev/null 2>&1; then
  echo "Error: the 'sf' command was not found in PATH." >&2
  exit 1
fi

volumes=(
  alldata clustersysshare home1 home2 home3 home4 mapseq-analysis
  nextgenout2 nextgenout3 nextgenout4 nextgenout5 peroulab seqdata
  seqdata_hosp seqware-analysis
)
out_file="/tmp/${user}_files"
out_file_dirs="/tmp/${user}_dirs"

# Start from a fresh, regular file. Removing any leftover first and creating
# with noclobber means a pre-planted symlink at this predictable /tmp path is
# never followed by a root-owned write.
rm -f -- "$out_file"
if ! ( set -o noclobber; : > "$out_file" ) 2>/dev/null; then
  echo "Error: cannot create $out_file" >&2
  exit 1
fi

echo "*****Searching for files owned by $user*****"
for entry in "${volumes[@]}"; do
  echo "Searching $entry"
  # A failing volume is reported but does not stop the remaining queries.
  sf query "${entry}:" -d , -h --username "$user" \
    --format "vol_path username uid groupname size ct" >> "$out_file" \
    || echo "Warning: sf query exited with status $? for volume $entry" >&2
done

echo "*****Changing starfish output format to file paths*****"
# One pass instead of one sed per volume. Every pattern is anchored to the
# start of the line so that only the volume prefix of the path field is
# rewritten, never a "name:" sequence that happens to occur inside a file name.
sed -i -E \
  -e 's#^(alldata|clustersysshare|nextgenout[2-5]|seqdata_hosp):#/datastore/\1/#' \
  -e 's#^home[1-4]:#/home/#' \
  -e 's#^mapseq-analysis:#/datastore/rclbg/mapseq-analysis/#' \
  -e 's#^(peroulab|seqdata|seqware-analysis):#/datastore/labnproject/\1/#' \
  -- "$out_file"

echo "*****Removing unneeded lines*****"
# Header lines start with the first --format column name; anchoring keeps
# data lines whose path merely contains "vol_path".
sed -i -e '/^vol_path/d' -- "$out_file"

echo "*****Sorting uniquely by size*****"
# Primary key: field 5 only (size), numeric. Secondary key: the whole line.
# With -u, uniqueness is judged over all keys, so only byte-identical lines
# are collapsed; files that merely share a size are all kept.
# LC_ALL=C stops a locale thousands separator (",") from being read as part
# of the number.
# NOTE(review): if sf's -h flag emits human-readable sizes (e.g. 1.5K), a
# numeric sort will not order them correctly - confirm against real output.
LC_ALL=C sort -u -t, -k5,5n -k1 -o "$out_file" -- "$out_file"
echo
echo "*****Testing file paths to make sure files exist*****"

#######################################
# Print a rolling "current/total" progress counter.
# Arguments: $1 - 1-based index of the line just processed
#            $2 - total number of lines
# Outputs:   Before the last line the cursor is moved back up one row so the
#            next update overwrites this one; the last update stays on
#            screen and is followed by a blank line.
#######################################
show_progress() {
  local current=$1 total=$2
  if (( current < total )); then
    printf '%s/%s\n\033[A' "$current" "$total"
  else
    printf '%s/%s\n\n' "$current" "$total"
  fi
}

# An empty result file means the queries returned nothing for this user.
if [[ -s "$out_file" ]]; then
  # Scratch file reused by both rewriting passes; removed on any exit path.
  tmp_work=$(mktemp "${out_file}.XXXXXX") || {
    echo "Error: could not create a temporary file." >&2
    exit 1
  }
  trap 'rm -f -- "$tmp_work"' EXIT

  # Pass 1: keep only the lines whose path (first comma-separated field) is
  # still present on disk. Testing the path directly avoids parsing error
  # text and deleting by line number, which could remove the wrong lines or
  # empty the whole file when a name contained quotes or regex characters.
  # Paths that themselves contain a comma are cut short here and dropped.
  total=$(( $(wc -l < "$out_file") ))
  COUNTER=0
  kept=0
  while IFS= read -r line || [[ -n "$line" ]]; do
    COUNTER=$((COUNTER + 1))
    path=${line%%,*}
    # -L keeps dangling symlinks: the link itself still exists.
    if [[ -n "$path" ]] && [[ -e "$path" || -L "$path" ]]; then
      printf '%s\n' "$line" >&3
      kept=$((kept + 1))
    fi
    show_progress "$COUNTER" "$total"
  done < "$out_file" 3> "$tmp_work"
  # Copy back instead of mv so the result file keeps its permissions.
  cat -- "$tmp_work" > "$out_file"

  echo
  echo "*****Removing files that don't exist from output file*****"
  echo "$((COUNTER - kept)) of $COUNTER entries removed"

  echo "*****Converting timestamps to human readable dates*****"
  # Pass 2: replace the last field (epoch seconds) with the output of date.
  total=$(( $(wc -l < "$out_file") ))
  COUNTER=0
  while IFS= read -r line || [[ -n "$line" ]]; do
    COUNTER=$((COUNTER + 1))
    ctime=${line##*,}
    # date's own error message is suppressed on purpose: it would break the
    # rolling counter, and a line with an unparsable timestamp is simply
    # written back unchanged.
    if [[ "$line" == *,* ]] \
      && ctime_converted=$(date -d "@${ctime}" 2>/dev/null); then
      printf '%s,%s\n' "${line%,*}" "$ctime_converted" >&3
    else
      printf '%s\n' "$line" >&3
    fi
    show_progress "$COUNTER" "$total"
  done < "$out_file" 3> "$tmp_work"
  cat -- "$tmp_work" > "$out_file"

  rm -f -- "$tmp_work"
  trap - EXIT

  echo "*****Extracting directories from files*****"
  # Top-level summary: the first four path components of every file.
  awk -F '/' '{print "/"$2"/"$3"/"$4"/"$5}' "$out_file" \
    | awk -F ',' '{print $1}' \
    | sort -u > "$out_file_dirs"

  # The share/labs trees are summarised two levels deeper (six components),
  # one section per nextgenout volume.
  for n in 2 3 4 5; do
    {
      echo
      echo "/datastore/nextgenout${n}/share/labs directories:"
      grep -F -- "nextgenout${n}/share/labs" "$out_file" \
        | awk -F '/' '{print "/"$2"/"$3"/"$4"/"$5"/"$6"/"$7}' \
        | awk -F ',' '{print $1}' \
        | sort -u
    } >> "$out_file_dirs"
  done

  echo "Result saved to:"
  echo "$out_file"
  echo "$out_file_dirs"
else
  echo "No files owned by $user found"
fi