-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_iris.sh
executable file
·70 lines (62 loc) · 1.97 KB
/
get_iris.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/bash
# Get details of all IRIs across all Bioportal transforms.
#
# Scans every *_nodes.tsv file below the transforms directory, reduces the
# identifiers in the first column of each file to their prefixes, and writes
# one tab-separated row per (ontology, prefix) pair to stdout, preceded by a
# header row. Redirect stdout to create bioportal-prefixes.tsv.
#
# Usage: get_iris.sh [TRANSFORMS_DIR]
#   TRANSFORMS_DIR  directory to scan (default: ./transformed/ontologies/)
#
# External tools used: find, cut, rev, sed, sort, uniq, wc, basename, dirname.

# Directory scanned when no argument is given.
readonly TX_PATH="./transformed/ontologies/"
# A candidate delimiter is rejected when it leaves more than this many
# distinct prefixes for a file.
readonly TOO_MANY=100

#######################################
# Print the sorted, de-duplicated, non-empty prefixes found in column 1.
# A prefix is the identifier with its last delimiter and everything after it
# removed; identifiers that do not contain the delimiter are kept whole.
# Arguments: $1 - path to a nodes TSV file
#            $2 - single-character delimiter, or "" to keep identifiers as-is
# Outputs:   one prefix per line on stdout
#######################################
list_prefixes() {
  local file=$1 delim=$2
  if [[ -z "$delim" ]]; then
    cut -f1 -- "$file"
  else
    # rev | cut -f2- | rev drops the LAST delimited segment; cut passes
    # lines without the delimiter through unchanged.
    cut -f1 -- "$file" | rev | cut -d"$delim" -f2- | rev
  fi | sed '/^$/d' | sort | uniq
}

#######################################
# Print the output rows for a single nodes file.
# Delimiters are tried in the order '#', '/', ':', '.' (':' being the
# standard CURIE delimiter, at least for OBO imports). The first one that
# yields at most TOO_MANY prefixes is used; if none does, the identifiers
# are reported whole with the delimiter column set to OTHER.
# Globals:   TOO_MANY (read)
# Arguments: $1 - path to a nodes TSV file
# Outputs:   rows "ontology<TAB>prefix<TAB>delimiter<TAB>native" on stdout
#######################################
report_file() {
  local file=$1
  local ontology prefixes prefix candidate count native
  local delimiter="OTHER" found=0

  # The ontology name is the name of the directory containing the file.
  ontology=$(basename -- "$(dirname -- "$file")")

  for candidate in '#' '/' ':' '.'; do
    prefixes=$(list_prefixes "$file" "$candidate")
    # A here-string always appends a newline, so an empty result has to be
    # special-cased or it would be counted as one line.
    if [[ -z "$prefixes" ]]; then
      count=0
    else
      count=$(wc -l <<<"$prefixes")
    fi
    if ((count <= TOO_MANY)); then
      delimiter=$candidate
      found=1
      break
    fi
  done

  # Ran out of standard delimiters, so we may have some strangeness in IRI
  # format: fall back to the unmodified identifiers.
  if ((found == 0)); then
    prefixes=$(list_prefixes "$file" "")
  fi

  # One prefix per line; reading line by line avoids word-splitting and glob
  # expansion of the prefix text.
  while IFS= read -r prefix; do
    [[ -n "$prefix" ]] || continue
    if [[ "$ontology" == "$prefix" ]]; then
      native="True"
    elif [[ "$prefix" == "id" ]]; then
      # "id" is special-cased as not native.
      # NOTE(review): presumably the TSV header cell - confirm.
      native="False"
    else
      native="Unknown"
    fi
    # Data is passed as arguments, never as the format string, so '%' and
    # backslashes inside IRIs are emitted literally.
    printf '%s\t%s\t%s\t%s\n' "$ontology" "$prefix" "$delimiter" "$native"
  done <<<"$prefixes"
}

#######################################
# Entry point: print the header row, then the rows for every nodes file.
# Globals:   TX_PATH (read)
# Arguments: $1 - optional directory to scan (default: TX_PATH)
# Outputs:   TSV on stdout; diagnostics on stderr
# Returns:   1 if the directory does not exist
#######################################
main() {
  local tx_path=${1:-$TX_PATH}
  local file

  if [[ ! -d "$tx_path" ]]; then
    printf 'error: transforms directory not found: %s\n' "$tx_path" >&2
    return 1
  fi

  printf 'ontology\tprefix\tdelimiter\tnative\n'

  # The -name pattern is quoted so the shell cannot expand it against the
  # current directory; NUL-delimited output keeps unusual paths intact.
  while IFS= read -r -d '' file; do
    report_file "$file"
  done < <(find "$tx_path" -name '*_nodes.tsv' -print0)
}

main "$@"