-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfh-S3-cromwell.conf
executable file
·212 lines (196 loc) · 8.17 KB
/
fh-S3-cromwell.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
include required(classpath("application"))
# Metadata service: names the actor class that implements it and sets the
# row-count safety threshold applied to metadata reads.
services.MetadataService {
  class: "cromwell.services.metadata.impl.MetadataServiceActor"
  config.metadata-read-row-number-safety-threshold: 2000000
}
# Database access: Slick MySQL profile with the MySQL Connector/J driver.
# No url, user or password is set in this block; presumably they are supplied
# elsewhere (another config layer or the command line) -- confirm.
database {
  profile: "slick.jdbc.MySQLProfile$"
  db.driver: "com.mysql.cj.jdbc.Driver"
  # Presumably milliseconds -- confirm against the connection pool in use.
  db.connectionTimeout: 5000
}
# Timeout applied when the web service binds its listening endpoint.
webservice.binding-timeout = 10s
# Workflow heartbeat settings: how often heartbeats are written, how long one
# stays valid, and how heartbeat writes are batched.
workflow-heartbeats {
  # Time between heartbeat writes.
  heartbeat-interval = 2 minutes
  # Time-to-live of a heartbeat.
  ttl = 10 minutes
  # How long heartbeat writes may keep failing before a shutdown is triggered
  # (as the key name indicates -- confirm exact semantics in the Cromwell docs).
  write-failure-shutdown-duration = 5 minutes
  # Batch size and threshold for heartbeat writes.
  write-batch-size = 10000
  write-threshold = 10000
}
# Engine-wide system settings.
system {
  # Enable the file hash cache.
  file-hash-cache: true

  # Per-type limits for reading workflow inputs. Every type listed here is
  # raised to the same value, 1073741823 (2^30 - 1).
  input-read-limits {
    lines: 1073741823
    string: 1073741823
    json: 1073741823
    tsv: 1073741823
    object: 1073741823
  }

  # I/O request throttle: number-of-requests allowed per time window, and the
  # number of attempts made for a request.
  io {
    number-of-requests: 100000
    # normally 100 seconds
    per: 5 seconds
    number-of-attempts: 10
  }
}
# Workflow logs are not treated as temporary: all of them are kept so they can
# be referred back to later.
workflow-options.workflow-log-temporary = false
# Request timeout of the Akka HTTP server.
akka {
  http {
    server {
      request-timeout = 60s
    }
  }
}
# Call caching: reuse the results of jobs that have already been run instead
# of running them again.
call-caching {
  # Turn call caching on (the default is false).
  enabled: true

  # Permanently invalidate a cache entry when its results cannot be reused.
  # Set this to false if some cache copies are expected to fail for external
  # reasons that should not invalidate the entry (e.g. auth differences
  # between users). The default is true.
  invalidate-bad-cache-results: true
}
# AWS settings: one auth entry named "default" that uses the "default" scheme,
# and the region used for AWS access. The "default" auth is the one referenced
# by the s3 filesystem entries in this file.
aws {
  application-name: "cromwell"
  region: "us-west-2"
  auths: [
    { name: "default", scheme: "default" }
  ]
}
# Filesystems available to the engine itself: the local filesystem (try a
# soft link first, then a copy) and S3 through the "default" auth.
engine.filesystems {
  local.localization = ["soft-link", "copy"]
  s3.auth = "default"
}
# Backend and filesystem
# A single provider, "gizmo", is defined and selected as the default. It uses
# Cromwell's config-driven shared-filesystem backend factory and submits jobs
# to SLURM with sbatch (see submit / submit-docker below).
backend {
default = "gizmo"
providers {
gizmo {
actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
config {
# Link glob results with soft links (ln -s) instead of hard links; this is
# needed on BeeGFS.
glob-link-command = "ln -sL GLOB_PATTERN GLOB_DIRECTORY"
# Upper bound on the number of jobs this backend runs concurrently.
concurrent-job-limit = 5000
# Runtime attributes a task may set. Their values are substituted into the
# placeholders of the submit commands below.
# NOTE(review): memory_mb is declared here but none of the sbatch calls in this
# file reference it, so a task's memory request does not appear to reach SLURM
# -- confirm whether omitting --mem is intentional.
runtime-attributes = """
Int cpu = 1
String? walltime
Int memory_mb = 2000
String partition = "campus-new"
String? docker
String? modules = ""
String? dockerSL
String? account
"""
# Submit command for jobs that Cromwell does not run in docker mode
# (presumably tasks without a `docker` attribute -- confirm). Two paths:
#   * dockerSL non-empty: load Apptainer, pull docker://<dockerSL> into
#     $SINGULARITYCACHEDIR as a .sif file unless that file already exists, then
#     sbatch the job script wrapped in `apptainer exec --bind $SCRATCHPATH`.
#   * dockerSL empty: `module load` the requested modules and sbatch the
#     script under /bin/bash.
# SINGULARITYCACHEDIR and SCRATCHPATH are not set anywhere in this file; they
# are expected to come from the environment the command runs in.
# --time and -A are only emitted when walltime / account are provided.
# NOTE(review): the image pull is not guarded by a lock, so jobs submitted at
# the same time for a not-yet-cached image may pull it concurrently -- confirm
# this is acceptable.
submit = """
set -e
source /app/lmod/lmod/init/bash
module use /app/modules/all
module purge
if [ ! -z '${dockerSL}' ]; then
# Ensure apptainer is loaded if it's installed as a module
module load Apptainer/1.1.6
# Build the Docker image into a apptainer image
DOCKER_NAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< ${dockerSL})
# The image will live together with all the other images to force caching of the .sif files themselves - note, always use docker hub tags!!!
IMAGE=$SINGULARITYCACHEDIR/$DOCKER_NAME.sif
echo $DOCKER_NAME
echo $IMAGE
echo $SINGULARITYCACHEDIR
echo $SCRATCHPATH
if [ ! -f $IMAGE ]; then # If we already have the image, skip everything
apptainer pull $IMAGE docker://${dockerSL}
fi
# Submit the script to SLURM
sbatch \
--partition=${partition} \
-J ${job_name} \
-D ${cwd} \
-o ${cwd}/execution/stdout \
-e ${cwd}/execution/stderr \
--cpus-per-task=${cpu} ${"--time=" + walltime} ${"-A " + account} \
--wrap "apptainer exec --bind $SCRATCHPATH $IMAGE ${job_shell} ${script}"
else
module load ${modules}
sbatch \
--partition=${partition} \
-J ${job_name} \
-D ${cwd} \
-o ${out} \
-e ${err} \
--cpus-per-task=${cpu} ${"--time=" + walltime} ${"-A " + account} \
--wrap "/bin/bash ${script}"
fi
"""
# Submit command for docker-mode jobs (presumably tasks that set the `docker`
# attribute -- confirm). The image is pulled into $SINGULARITYCACHEDIR as a
# .sif file unless it already exists, and the job's docker script is run with
# `apptainer exec`, binding the call directory onto the in-container working
# directory and also binding $HOME.
# NOTE(review): as in `submit`, the pull is not locked against concurrent
# submissions of the same image.
submit-docker = """
set -e
source /app/lmod/lmod/init/bash
module use /app/modules/all
module purge
# Ensure apptainer is loaded if it's installed as a module
module load Apptainer/1.1.6
# Build the Docker image into a apptainer image
DOCKER_NAME=$(sed -e 's/[^A-Za-z0-9._-]/_/g' <<< ${docker})
# The image will live together with all the other images to force "caching" of the .sif files themselves - note, always use docker hub tags!!!
IMAGE=$SINGULARITYCACHEDIR/$DOCKER_NAME.sif
if [ ! -f $IMAGE ]; then # If we already have the image, skip everything
apptainer pull $IMAGE docker://${docker}
fi
# Submit the script to SLURM
sbatch \
--partition=${partition} \
-J ${job_name} \
-D ${cwd} \
-o ${cwd}/execution/stdout \
-e ${cwd}/execution/stderr \
--cpus-per-task=${cpu} ${"--time=" + walltime} ${"-A " + account} \
--wrap "apptainer exec --bind ${cwd}:${docker_cwd} --bind $HOME $IMAGE ${job_shell} ${docker_script}"
"""
# Filesystems available to jobs on this backend: the local shared filesystem
# and S3 (using the "default" auth).
filesystems {
local {
# How inputs are localized into a call directory, tried in the order listed.
# Hard links do not work on the local SLURM cluster, so "hard-link" is left
# out. Available options (here and for caching): "soft-link", "hard-link",
# "copy".
localization: [
"soft-link", "copy"
]
# Call-caching settings for the filesystem side.
caching {
# How files are duplicated when a cached result is copied. Strategies are
# attempted in the order listed. Available: "hard-link", "soft-link", "copy",
# "cached-copy".
duplication-strategy: [
"soft-link", "copy"
]
# "cached-copy" is an experimental feature. It copies files to a file cache in
# <workflow_root>/cached-inputs and then hard-links them into the
# <call_dir>/inputs directory. It is intended for a shared filesystem that
# runs on multiple physical disks and where docker containers are used:
# hard links do not work between different physical disks, and soft links do
# not work with docker. Plain copying uses a lot of space when many tasks use
# the same input; cached-copy copies the file only once to the physical disk
# containing <workflow_root> and then hard-links it for every task that needs
# it, which can save a lot of space.
#
# hashing-strategy -- possible values: md5, xxh64, fingerprint, path, path+modtime
# For an extended explanation see: https://cromwell.readthedocs.io/en/stable/Configuring/#call-caching
# "md5" computes an md5 hash of the file content.
# "xxh64" computes an xxh64 hash of the file content. Much faster than md5.
# "fingerprint" takes the last modified time, the size, and an xxh64 hash of the first 10 mb to create a file fingerprint.
# This strategy is only effective if the duplication-strategy (above) is set to "hard-link", as copying changes the last modified time.
# "path" computes an md5 hash of the file path. This strategy is only effective if the duplication-strategy (above) is set to "soft-link",
# so that the original file path can be hashed.
# "path+modtime" computes an md5 hash of the file path and the last modified time. The same conditions as for "path" apply.
# Default: "md5"
hashing-strategy: "path+modtime"
# When true, checks whether a sibling file with the same name plus the .md5 extension exists and, if so, uses its content as the hash.
# If false, or if the .md5 file does not exist, the hashing strategy defined above is used.
# Default: false
check-sibling-md5: false
}
}
s3 { auth = "default" }
}
# SLURM job control: cancel a job with scancel, poll it with squeue, and
# extract the job id from the "Submitted batch job <id>" line printed on
# submission.
kill = "scancel ${job_id}"
kill-docker = "scancel ${job_id}"
check-alive = "squeue -j ${job_id}"
job-id-regex = "Submitted batch job (\\d+).*"
}
}
}
}