# awsbatchwarc.template
---
# CloudFormation template header: format version and the stack description.
AWSTemplateFormatVersion: "2010-09-09"
Description: >-
  Orchestrating an Application Process with AWS Batch
  using CloudFormation
# Stack inputs. Every parameter is a plain string with a default, so the
# stack can be created without supplying any values.
Parameters:
  # Container image used by the Batch job definition.
  DockerImage:
    Description: Docker image or a repository from a registry
    Default: yinlinchen/vtl:iipc_v3
    Type: String
  # Name given to the AWS::Batch::JobDefinition resource.
  JDName:
    Description: Job definition name
    Default: WARCJobDefinition
    Type: String
  # Name given to the AWS::Batch::JobQueue resource.
  JQName:
    Description: Job queue name
    Default: WARCJobQueue
    Type: String
  # Explicit RoleName of the Lambda execution role.
  LambdaRoleName:
    Description: Lambda role name
    Default: warc-lambda-role
    Type: String
  # FunctionName of the Lambda function wired to the bucket notification.
  LambdaFunctionName:
    Description: Lambda function name
    Default: WARCBatchLambda
    Type: String
  # RepositoryName of the ECR repository.
  BatchRepositoryName:
    Description: Batch process repository name
    Default: warc-batch-repository
    Type: String
  # Prefix of the bucket name; the template appends "-<account id>" to it
  # when creating the bucket and when building the Lambda permission ARN.
  S3BucketName:
    Description: S3 bucket name
    Default: warc-batch-bucket
    Type: String
# Resources created by the stack: a small public VPC, the IAM roles used by
# Lambda / Batch / the container instances, the Batch job definition, queue
# and compute environment, the S3 bucket whose object-created events invoke
# the Lambda function, and an ECR repository.
Resources:
  # ---------------------------------------------------------------------
  # Networking: one VPC with a single public subnet routed to an internet
  # gateway.
  # ---------------------------------------------------------------------
  VPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: 10.0.0.0/16
      EnableDnsHostnames: true
  InternetGateway:
    Type: AWS::EC2::InternetGateway
  RouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId:
        Ref: VPC
  VPCGatewayAttachment:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId:
        Ref: VPC
      InternetGatewayId:
        Ref: InternetGateway
  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: EC2 Security Group for instances launched in the VPC by Batch
      VpcId:
        Ref: VPC
  Subnet:
    Type: AWS::EC2::Subnet
    Properties:
      CidrBlock: 10.0.0.0/24
      VpcId:
        Ref: VPC
      # Real boolean instead of the quoted string 'True'.
      MapPublicIpOnLaunch: true
  Route:
    Type: AWS::EC2::Route
    # The default route targets the internet gateway, which must already be
    # attached to the VPC. Nothing in Properties references the attachment,
    # so the ordering has to be declared explicitly.
    DependsOn: VPCGatewayAttachment
    Properties:
      RouteTableId:
        Ref: RouteTable
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId:
        Ref: InternetGateway
  SubnetRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId:
        Ref: RouteTable
      SubnetId:
        Ref: Subnet

  # ---------------------------------------------------------------------
  # IAM roles
  # ---------------------------------------------------------------------
  # Execution role assumed by the Lambda function.
  # NOTE(review): the attached managed policies are broad (full S3 and full
  # Batch access, plus the Batch service-role policy). Consider narrowing
  # them once the permissions the function code actually needs are known.
  LambdaExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName:
        Ref: LambdaRoleName
      AssumeRolePolicyDocument:
        # Quoted so it stays a string, matching the other policy documents
        # in this template.
        Version: '2012-10-17'
        Statement:
          - Action:
              - sts:AssumeRole
            Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AWSLambdaExecute
        - arn:aws:iam::aws:policy/AmazonS3FullAccess
        - arn:aws:iam::aws:policy/AWSBatchFullAccess
        - arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole
      Path: /
  # Service role assumed by AWS Batch for the managed compute environment.
  BatchServiceRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: batch.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole
  # Instance profile wrapping EcsInstanceRole; referenced by the compute
  # environment's InstanceRole.
  IamInstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Roles:
        - Ref: EcsInstanceRole
  # Role assumed by the EC2 container instances.
  EcsInstanceRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2008-10-17'
        Statement:
          - Sid: ''
            Effect: Allow
            Principal:
              Service: ec2.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role
        - arn:aws:iam::aws:policy/AmazonS3FullAccess
  # Role assumed by the Spot Fleet service for the SPOT compute resources.
  SpotIamFleetRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Sid: ''
            Effect: Allow
            Principal:
              Service: spotfleet.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AmazonEC2SpotFleetTaggingRole

  # ---------------------------------------------------------------------
  # AWS Batch
  # ---------------------------------------------------------------------
  # Container job: runs /notebooks/setup.sh in the DockerImage image with
  # 4 vCPUs and 4096 MiB of memory, attempted once.
  BatchProcessingJobDefinition:
    Type: AWS::Batch::JobDefinition
    Properties:
      Type: container
      JobDefinitionName:
        Ref: JDName
      ContainerProperties:
        Image:
          Ref: DockerImage
        Vcpus: 4
        Memory: 4096
        Command:
          - /notebooks/setup.sh
      RetryStrategy:
        Attempts: 1
  # Single queue backed by the one compute environment below.
  BatchProcessingJobQueue:
    Type: AWS::Batch::JobQueue
    Properties:
      JobQueueName:
        Ref: JQName
      Priority: 10
      ComputeEnvironmentOrder:
        - Order: 1
          ComputeEnvironment:
            Ref: ComputeEnvironment
  # Managed SPOT compute environment that scales between 0 and 256 vCPUs
  # inside the subnet and security group defined above.
  ComputeEnvironment:
    Type: AWS::Batch::ComputeEnvironment
    Properties:
      Type: MANAGED
      ComputeResources:
        Type: SPOT
        MinvCpus: 0
        DesiredvCpus: 0
        MaxvCpus: 256
        InstanceTypes:
          - optimal
        Subnets:
          - Ref: Subnet
        SecurityGroupIds:
          - Ref: SecurityGroup
        # NOTE(review): hard-coded AMI ID. AMI IDs are region-specific, so
        # this ties the template to one region - confirm which, or turn it
        # into a parameter.
        ImageId: ami-00487452f317c9be1
        InstanceRole:
          Ref: IamInstanceProfile
        BidPercentage: 40
        SpotIamFleetRole:
          Ref: SpotIamFleetRole
        Tags:
          name: IIPCSpot
      ServiceRole:
        Ref: BatchServiceRole

  # ---------------------------------------------------------------------
  # S3 -> Lambda trigger
  # ---------------------------------------------------------------------
  # Bucket named "<S3BucketName>-<account id>". Every object-created event
  # invokes the Lambda function. DependsOn makes sure the invoke permission
  # exists before the notification configuration is applied.
  BatchProcessS3Bucket:
    Type: AWS::S3::Bucket
    DependsOn: BatchProcessBucketPermission
    Properties:
      BucketName:
        Fn::Sub: '${S3BucketName}-${AWS::AccountId}'
      NotificationConfiguration:
        LambdaConfigurations:
          - Event: 's3:ObjectCreated:*'
            Function:
              Fn::GetAtt:
                - BatchProcessingLambdaInvokeFunction
                - Arn
  # Lets S3 invoke the function. SourceArn is built from the same name
  # pattern as the bucket rather than referencing the bucket resource,
  # because the bucket itself depends on this permission.
  BatchProcessBucketPermission:
    Type: AWS::Lambda::Permission
    Properties:
      Action: lambda:InvokeFunction
      FunctionName:
        Ref: BatchProcessingLambdaInvokeFunction
      Principal: s3.amazonaws.com
      SourceAccount:
        Ref: AWS::AccountId
      SourceArn:
        Fn::Sub: 'arn:aws:s3:::${S3BucketName}-${AWS::AccountId}'
  # Lambda function invoked by the bucket notification.
  BatchProcessingLambdaInvokeFunction:
    Type: AWS::Lambda::Function
    Properties:
      FunctionName:
        Ref: LambdaFunctionName
      Description: Python Function Handler that trigger AWS Batch
      Handler: index.lambda_handler
      # python3.7 has reached end of support in AWS Lambda, so a supported
      # runtime is used instead.
      # NOTE(review): the code bundle lives outside this template - confirm
      # it runs on this runtime before deploying.
      Runtime: python3.12
      MemorySize: 2048
      Timeout: 600
      Role:
        Fn::GetAtt:
          - LambdaExecutionRole
          - Arn
      Code:
        # NOTE(review): hard-coded code location; the bucket must be
        # readable from wherever this stack is created - confirm.
        S3Bucket: iipc-warc-code
        S3Key: '48c6e1469ae747f650b1d7f191442bb2'

  # ---------------------------------------------------------------------
  # ECR
  # ---------------------------------------------------------------------
  # Repository whose policy grants the container-instance role pull and
  # push access.
  BatchProcessRepository:
    Type: AWS::ECR::Repository
    Properties:
      RepositoryName:
        Ref: BatchRepositoryName
      RepositoryPolicyText:
        Version: '2012-10-17'
        Statement:
          - Sid: AllowPushPull
            Effect: Allow
            Principal:
              AWS:
                # Use the role's real ARN instead of assembling one from
                # the role name.
                - Fn::GetAtt:
                    - EcsInstanceRole
                    - Arn
            Action:
              - ecr:GetDownloadUrlForLayer
              - ecr:BatchGetImage
              - ecr:BatchCheckLayerAvailability
              - ecr:PutImage
              - ecr:InitiateLayerUpload
              - ecr:UploadLayerPart
              - ecr:CompleteLayerUpload
# Values exported by the stack. Each one is the Ref of the corresponding
# resource defined in this template.
Outputs:
  ComputeEnvironmentArn:
    Description: ARN of the AWS Batch compute environment
    Value:
      Ref: ComputeEnvironment
  BatchProcessingJobQueueArn:
    Description: ARN of the AWS Batch job queue
    Value:
      Ref: BatchProcessingJobQueue
  BatchProcessingJobDefinitionArn:
    Description: ARN of the AWS Batch job definition
    Value:
      Ref: BatchProcessingJobDefinition
  BucketName:
    Description: Name of the S3 bucket whose uploads trigger the Lambda function
    Value:
      Ref: BatchProcessS3Bucket
  LambdaName:
    Description: Name of the Lambda function invoked by the bucket notification
    Value:
      Ref: BatchProcessingLambdaInvokeFunction