Skip to content

Commit

Permalink
wip karpenter
Browse files Browse the repository at this point in the history
  • Loading branch information
smritidahal653 committed Jun 7, 2024
1 parent aa202c6 commit 5771f2d
Show file tree
Hide file tree
Showing 14 changed files with 827 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

# Image URL to use all building/pushing image targets
REGISTRY ?= YOUR_REGISTRY
REGISTRY ?= smritidahal
IMG_NAME ?= workspace
VERSION ?= v0.2.2
GPU_PROVISIONER_VERSION ?= 0.2.0
Expand Down
22 changes: 13 additions & 9 deletions charts/kaito/workspace/templates/nvidia-device-plugin-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,20 @@ spec:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.azure.com/cluster
operator: Exists
- key: type
operator: NotIn
values:
- virtual-kubelet
- key: karpenter.sh/provisioner-name
operator: Exists
- key: kaito.sh/machine-type
- key: k8s.io/cloud-provider-aws
operator: Exists
# - key: karpenter.k8s.aws/instance-gpu-count
# operator: Exists
# - key: kubernetes.azure.com/cluster
# operator: Exists
# - key: type
# operator: NotIn
# values:
# - virtual-kubelet
# - key: karpenter.sh/provisioner-name
# operator: Exists
# - key: kaito.sh/machine-type
# operator: Exists
tolerations:
# Allow this pod to be rescheduled while the node is in "critical add-ons only" mode.
# This, along with the annotation above, marks this pod as a critical add-on.
Expand Down
4 changes: 2 additions & 2 deletions charts/kaito/workspace/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# Declare variables to be passed into your templates.
replicaCount: 1
image:
repository: mcr.microsoft.com/aks/kaito/workspace
pullPolicy: IfNotPresent
repository: smritidahal/workspace
pullPolicy: Always
tag: 0.2.2
imagePullSecrets: []
podAnnotations: {}
Expand Down
113 changes: 113 additions & 0 deletions controller-policy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
{
"Statement": [
{
"Action": [
"ssm:GetParameter",
"ec2:DescribeImages",
"ec2:RunInstances",
"ec2:DescribeSubnets",
"ec2:DescribeSecurityGroups",
"ec2:DescribeLaunchTemplates",
"ec2:DescribeInstances",
"ec2:DescribeInstanceTypes",
"ec2:DescribeInstanceTypeOfferings",
"ec2:DescribeAvailabilityZones",
"ec2:DeleteLaunchTemplate",
"ec2:CreateTags",
"ec2:CreateLaunchTemplate",
"ec2:CreateFleet",
"ec2:DescribeSpotPriceHistory",
"pricing:GetProducts"
],
"Effect": "Allow",
"Resource": "*",
"Sid": "Karpenter"
},
{
"Action": "ec2:TerminateInstances",
"Condition": {
"StringLike": {
"ec2:ResourceTag/karpenter.sh/nodepool": "*"
}
},
"Effect": "Allow",
"Resource": "*",
"Sid": "ConditionalEC2Termination"
},
{
"Effect": "Allow",
"Action": "iam:PassRole",
"Resource": "arn:aws:iam::271069111444:role/KarpenterNodeRole-smriti-karpenter-test",
"Sid": "PassNodeIAMRole"
},
{
"Effect": "Allow",
"Action": "eks:DescribeCluster",
"Resource": "arn:aws:eks:us-west-2:271069111444:cluster/smriti-karpenter-test",
"Sid": "EKSClusterEndpointLookup"
},
{
"Sid": "AllowScopedInstanceProfileCreationActions",
"Effect": "Allow",
"Resource": "*",
"Action": [
"iam:CreateInstanceProfile"
],
"Condition": {
"StringEquals": {
"aws:RequestTag/kubernetes.io/cluster/smriti-karpenter-test": "owned",
"aws:RequestTag/topology.kubernetes.io/region": "us-west-2"
},
"StringLike": {
"aws:RequestTag/karpenter.k8s.aws/ec2nodeclass": "*"
}
}
},
{
"Sid": "AllowScopedInstanceProfileTagActions",
"Effect": "Allow",
"Resource": "*",
"Action": [
"iam:TagInstanceProfile"
],
"Condition": {
"StringEquals": {
"aws:ResourceTag/kubernetes.io/cluster/smriti-karpenter-test": "owned",
"aws:ResourceTag/topology.kubernetes.io/region": "us-west-2",
"aws:RequestTag/kubernetes.io/cluster/smriti-karpenter-test": "owned",
"aws:RequestTag/topology.kubernetes.io/region": "us-west-2"
},
"StringLike": {
"aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass": "*",
"aws:RequestTag/karpenter.k8s.aws/ec2nodeclass": "*"
}
}
},
{
"Sid": "AllowScopedInstanceProfileActions",
"Effect": "Allow",
"Resource": "*",
"Action": [
"iam:AddRoleToInstanceProfile",
"iam:RemoveRoleFromInstanceProfile",
"iam:DeleteInstanceProfile"
],
"Condition": {
"StringEquals": {
"aws:ResourceTag/kubernetes.io/cluster/smriti-karpenter-test": "owned",
"aws:ResourceTag/topology.kubernetes.io/region": "us-west-2"
},
"StringLike": {
"aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass": "*"
}
}
},
{
"Sid": "AllowInstanceProfileReadActions",
"Effect": "Allow",
"Resource": "*",
"Action": "iam:GetInstanceProfile"
}
],
"Version": "2012-10-17"
}
18 changes: 18 additions & 0 deletions controller-trust-policy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Federated": "arn:aws:iam::271069111444:oidc-provider/oidc.eks.us-west-2.amazonaws.com/id/D49F4508A5884FAD12B47D2380492694"
},
"Action": "sts:AssumeRoleWithWebIdentity",
"Condition": {
"StringEquals": {
"oidc.eks.us-west-2.amazonaws.com/id/D49F4508A5884FAD12B47D2380492694:aud": "sts.amazonaws.com",
"oidc.eks.us-west-2.amazonaws.com/id/D49F4508A5884FAD12B47D2380492694:sub": "system:serviceaccount:karpenter:karpenter"
}
}
}
]
}
21 changes: 21 additions & 0 deletions deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
# "inflate" Deployment: a pause container whose only resource demand is one
# NVIDIA GPU. It is created with zero replicas, so nothing is scheduled until
# the Deployment is scaled up.
# NOTE(review): presumably scaled up by hand to exercise GPU node
# provisioning -- confirm intended usage.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inflate
spec:
  # No pods are created until this is raised.
  replicas: 0
  selector:
    matchLabels:
      app: inflate
  template:
    metadata:
      labels:
        app: inflate
    spec:
      # Pods are removed immediately on delete; there is no graceful shutdown.
      terminationGracePeriodSeconds: 0
      containers:
        - name: inflate
          image: public.ecr.aws/eks-distro/kubernetes/pause:3.7
          resources:
            limits:
              nvidia.com/gpu: 1  # Specify the number of GPUs
51 changes: 51 additions & 0 deletions karpenter.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env bash
# Renders the Karpenter Helm chart for an EKS cluster into ./karpenter.yaml.
#
# Needs the `aws` and `helm` CLIs on PATH with credentials for the target
# account. The IAM role/policy commands further down are kept commented out;
# uncomment them to run those steps.

# --- Cluster and account settings -------------------------------------------
export AWS_PARTITION="aws" # if you are not using standard partitions, you may need to configure to aws-cn / aws-us-gov
export CLUSTER_NAME="smriti-karpenter-test"
export AWS_REGION="us-west-2"

# Assign first, export second: `export VAR="$(cmd)"` hides cmd's exit status.
# The region is passed explicitly so the lookup does not depend on which
# region environment variable the installed AWS CLI version honours.
OIDC_ENDPOINT="$(aws eks describe-cluster --name "${CLUSTER_NAME}" --region "${AWS_REGION}" --query "cluster.identity.oidc.issuer" --output text)"
export OIDC_ENDPOINT
AWS_ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)"
export AWS_ACCOUNT_ID

export KARPENTER_VERSION="0.37.0"
export KARPENTER_NAMESPACE=karpenter

echo "cluster name: ${CLUSTER_NAME}, aws region: ${AWS_REGION}, account id: ${AWS_ACCOUNT_ID}, oidc endpoint: ${OIDC_ENDPOINT}"

# --- AMI selection ----------------------------------------------------------
# AMI IDs are pinned; the commented SSM lookups below fetch the recommended
# EKS-optimized images for K8S_VERSION instead.
# NOTE(review): these variables are exported but not referenced again in this
# script -- presumably consumed by other tooling; verify.
export K8S_VERSION=1.28
export ARM_AMI_ID="ami-06378a82bdb4de802"
export AMD_AMI_ID="ami-0408823a87d4095d9"
export GPU_AMI_ID="ami-07dd1b9e1928d9121"
# ARM_AMI_ID="$(aws ssm get-parameter --name /aws/service/eks/optimized-ami/${K8S_VERSION}/amazon-linux-2-arm64/recommended/image_id --query Parameter.Value --output text)"
# AMD_AMI_ID="$(aws ssm get-parameter --name /aws/service/eks/optimized-ami/${K8S_VERSION}/amazon-linux-2/recommended/image_id --query Parameter.Value --output text)"
# GPU_AMI_ID="$(aws ssm get-parameter --name /aws/service/eks/optimized-ami/${K8S_VERSION}/amazon-linux-2-gpu/recommended/image_id --query Parameter.Value --output text)"

# echo "arm ami id: ${ARM_AMI_ID}, amd ami id: ${AMD_AMI_ID}, gpu ami id: ${GPU_AMI_ID}"

# --- IAM: node role (commented out) -----------------------------------------
# aws iam create-role --role-name "KarpenterNodeRole-${CLUSTER_NAME}" \
#     --assume-role-policy-document file://node-trust-policy.json

# aws iam attach-role-policy --role-name "KarpenterNodeRole-${CLUSTER_NAME}" \
#     --policy-arn "arn:${AWS_PARTITION}:iam::aws:policy/AmazonEKSWorkerNodePolicy"

# aws iam attach-role-policy --role-name "KarpenterNodeRole-${CLUSTER_NAME}" \
#     --policy-arn "arn:${AWS_PARTITION}:iam::aws:policy/AmazonEKS_CNI_Policy"

# aws iam attach-role-policy --role-name "KarpenterNodeRole-${CLUSTER_NAME}" \
#     --policy-arn "arn:${AWS_PARTITION}:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"

# aws iam attach-role-policy --role-name "KarpenterNodeRole-${CLUSTER_NAME}" \
#     --policy-arn "arn:${AWS_PARTITION}:iam::aws:policy/AmazonSSMManagedInstanceCore"


# --- IAM: controller role (commented out) -----------------------------------
# aws iam create-role --role-name "KarpenterControllerRole-${CLUSTER_NAME}" \
#     --assume-role-policy-document file://controller-trust-policy.json

# Inline policies are added with put-role-policy; attach-role-policy only
# accepts --policy-arn and rejects --policy-name / --policy-document.
# aws iam put-role-policy --role-name "KarpenterControllerRole-${CLUSTER_NAME}" \
#     --policy-name "KarpenterControllerPolicy-${CLUSTER_NAME}" \
#     --policy-document file://controller-policy.json

# --- Render the chart -------------------------------------------------------
# `helm template` only renders manifests locally; the output is written to
# karpenter.yaml and nothing is applied to the cluster here.
helm template karpenter oci://public.ecr.aws/karpenter/karpenter --version "${KARPENTER_VERSION}" --namespace "${KARPENTER_NAMESPACE}" \
    --set "settings.clusterName=${CLUSTER_NAME}" \
    --set "serviceAccount.annotations.eks\.amazonaws\.com/role-arn=arn:${AWS_PARTITION}:iam::${AWS_ACCOUNT_ID}:role/KarpenterControllerRole-${CLUSTER_NAME}" \
    --set controller.resources.requests.cpu=1 \
    --set controller.resources.requests.memory=1Gi \
    --set controller.resources.limits.cpu=1 \
    --set controller.resources.limits.memory=1Gi > karpenter.yaml
Loading

0 comments on commit 5771f2d

Please sign in to comment.