Skip to content

Commit

Permalink
Refactoring terraform
Browse files Browse the repository at this point in the history
  • Loading branch information
namejsjeongkr committed Jan 5, 2025
1 parent 06c0bb1 commit 2d6580d
Show file tree
Hide file tree
Showing 13 changed files with 243 additions and 114 deletions.
7 changes: 7 additions & 0 deletions ai-ml/bionemo/eks.tf
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,10 @@ module "eks" {
}
}
}


# Availability zones visible to the configured AWS provider; no filter is applied.
data "aws_availability_zones" "available" {
}

# Authentication token for the EKS cluster whose name is exported by module.eks.
data "aws_eks_cluster_auth" "this" {
  name = module.eks.cluster_name
}
27 changes: 27 additions & 0 deletions ai-ml/bionemo/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#---------------------------------------------------------------
# Local values
#---------------------------------------------------------------
locals {
  name     = var.name
  region   = var.region
  vpc_cidr = var.vpc_cidr

  # Only the first two availability zones returned for the region are used.
  azs = slice(data.aws_availability_zones.available.names, 0, 2)

  tags = {
    Blueprint  = local.name
    GithubRepo = "github.com/awslabs/data-on-eks"
  }

  # Routable private subnets (Private NAT Gateway -> Transit Gateway -> second VPC for overlapping CIDRs).
  # One subnet per AZ, 3 extra prefix bits, network numbers 0..n-1.
  # e.g., var.vpc_cidr = "10.1.0.0/21" => ["10.1.0.0/24", "10.1.1.0/24"]
  # 256 addresses each; AWS reserves 5 per subnet => 251 usable IPs per subnet/AZ.
  private_subnets = [for idx, az in local.azs : cidrsubnet(local.vpc_cidr, 3, idx)]

  # Routable public subnets with NAT Gateway and Internet Gateway.
  # One subnet per AZ, 5 extra prefix bits, network numbers starting at 8.
  # e.g., var.vpc_cidr = "10.1.0.0/21" => ["10.1.2.0/26", "10.1.2.64/26"]
  # 64 addresses each; AWS reserves 5 per subnet => 59 usable IPs per subnet/AZ.
  public_subnets = [for idx, az in local.azs : cidrsubnet(local.vpc_cidr, 5, idx + 8)]

  # Database private subnets: one per AZ, 3 extra prefix bits, network numbers starting at 5.
  # e.g., var.vpc_cidr = "10.1.0.0/21" => ["10.1.5.0/24", "10.1.6.0/24"]
  database_private_subnets = [for idx, az in local.azs : cidrsubnet(local.vpc_cidr, 3, idx + 5)]

  # RFC6598 range 100.64.0.0/16 for the EKS data plane (EKS Control Plane ENI + Nodes + Pods),
  # split in halves across the two AZs using the first secondary CIDR block.
  # e.g., var.secondary_cidr_blocks = ["100.64.0.0/16"] => ["100.64.0.0/17", "100.64.128.0/17"]
  # 32768 addresses each; AWS reserves 5 per subnet => 32763 usable IPs per subnet/AZ.
  secondary_ip_range_private_subnets = [for idx, az in local.azs : cidrsubnet(element(var.secondary_cidr_blocks, 0), 1, idx)]
}
32 changes: 32 additions & 0 deletions ai-ml/bionemo/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Default AWS provider; the region is taken from local.region.
provider "aws" {
  region = local.region
}

# Kubernetes provider pointed at the EKS cluster created by module.eks.
provider "kubernetes" {
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

  # Obtain the token through the AWS CLI instead of a static data-source token,
  # which is short-lived and can expire during a long apply.
  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed.
    # --region is passed explicitly so the CLI does not depend on the caller's
    # AWS_REGION / profile default, which may be unset or differ from local.region.
    args = [
      "eks", "get-token",
      "--cluster-name", module.eks.cluster_name,
      "--region", local.region,
    ]
  }
}

# Aliased AWS provider pinned to us-east-1.
# Amazon ECR Public always authenticates against the `us-east-1` region.
# Docs -> https://docs.aws.amazon.com/AmazonECR/latest/public/public-registries.html
provider "aws" {
  region = "us-east-1"
  alias  = "ecr"
}

# Helm provider pointed at the EKS cluster created by module.eks.
provider "helm" {
  kubernetes {
    host                   = module.eks.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

    # Obtain the token through the AWS CLI instead of a static data-source token,
    # which is short-lived and can expire during a long apply.
    exec {
      api_version = "client.authentication.k8s.io/v1beta1"
      command     = "aws"
      # This requires the awscli to be installed locally where Terraform is executed.
      # --region is passed explicitly so the CLI does not depend on the caller's
      # AWS_REGION / profile default, which may be unset or differ from local.region.
      args = [
        "eks", "get-token",
        "--cluster-name", module.eks.cluster_name,
        "--region", local.region,
      ]
    }
  }
}

# kubectl provider pointed at the EKS cluster created by module.eks.
provider "kubectl" {
  apply_retry_count      = 10
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  load_config_file       = false

  # Obtain the token through the AWS CLI instead of a static data-source token,
  # which is short-lived and can expire during a long apply.
  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed.
    # --region is passed explicitly so the CLI does not depend on the caller's
    # AWS_REGION / profile default, which may be unset or differ from local.region.
    args = [
      "eks", "get-token",
      "--cluster-name", module.eks.cluster_name,
      "--region", local.region,
    ]
  }
}
14 changes: 0 additions & 14 deletions ai-ml/bionemo/vpc.tf
Original file line number Diff line number Diff line change
@@ -1,17 +1,3 @@
locals {
  # Routable private subnets (Private NAT Gateway -> Transit Gateway -> second VPC for overlapping CIDRs).
  # One subnet per AZ, 3 extra prefix bits, network numbers 0..n-1.
  # e.g., var.vpc_cidr = "10.1.0.0/21" => ["10.1.0.0/24", "10.1.1.0/24"]
  # 256 addresses each; AWS reserves 5 per subnet => 251 usable IPs per subnet/AZ.
  private_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 3, idx)]

  # Routable public subnets with NAT Gateway and Internet Gateway.
  # One subnet per AZ, 5 extra prefix bits, network numbers starting at 8.
  # e.g., var.vpc_cidr = "10.1.0.0/21" => ["10.1.2.0/26", "10.1.2.64/26"]
  # 64 addresses each; AWS reserves 5 per subnet => 59 usable IPs per subnet/AZ.
  public_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 5, idx + 8)]

  # Database private subnets: one per AZ, 3 extra prefix bits, network numbers starting at 5.
  # e.g., var.vpc_cidr = "10.1.0.0/21" => ["10.1.5.0/24", "10.1.6.0/24"]
  database_private_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 3, idx + 5)]

  # RFC6598 range 100.64.0.0/16 for the EKS data plane (EKS Control Plane ENI + Nodes + Pods),
  # split in halves across the AZs using the first secondary CIDR block.
  # e.g., var.secondary_cidr_blocks = ["100.64.0.0/16"] => ["100.64.0.0/17", "100.64.128.0/17"]
  # 32768 addresses each; AWS reserves 5 per subnet => 32763 usable IPs per subnet/AZ.
  secondary_ip_range_private_subnets = [for idx, az in local.azs : cidrsubnet(element(var.secondary_cidr_blocks, 0), 1, idx)]
}

#---------------------------------------------------------------
# VPC
#---------------------------------------------------------------
Expand Down
18 changes: 18 additions & 0 deletions ai-ml/ray/terraform/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#---------------------------------------------------------------
# Local values
#---------------------------------------------------------------
locals {
  # Values passed straight through from input variables.
  name            = var.name
  region          = var.region
  cluster_version = var.eks_cluster_version

  # Fixed primary and secondary VPC CIDR ranges.
  vpc_cidr           = "10.0.0.0/16"
  secondary_vpc_cidr = "100.64.0.0/16"

  # Only the first three availability zones returned for the region are used.
  azs = slice(data.aws_availability_zones.available.names, 0, 3)

  tags = {
    Blueprint  = local.name
    GithubRepo = "github.com/awslabs/data-on-eks"
  }
}
74 changes: 0 additions & 74 deletions ai-ml/ray/terraform/main.tf
Original file line number Diff line number Diff line change
@@ -1,57 +1,3 @@
#---------------------------------------------------------------
# Providers
#---------------------------------------------------------------

# Default AWS provider; the region is taken from local.region.
provider "aws" {
  region = local.region
}

# Aliased AWS provider pinned to us-east-1.
# Used for Karpenter Helm chart
provider "aws" {
  alias  = "ecr_public_region"
  region = "us-east-1"
}

# Kubernetes provider pointed at the EKS cluster created by module.eks.
provider "kubernetes" {
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

  # Token is obtained by running the AWS CLI.
  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed.
    # --region is passed explicitly so the CLI does not depend on the caller's
    # AWS_REGION / profile default, which may be unset or differ from local.region.
    args = [
      "eks", "get-token",
      "--cluster-name", module.eks.cluster_name,
      "--region", local.region,
    ]
  }
}

# Helm provider pointed at the EKS cluster created by module.eks.
provider "helm" {
  kubernetes {
    host                   = module.eks.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

    # Token is obtained by running the AWS CLI.
    exec {
      api_version = "client.authentication.k8s.io/v1beta1"
      command     = "aws"
      # This requires the awscli to be installed locally where Terraform is executed.
      # --region is passed explicitly so the CLI does not depend on the caller's
      # AWS_REGION / profile default, which may be unset or differ from local.region.
      args = [
        "eks", "get-token",
        "--cluster-name", module.eks.cluster_name,
        "--region", local.region,
      ]
    }
  }
}

# kubectl provider pointed at the EKS cluster created by module.eks.
provider "kubectl" {
  apply_retry_count      = 5
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  load_config_file       = false

  # Token is obtained by running the AWS CLI.
  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed.
    # --region is passed explicitly so the CLI does not depend on the caller's
    # AWS_REGION / profile default, which may be unset or differ from local.region.
    args = [
      "eks", "get-token",
      "--cluster-name", module.eks.cluster_name,
      "--region", local.region,
    ]
  }
}

#---------------------------------------------------------------
# Data Sources
#---------------------------------------------------------------
Expand All @@ -63,26 +9,6 @@ data "aws_ecrpublic_authorization_token" "token" {
provider = aws.ecr_public_region
}

#---------------------------------------------------------------
# Local values
#---------------------------------------------------------------

locals {
  # Values passed straight through from input variables.
  name            = var.name
  region          = var.region
  cluster_version = var.eks_cluster_version

  # Fixed primary and secondary VPC CIDR ranges.
  vpc_cidr           = "10.0.0.0/16"
  secondary_vpc_cidr = "100.64.0.0/16"

  # Only the first three availability zones returned for the region are used.
  azs = slice(data.aws_availability_zones.available.names, 0, 3)

  tags = {
    Blueprint  = local.name
    GithubRepo = "github.com/awslabs/data-on-eks"
  }
}

#---------------------------------------------------------------
# EKS Cluster
#---------------------------------------------------------------
Expand Down
53 changes: 53 additions & 0 deletions ai-ml/ray/terraform/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#---------------------------------------------------------------
# Providers
#---------------------------------------------------------------

# Default AWS provider; the region is taken from local.region.
provider "aws" {
  region = local.region
}

# Aliased AWS provider pinned to us-east-1.
# Used for Karpenter Helm chart
provider "aws" {
  alias  = "ecr_public_region"
  region = "us-east-1"
}

# Kubernetes provider pointed at the EKS cluster created by module.eks.
provider "kubernetes" {
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

  # Token is obtained by running the AWS CLI.
  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed.
    # --region is passed explicitly so the CLI does not depend on the caller's
    # AWS_REGION / profile default, which may be unset or differ from local.region.
    args = [
      "eks", "get-token",
      "--cluster-name", module.eks.cluster_name,
      "--region", local.region,
    ]
  }
}

# Helm provider pointed at the EKS cluster created by module.eks.
provider "helm" {
  kubernetes {
    host                   = module.eks.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

    # Token is obtained by running the AWS CLI.
    exec {
      api_version = "client.authentication.k8s.io/v1beta1"
      command     = "aws"
      # This requires the awscli to be installed locally where Terraform is executed.
      # --region is passed explicitly so the CLI does not depend on the caller's
      # AWS_REGION / profile default, which may be unset or differ from local.region.
      args = [
        "eks", "get-token",
        "--cluster-name", module.eks.cluster_name,
        "--region", local.region,
      ]
    }
  }
}

# kubectl provider pointed at the EKS cluster created by module.eks.
provider "kubectl" {
  apply_retry_count      = 5
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  load_config_file       = false

  # Token is obtained by running the AWS CLI.
  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed.
    # --region is passed explicitly so the CLI does not depend on the caller's
    # AWS_REGION / profile default, which may be unset or differ from local.region.
    args = [
      "eks", "get-token",
      "--cluster-name", module.eks.cluster_name,
      "--region", local.region,
    ]
  }
}
3 changes: 0 additions & 3 deletions ai-ml/ray/terraform/variables.tf
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
# AWS region input; defaults to us-west-2.
variable "region" {
  type        = string
  description = "Region"
  default     = "us-west-2"
}

# Shared name input; defaults to "ray-cluster".
variable "name" {
  type        = string
  description = "Name of the VPC, EKS Cluster and Ray cluster"
  default     = "ray-cluster"
}

# EKS version input, kept as a string; defaults to "1.25".
variable "eks_cluster_version" {
  type        = string
  description = "EKS Cluster version"
  default     = "1.25"
}
14 changes: 14 additions & 0 deletions ai-ml/trainium-inferentia/eks.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
#---------------------------------------------------------------
# Data Sources
#---------------------------------------------------------------

# ECR Public authorization token, requested through the aliased aws.ecr provider.
data "aws_ecrpublic_authorization_token" "token" {
  provider = aws.ecr
}

# Identity of the caller whose credentials Terraform is running with.
data "aws_caller_identity" "current" {
}

# IAM session context looked up from the caller identity's ARN.
data "aws_iam_session_context" "current" {
  arn = data.aws_caller_identity.current.arn
}

#---------------------------------------------------------------
# EKS Cluster
#---------------------------------------------------------------
Expand Down
22 changes: 11 additions & 11 deletions ai-ml/trainium-inferentia/jupyterhub.tf
Original file line number Diff line number Diff line change
@@ -1,14 +1,3 @@
#-----------------------------------------------------------------------------------------
# JupyterHub Single User IRSA, maybe that block could be incorporated in add-on registry
#-----------------------------------------------------------------------------------------

# "jupyterhub" namespace, created only when var.enable_jupyterhub is true.
resource "kubernetes_namespace_v1" "jupyterhub" {
  count = var.enable_jupyterhub ? 1 : 0

  metadata {
    name = "jupyterhub"
  }
}

module "jupyterhub_single_user_irsa" {
count = var.enable_jupyterhub ? 1 : 0

Expand All @@ -28,6 +17,17 @@ module "jupyterhub_single_user_irsa" {
}
}

#-----------------------------------------------------------------------------------------
# JupyterHub namespace, created only when var.enable_jupyterhub is true.
# Note carried over from the original header: the JupyterHub Single User IRSA block
# could maybe be incorporated in the add-on registry.
#-----------------------------------------------------------------------------------------
resource "kubernetes_namespace_v1" "jupyterhub" {
  count = var.enable_jupyterhub ? 1 : 0

  metadata {
    name = "jupyterhub"
  }
}

resource "kubernetes_service_account_v1" "jupyterhub_single_user_sa" {
count = var.enable_jupyterhub ? 1 : 0

Expand Down
33 changes: 33 additions & 0 deletions ai-ml/trainium-inferentia/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#---------------------------------------------------------------
# Local values
#---------------------------------------------------------------

locals {
  name   = var.name
  region = var.region

  tags = {
    Blueprint  = local.name
    GithubRepo = "github.com/awslabs/data-on-eks"
  }

  # Trn1 and Inf2 instances are available in specific AZs in us-east-1,
  # us-east-2, and us-west-2. For Trn1, the first AZ id (below) should be used.
  az_mapping = {
    "us-east-1" = ["use1-az6", "use1-az5"]
    "us-east-2" = ["use2-az3", "use2-az1"]
    "us-west-2" = ["usw2-az4", "usw2-az1"]
  }

  # AZ ids for the selected region; only the regions listed in az_mapping have an entry.
  azs = local.az_mapping[local.region]

  # Routable private subnets (Private NAT Gateway -> Transit Gateway -> second VPC for overlapping CIDRs).
  # One subnet per AZ, 3 extra prefix bits, network numbers 0..n-1.
  # e.g., var.vpc_cidr = "10.1.0.0/21" => ["10.1.0.0/24", "10.1.1.0/24"]
  # 256 addresses each; AWS reserves 5 per subnet => 251 usable IPs per subnet/AZ.
  private_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 3, idx)]

  # Routable public subnets with NAT Gateway and Internet Gateway.
  # One subnet per AZ, 5 extra prefix bits, network numbers starting at 8.
  # e.g., var.vpc_cidr = "10.1.0.0/21" => ["10.1.2.0/26", "10.1.2.64/26"]
  # 64 addresses each; AWS reserves 5 per subnet => 59 usable IPs per subnet/AZ.
  public_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 5, idx + 8)]

  # RFC6598 range 100.64.0.0/16 for the EKS data plane (EKS Control Plane ENI + Nodes + Pods),
  # split in halves across the two AZs using the first secondary CIDR block.
  # e.g., var.secondary_cidr_blocks = ["100.64.0.0/16"] => ["100.64.0.0/17", "100.64.128.0/17"]
  # 32768 addresses each; AWS reserves 5 per subnet => 32763 usable IPs per subnet/AZ.
  secondary_ip_range_private_subnets = [for idx, az in local.azs : cidrsubnet(element(var.secondary_cidr_blocks, 0), 1, idx)]
}
48 changes: 48 additions & 0 deletions ai-ml/trainium-inferentia/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Default AWS provider; the region is taken from local.region.
provider "aws" {
  region = local.region
}

# Aliased AWS provider pinned to us-east-1, addressable as aws.ecr.
provider "aws" {
  region = "us-east-1"
  alias  = "ecr"
}

# Kubernetes provider pointed at the EKS cluster created by module.eks.
provider "kubernetes" {
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

  # Token is obtained by running the AWS CLI.
  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed.
    # --region is passed explicitly so the CLI does not depend on the caller's
    # AWS_REGION / profile default, which may be unset or differ from local.region.
    args = [
      "eks", "get-token",
      "--cluster-name", module.eks.cluster_name,
      "--region", local.region,
    ]
  }
}

# Helm provider pointed at the EKS cluster created by module.eks.
provider "helm" {
  kubernetes {
    host                   = module.eks.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

    # Token is obtained by running the AWS CLI.
    exec {
      api_version = "client.authentication.k8s.io/v1beta1"
      command     = "aws"
      # This requires the awscli to be installed locally where Terraform is executed.
      # --region is passed explicitly so the CLI does not depend on the caller's
      # AWS_REGION / profile default, which may be unset or differ from local.region.
      args = [
        "eks", "get-token",
        "--cluster-name", module.eks.cluster_name,
        "--region", local.region,
      ]
    }
  }
}

# kubectl provider pointed at the EKS cluster created by module.eks.
provider "kubectl" {
  apply_retry_count      = 5
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  load_config_file       = false

  # Token is obtained by running the AWS CLI.
  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed.
    # --region is passed explicitly so the CLI does not depend on the caller's
    # AWS_REGION / profile default, which may be unset or differ from local.region.
    args = [
      "eks", "get-token",
      "--cluster-name", module.eks.cluster_name,
      "--region", local.region,
    ]
  }
}
Loading

0 comments on commit 2d6580d

Please sign in to comment.