Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Refactoring terraform #720

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions ai-ml/bionemo/eks.tf
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,10 @@ module "eks" {
}
}
}


# Availability Zones for the configured region.
# The lookup is limited to zones that need no opt-in, so Local Zones or
# Wavelength Zones enabled on the account cannot end up among the first
# entries of `names`, which local.azs takes with slice(..., 0, 2).
data "aws_availability_zones" "available" {
  filter {
    name   = "opt-in-status"
    values = ["opt-in-not-required"]
  }
}

# Authentication token for the EKS cluster whose name is exported by module.eks.
# NOTE(review): tokens from this data source are presumably short-lived and read
# once per Terraform run — confirm long applies do not outlive the token.
data "aws_eks_cluster_auth" "this" {
name = module.eks.cluster_name
}
27 changes: 27 additions & 0 deletions ai-ml/bionemo/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#---------------------------------------------------------------
# Local variables
#---------------------------------------------------------------
locals {
  # Blueprint identity and target region, passed straight through from the input variables
  region = var.region
  name   = var.name

  # Primary VPC CIDR and the first two Availability Zone names returned by the data source
  vpc_cidr = var.vpc_cidr
  azs      = slice(data.aws_availability_zones.available.names, 0, 2)

  # Private subnets, one per AZ: 3 extra prefix bits on the VPC CIDR, network numbers 0..n-1.
  # Meant for the Private NAT Gateway -> Transit Gateway -> second VPC path (overlapping CIDRs).
  # Example: var.vpc_cidr = "10.1.0.0/21" yields ["10.1.0.0/24", "10.1.1.0/24"] (256 addresses each).
  private_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 3, idx)]

  # Public subnets (NAT Gateway + Internet Gateway), one per AZ: 5 extra prefix bits, network numbers from 8.
  # Example: var.vpc_cidr = "10.1.0.0/21" yields ["10.1.2.0/26", "10.1.2.64/26"] (64 addresses each).
  public_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 5, idx + 8)]

  # Database subnets, one per AZ: same sizing as the private subnets, network numbers from 5.
  # Example: var.vpc_cidr = "10.1.0.0/21" yields ["10.1.5.0/24", "10.1.6.0/24"].
  database_private_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 3, idx + 5)]

  # EKS data-plane subnets (control plane ENIs, nodes, pods) carved from the first secondary CIDR block,
  # split in half: one subnet per AZ.
  # Example: var.secondary_cidr_blocks = ["100.64.0.0/16"] (inside the RFC 6598 shared address space)
  # yields ["100.64.0.0/17", "100.64.128.0/17"] (32768 addresses each).
  secondary_ip_range_private_subnets = [for idx, az in local.azs : cidrsubnet(element(var.secondary_cidr_blocks, 0), 1, idx)]

  # Tags shared by the resources of this blueprint
  tags = {
    GithubRepo = "github.com/awslabs/data-on-eks"
    Blueprint  = local.name
  }
}
32 changes: 32 additions & 0 deletions ai-ml/bionemo/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Default AWS provider; its region comes from local.region.
provider "aws" {
region = local.region
}

# Kubernetes provider connected to the EKS cluster created by module.eks.
# Credentials are obtained through an exec plugin so that a fresh token is
# requested for every Terraform operation, instead of reusing a single token
# that was read from a data source earlier in the run.
provider "kubernetes" {
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed
    args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
  }
}

# Amazon ECR Public authenticates through the `us-east-1` region, so this
# aliased provider pins that region regardless of where the blueprint is deployed.
# Docs -> https://docs.aws.amazon.com/AmazonECR/latest/public/public-registries.html
provider "aws" {
alias = "ecr"
region = "us-east-1"
}

# Helm provider connected to the EKS cluster created by module.eks.
# Credentials are obtained through an exec plugin so that a fresh token is
# requested for every Terraform operation, instead of reusing a single token
# that was read from a data source earlier in the run.
provider "helm" {
  kubernetes {
    host                   = module.eks.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

    exec {
      api_version = "client.authentication.k8s.io/v1beta1"
      command     = "aws"
      # This requires the awscli to be installed locally where Terraform is executed
      args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
    }
  }
}

# kubectl provider connected to the EKS cluster created by module.eks.
# Connection details are given inline (load_config_file = false), and
# credentials are obtained through an exec plugin so that a fresh token is
# requested for every Terraform operation, instead of reusing a single token
# that was read from a data source earlier in the run.
provider "kubectl" {
  apply_retry_count      = 10
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  load_config_file       = false

  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    # This requires the awscli to be installed locally where Terraform is executed
    args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
  }
}
14 changes: 0 additions & 14 deletions ai-ml/bionemo/vpc.tf
Original file line number Diff line number Diff line change
@@ -1,17 +1,3 @@
locals {
  # Private subnets, one per AZ: 3 extra prefix bits on the VPC CIDR, network numbers 0..n-1.
  # Meant for the Private NAT Gateway -> Transit Gateway -> second VPC path (overlapping CIDRs).
  # Example: var.vpc_cidr = "10.1.0.0/21" yields ["10.1.0.0/24", "10.1.1.0/24"] (256 addresses each).
  private_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 3, idx)]

  # Public subnets (NAT Gateway + Internet Gateway), one per AZ: 5 extra prefix bits, network numbers from 8.
  # Example: var.vpc_cidr = "10.1.0.0/21" yields ["10.1.2.0/26", "10.1.2.64/26"] (64 addresses each).
  public_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 5, idx + 8)]

  # Database subnets, one per AZ: same sizing as the private subnets, network numbers from 5.
  database_private_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 3, idx + 5)]

  # EKS data-plane subnets (control plane ENIs, nodes, pods) carved from the first secondary CIDR block,
  # split in half: one subnet per AZ.
  # Example: var.secondary_cidr_blocks = ["100.64.0.0/16"] yields ["100.64.0.0/17", "100.64.128.0/17"]
  # (32768 addresses each).
  secondary_ip_range_private_subnets = [for idx, az in local.azs : cidrsubnet(element(var.secondary_cidr_blocks, 0), 1, idx)]
}

#---------------------------------------------------------------
# VPC
#---------------------------------------------------------------
Expand Down
18 changes: 18 additions & 0 deletions ai-ml/ray/terraform/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#---------------------------------------------------------------
# Locals
#---------------------------------------------------------------
locals {
  # Values forwarded from the input variables
  region          = var.region
  name            = var.name
  cluster_version = var.eks_cluster_version

  # Network layout: fixed primary and secondary VPC CIDRs, spread over the
  # first three Availability Zone names returned by the data source
  azs                = slice(data.aws_availability_zones.available.names, 0, 3)
  secondary_vpc_cidr = "100.64.0.0/16"
  vpc_cidr           = "10.0.0.0/16"

  # Tags shared by the resources of this blueprint
  tags = {
    GithubRepo = "github.com/awslabs/data-on-eks"
    Blueprint  = local.name
  }
}
74 changes: 0 additions & 74 deletions ai-ml/ray/terraform/main.tf
Original file line number Diff line number Diff line change
@@ -1,57 +1,3 @@
#---------------------------------------------------------------
# Providers
#---------------------------------------------------------------

# Default AWS provider; its region comes from local.region.
provider "aws" {
region = local.region
}

# Aliased AWS provider pinned to us-east-1; used for the Karpenter Helm chart
provider "aws" {
  alias  = "ecr_public_region"
  region = "us-east-1"
}

# Kubernetes provider pointed at the EKS cluster created by module.eks
provider "kubernetes" {
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  host                   = module.eks.cluster_endpoint

  # Credentials come from `aws eks get-token`, so the AWS CLI has to be
  # installed on the machine that runs Terraform
  exec {
    command     = "aws"
    api_version = "client.authentication.k8s.io/v1beta1"
    args = [
      "eks",
      "get-token",
      "--cluster-name",
      module.eks.cluster_name,
    ]
  }
}

# Helm provider; its nested kubernetes block targets the EKS cluster created by module.eks
provider "helm" {
  kubernetes {
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
    host                   = module.eks.cluster_endpoint

    # Credentials come from `aws eks get-token`, so the AWS CLI has to be
    # installed on the machine that runs Terraform
    exec {
      command     = "aws"
      api_version = "client.authentication.k8s.io/v1beta1"
      args = [
        "eks",
        "get-token",
        "--cluster-name",
        module.eks.cluster_name,
      ]
    }
  }
}

# kubectl provider targeting the EKS cluster created by module.eks.
# Connection details are given inline; kubeconfig loading is switched off.
provider "kubectl" {
  load_config_file       = false
  apply_retry_count      = 5
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  host                   = module.eks.cluster_endpoint

  # Credentials come from `aws eks get-token`, so the AWS CLI has to be
  # installed on the machine that runs Terraform
  exec {
    command     = "aws"
    api_version = "client.authentication.k8s.io/v1beta1"
    args = [
      "eks",
      "get-token",
      "--cluster-name",
      module.eks.cluster_name,
    ]
  }
}

#---------------------------------------------------------------
# Data Sources
#---------------------------------------------------------------
Expand All @@ -63,26 +9,6 @@ data "aws_ecrpublic_authorization_token" "token" {
provider = aws.ecr_public_region
}

#---------------------------------------------------------------
# Locals
#---------------------------------------------------------------

locals {
  # Values forwarded from the input variables
  region          = var.region
  name            = var.name
  cluster_version = var.eks_cluster_version

  # Network layout: fixed primary and secondary VPC CIDRs, spread over the
  # first three Availability Zone names returned by the data source
  azs                = slice(data.aws_availability_zones.available.names, 0, 3)
  secondary_vpc_cidr = "100.64.0.0/16"
  vpc_cidr           = "10.0.0.0/16"

  # Tags shared by the resources of this blueprint
  tags = {
    GithubRepo = "github.com/awslabs/data-on-eks"
    Blueprint  = local.name
  }
}

#---------------------------------------------------------------
# EKS Cluster
#---------------------------------------------------------------
Expand Down
53 changes: 53 additions & 0 deletions ai-ml/ray/terraform/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#---------------------------------------------------------------
# Providers
#---------------------------------------------------------------

# Default AWS provider; its region comes from local.region.
provider "aws" {
region = local.region
}

# Aliased AWS provider pinned to us-east-1; used for the Karpenter Helm chart
provider "aws" {
  alias  = "ecr_public_region"
  region = "us-east-1"
}

# Kubernetes provider pointed at the EKS cluster created by module.eks
provider "kubernetes" {
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  host                   = module.eks.cluster_endpoint

  # Token retrieval is delegated to `aws eks get-token`; the AWS CLI must be
  # available locally wherever Terraform is executed
  exec {
    command     = "aws"
    api_version = "client.authentication.k8s.io/v1beta1"
    args = [
      "eks",
      "get-token",
      "--cluster-name",
      module.eks.cluster_name,
    ]
  }
}

# Helm provider; its nested kubernetes block targets the EKS cluster created by module.eks
provider "helm" {
  kubernetes {
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
    host                   = module.eks.cluster_endpoint

    # Token retrieval is delegated to `aws eks get-token`; the AWS CLI must be
    # available locally wherever Terraform is executed
    exec {
      command     = "aws"
      api_version = "client.authentication.k8s.io/v1beta1"
      args = [
        "eks",
        "get-token",
        "--cluster-name",
        module.eks.cluster_name,
      ]
    }
  }
}

# kubectl provider targeting the EKS cluster created by module.eks.
# Connection details are given inline; kubeconfig loading is switched off.
provider "kubectl" {
  load_config_file       = false
  apply_retry_count      = 5
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  host                   = module.eks.cluster_endpoint

  # Token retrieval is delegated to `aws eks get-token`; the AWS CLI must be
  # available locally wherever Terraform is executed
  exec {
    command     = "aws"
    api_version = "client.authentication.k8s.io/v1beta1"
    args = [
      "eks",
      "get-token",
      "--cluster-name",
      module.eks.cluster_name,
    ]
  }
}
3 changes: 0 additions & 3 deletions ai-ml/ray/terraform/variables.tf
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
# Input variables, each declared as description / type / default.

variable "region" {
  description = "Region"
  type        = string
  default     = "us-west-2"
}

variable "name" {
  description = "Name of the VPC, EKS Cluster and Ray cluster"
  type        = string
  default     = "ray-cluster"
}

variable "eks_cluster_version" {
  description = "EKS Cluster version"
  type        = string
  default     = "1.25"
}
14 changes: 14 additions & 0 deletions ai-ml/trainium-inferentia/eks.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
#---------------------------------------------------------------
# Data Sources
#---------------------------------------------------------------

# ECR Public authorization token, requested through the aliased `aws.ecr` provider.
data "aws_ecrpublic_authorization_token" "token" {
provider = aws.ecr
}

# Identity of the credentials Terraform is running with.
data "aws_caller_identity" "current" {}

# Session context looked up from the caller ARN of aws_caller_identity.current.
# NOTE(review): presumably used to map an assumed-role session back to its IAM role — confirm against the consumers.
data "aws_iam_session_context" "current" {
arn = data.aws_caller_identity.current.arn
}

#---------------------------------------------------------------
# EKS Cluster
#---------------------------------------------------------------
Expand Down
22 changes: 11 additions & 11 deletions ai-ml/trainium-inferentia/jupyterhub.tf
Original file line number Diff line number Diff line change
@@ -1,14 +1,3 @@
#-----------------------------------------------------------------------------------------
# JupyterHub Single User IRSA, maybe that block could be incorporated in add-on registry
#-----------------------------------------------------------------------------------------
# Namespace named "jupyterhub"; exactly one instance is created when
# var.enable_jupyterhub is true, none otherwise.
resource "kubernetes_namespace_v1" "jupyterhub" {
count = var.enable_jupyterhub ? 1 : 0

metadata {
name = "jupyterhub"
}
}

module "jupyterhub_single_user_irsa" {
count = var.enable_jupyterhub ? 1 : 0

Expand All @@ -28,6 +17,17 @@ module "jupyterhub_single_user_irsa" {
}
}

#-----------------------------------------------------------------------------------------
# JupyterHub namespace (created only when var.enable_jupyterhub is true).
# The JupyterHub single-user IRSA resources could maybe be incorporated in the add-on registry.
#-----------------------------------------------------------------------------------------
resource "kubernetes_namespace_v1" "jupyterhub" {
count = var.enable_jupyterhub ? 1 : 0

metadata {
name = "jupyterhub"
}
}

resource "kubernetes_service_account_v1" "jupyterhub_single_user_sa" {
count = var.enable_jupyterhub ? 1 : 0

Expand Down
33 changes: 33 additions & 0 deletions ai-ml/trainium-inferentia/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#---------------------------------------------------------------
# Locals
#---------------------------------------------------------------

locals {
  # Blueprint identity and target region, passed straight through from the input variables
  region = var.region
  name   = var.name

  # Trn1 and Inf2 instances are only offered in specific AZs of us-east-1,
  # us-east-2 and us-west-2. The map is keyed by region and lists AZ ids;
  # for Trn1 the first AZ id of each list should be used.
  az_mapping = {
    "us-east-1" = ["use1-az6", "use1-az5"],
    "us-east-2" = ["use2-az3", "use2-az1"],
    "us-west-2" = ["usw2-az4", "usw2-az1"]
  }

  # AZ ids for the selected region; a region that is not a key of az_mapping makes this lookup fail
  azs = local.az_mapping[var.region]

  # Private subnets, one per AZ: 3 extra prefix bits on the VPC CIDR, network numbers 0..n-1.
  # Meant for the Private NAT Gateway -> Transit Gateway -> second VPC path (overlapping CIDRs).
  # Example: var.vpc_cidr = "10.1.0.0/21" yields ["10.1.0.0/24", "10.1.1.0/24"] (256 addresses each).
  private_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 3, idx)]

  # Public subnets (NAT Gateway + Internet Gateway), one per AZ: 5 extra prefix bits, network numbers from 8.
  # Example: var.vpc_cidr = "10.1.0.0/21" yields ["10.1.2.0/26", "10.1.2.64/26"] (64 addresses each).
  public_subnets = [for idx, az in local.azs : cidrsubnet(var.vpc_cidr, 5, idx + 8)]

  # EKS data-plane subnets (control plane ENIs, nodes, pods) carved from the first secondary CIDR block,
  # split in half: one subnet per AZ.
  # Example: var.secondary_cidr_blocks = ["100.64.0.0/16"] yields ["100.64.0.0/17", "100.64.128.0/17"]
  # (32768 addresses each).
  secondary_ip_range_private_subnets = [for idx, az in local.azs : cidrsubnet(element(var.secondary_cidr_blocks, 0), 1, idx)]

  # Tags shared by the resources of this blueprint
  tags = {
    GithubRepo = "github.com/awslabs/data-on-eks"
    Blueprint  = local.name
  }
}
48 changes: 48 additions & 0 deletions ai-ml/trainium-inferentia/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Default AWS provider; its region comes from local.region.
provider "aws" {
region = local.region
}

# Aliased AWS provider ("ecr") pinned to us-east-1
provider "aws" {
  region = "us-east-1"
  alias  = "ecr"
}

# Kubernetes provider pointed at the EKS cluster created by module.eks
provider "kubernetes" {
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  host                   = module.eks.cluster_endpoint

  # The `aws` CLI supplies the cluster token, so it has to be installed
  # locally where Terraform is executed
  exec {
    command     = "aws"
    api_version = "client.authentication.k8s.io/v1beta1"
    args = [
      "eks",
      "get-token",
      "--cluster-name",
      module.eks.cluster_name,
    ]
  }
}

# Helm provider; its nested kubernetes block targets the EKS cluster created by module.eks
provider "helm" {
  kubernetes {
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
    host                   = module.eks.cluster_endpoint

    # The `aws` CLI supplies the cluster token, so it has to be installed
    # locally where Terraform is executed
    exec {
      command     = "aws"
      api_version = "client.authentication.k8s.io/v1beta1"
      args = [
        "eks",
        "get-token",
        "--cluster-name",
        module.eks.cluster_name,
      ]
    }
  }
}

# kubectl provider targeting the EKS cluster created by module.eks.
# Connection details are given inline; kubeconfig loading is switched off.
provider "kubectl" {
  load_config_file       = false
  apply_retry_count      = 5
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  host                   = module.eks.cluster_endpoint

  # The `aws` CLI supplies the cluster token, so it has to be installed
  # locally where Terraform is executed
  exec {
    command     = "aws"
    api_version = "client.authentication.k8s.io/v1beta1"
    args = [
      "eks",
      "get-token",
      "--cluster-name",
      module.eks.cluster_name,
    ]
  }
}
Loading
Loading