From bcfb2d4cdd231cf886a128537e75698aa9908751 Mon Sep 17 00:00:00 2001 From: Aarushi Date: Thu, 25 Jul 2024 22:22:40 +0100 Subject: [PATCH] delay and timeouts for probes --- .../autogpt_server/util/service.py | 6 +-- rnd/infra/helm/autogpt-server/Chart.yaml | 2 +- .../helm/autogpt-server/templates/NOTES.txt | 8 ++-- .../autogpt-server/templates/service.yaml | 4 ++ rnd/infra/helm/autogpt-server/values.dev.yaml | 39 +++++++++++++++---- rnd/infra/helm/autogpt-server/values.yaml | 13 ++++++- rnd/infra/terraform/environments/dev.tfvars | 9 +++++ rnd/infra/terraform/main.tf | 12 +++--- .../terraform/modules/gke_cluster/main.tf | 5 +++ .../terraform/modules/networking/main.tf | 10 +++++ .../terraform/modules/networking/variables.tf | 8 ++++ rnd/infra/terraform/modules/static_ip/main.tf | 3 +- .../terraform/modules/static_ip/outputs.tf | 4 +- rnd/infra/terraform/variables.tf | 12 ++++++ 14 files changed, 109 insertions(+), 26 deletions(-) diff --git a/rnd/autogpt_server/autogpt_server/util/service.py b/rnd/autogpt_server/autogpt_server/util/service.py index ff55135a6a..83da51e2dd 100644 --- a/rnd/autogpt_server/autogpt_server/util/service.py +++ b/rnd/autogpt_server/autogpt_server/util/service.py @@ -33,7 +33,7 @@ class PyroNameServer(AppProcess): def run(self): try: print("Starting NameServer loop") - nameserver.start_ns_loop() + nameserver.start_ns_loop(host="0.0.0.0", port=9090) except KeyboardInterrupt: print("Shutting down NameServer") @@ -77,8 +77,8 @@ class AppService(AppProcess): @conn_retry def __start_pyro(self): - daemon = pyro.Daemon() - ns = pyro.locate_ns() + daemon = pyro.Daemon(host="0.0.0.0") + ns = pyro.locate_ns(host="0.0.0.0", port=9090) uri = daemon.register(self) ns.register(self.service_name, uri) logger.warning(f"Service [{self.service_name}] Ready. Object URI = {uri}") diff --git a/rnd/infra/helm/autogpt-server/Chart.yaml b/rnd/infra/helm/autogpt-server/Chart.yaml index 57ccbd6efe..5dd24327d4 100644 --- a/rnd/infra/helm/autogpt-server/Chart.yaml +++ b/rnd/infra/helm/autogpt-server/Chart.yaml @@ -7,4 +7,4 @@ type: application version: 0.1.0 -appVersion: "1.16.0" +appVersion: "1.0.0" diff --git a/rnd/infra/helm/autogpt-server/templates/NOTES.txt b/rnd/infra/helm/autogpt-server/templates/NOTES.txt index 01105181f4..2b660011ad 100644 --- a/rnd/infra/helm/autogpt-server/templates/NOTES.txt +++ b/rnd/infra/helm/autogpt-server/templates/NOTES.txt @@ -6,16 +6,16 @@ {{- end }} {{- end }} {{- else if contains "NodePort" .Values.service.type }} - export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "autogpt_server.fullname" . }}) + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "autogpt-server.fullname" . }}) export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") echo http://$NODE_IP:$NODE_PORT {{- else if contains "LoadBalancer" .Values.service.type }} NOTE: It may take a few minutes for the LoadBalancer IP to be available. - You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "autogpt_server.fullname" . }}' - export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "autogpt_server.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "autogpt-server.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "autogpt-server.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") echo http://$SERVICE_IP:{{ .Values.service.port }} {{- else if contains "ClusterIP" .Values.service.type }} - export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "autogpt_server.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "autogpt-server.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") echo "Visit http://127.0.0.1:8080 to use your application" kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT diff --git a/rnd/infra/helm/autogpt-server/templates/service.yaml b/rnd/infra/helm/autogpt-server/templates/service.yaml index fd2f813bc1..53d970dbc9 100644 --- a/rnd/infra/helm/autogpt-server/templates/service.yaml +++ b/rnd/infra/helm/autogpt-server/templates/service.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "autogpt-server.fullname" . }} labels: {{- include "autogpt-server.labels" . | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} spec: type: {{ .Values.service.type }} ports: diff --git a/rnd/infra/helm/autogpt-server/values.dev.yaml b/rnd/infra/helm/autogpt-server/values.dev.yaml index 72549936c0..fa78f1d658 100644 --- a/rnd/infra/helm/autogpt-server/values.dev.yaml +++ b/rnd/infra/helm/autogpt-server/values.dev.yaml @@ -2,7 +2,7 @@ image: repository: us-east1-docker.pkg.dev/agpt-dev/agpt-server-dev/agpt-server-dev - pullPolicy: IfNotPresent + pullPolicy: Always tag: "latest" serviceAccount: @@ -11,9 +11,11 @@ serviceAccount: name: "dev-agpt-server-sa" service: - type: LoadBalancer - port: 80 + type: ClusterIP + port: 8000 targetPort: 8000 + annotations: + cloud.google.com/neg: '{"ingress": true}' ingress: enabled: true @@ -22,12 +24,16 @@ ingress: kubernetes.io/ingress.class: gce kubernetes.io/ingress.global-static-ip-name: "agpt-dev-agpt-server-ip" networking.gke.io/managed-certificates: "autogpt-server-cert" + kubernetes.io/ingress.allow-http: "true" hosts: - - host: server.agpt.co + - host: dev-server.agpt.co paths: - - path: /* - pathType: ImplementationSpecific - tls: [] + - path: / + pathType: Prefix + backend: + service: + name: autogpt-server + port: 8000 resources: requests: @@ -37,7 +43,24 @@ resources: cpu: 500m memory: 512Mi -domain: "server.agpt.co" +livenessProbe: + httpGet: + path: /docs + port: 8000 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 +readinessProbe: + httpGet: + path: /docs + port: 8000 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + +domain: "dev-server.agpt.co" cloudSqlProxy: image: diff --git a/rnd/infra/helm/autogpt-server/values.yaml b/rnd/infra/helm/autogpt-server/values.yaml index 80797575df..3bdfd8515a 100644 --- a/rnd/infra/helm/autogpt-server/values.yaml +++ b/rnd/infra/helm/autogpt-server/values.yaml @@ -1,6 +1,6 @@ # base values, environment specific variables should be specified/overwritten in environment values -replicaCount: 2 +replicaCount: 1 image: repository: us-east1-docker.pkg.dev/agpt-dev/agpt-server-dev/agpt-server-dev @@ -74,3 +74,14 @@ tolerations: [] affinity: {} domain: "" + +cloudSqlProxy: + image: + repository: gcr.io/cloud-sql-connectors/cloud-sql-proxy + tag: 2.11.4 + instanceConnectionName: "" + port: 5432 + resources: + requests: + memory: "2Gi" + cpu: "1" diff --git a/rnd/infra/terraform/environments/dev.tfvars b/rnd/infra/terraform/environments/dev.tfvars index 7113a50d58..c66283019d 100644 --- a/rnd/infra/terraform/environments/dev.tfvars +++ b/rnd/infra/terraform/environments/dev.tfvars @@ -43,4 +43,13 @@ role_bindings = { "roles/iam.workloadIdentityUser" = [ "serviceAccount:dev-agpt-server-sa@agpt-dev.iam.gserviceaccount.com" ] + "roles/compute.networkUser" = [ + "serviceAccount:dev-agpt-server-sa@agpt-dev.iam.gserviceaccount.com" + ], + "roles/container.hostServiceAgentUser" = [ + "serviceAccount:dev-agpt-server-sa@agpt-dev.iam.gserviceaccount.com" + ] } + +pods_ip_cidr_range = "10.1.0.0/16" +services_ip_cidr_range = "10.2.0.0/20" \ No newline at end of file diff --git a/rnd/infra/terraform/main.tf b/rnd/infra/terraform/main.tf index 57de51eb63..495130b6a0 100644 --- a/rnd/infra/terraform/main.tf +++ b/rnd/infra/terraform/main.tf @@ -30,11 +30,13 @@ module "static_ips" { module "networking" { source = "./modules/networking" - project_id = var.project_id - region = var.region - network_name = var.network_name - subnet_name = var.subnet_name - subnet_cidr = var.subnet_cidr + project_id = var.project_id + region = var.region + network_name = var.network_name + subnet_name = var.subnet_name + subnet_cidr = var.subnet_cidr + pods_ip_cidr_range = var.pods_ip_cidr_range + services_ip_cidr_range = var.services_ip_cidr_range } module "gke_cluster" { diff --git a/rnd/infra/terraform/modules/gke_cluster/main.tf b/rnd/infra/terraform/modules/gke_cluster/main.tf index d30946848e..66d1b2b76d 100644 --- a/rnd/infra/terraform/modules/gke_cluster/main.tf +++ b/rnd/infra/terraform/modules/gke_cluster/main.tf @@ -26,5 +26,10 @@ resource "google_container_cluster" "primary" { network = var.network subnetwork = var.subnetwork + + ip_allocation_policy { + cluster_secondary_range_name = "pods" + services_secondary_range_name = "services" + } } diff --git a/rnd/infra/terraform/modules/networking/main.tf b/rnd/infra/terraform/modules/networking/main.tf index 64aac3a77d..823fd6b89e 100644 --- a/rnd/infra/terraform/modules/networking/main.tf +++ b/rnd/infra/terraform/modules/networking/main.tf @@ -8,5 +8,15 @@ resource "google_compute_subnetwork" "subnet" { ip_cidr_range = var.subnet_cidr region = var.region network = google_compute_network.vpc_network.self_link + + secondary_ip_range { + range_name = "pods" + ip_cidr_range = var.pods_ip_cidr_range + } + + secondary_ip_range { + range_name = "services" + ip_cidr_range = var.services_ip_cidr_range + } } diff --git a/rnd/infra/terraform/modules/networking/variables.tf b/rnd/infra/terraform/modules/networking/variables.tf index 3989bddff0..aa4d409f02 100644 --- a/rnd/infra/terraform/modules/networking/variables.tf +++ b/rnd/infra/terraform/modules/networking/variables.tf @@ -18,4 +18,12 @@ variable "subnet_cidr" { description = "The CIDR range for the subnet" } +variable "pods_ip_cidr_range" { + description = "The IP address range for pods" + default = "10.1.0.0/16" +} +variable "services_ip_cidr_range" { + description = "The IP address range for services" + default = "10.2.0.0/20" +} diff --git a/rnd/infra/terraform/modules/static_ip/main.tf b/rnd/infra/terraform/modules/static_ip/main.tf index 4b6d073541..877b4bf30e 100644 --- a/rnd/infra/terraform/modules/static_ip/main.tf +++ b/rnd/infra/terraform/modules/static_ip/main.tf @@ -1,6 +1,5 @@ -resource "google_compute_address" "static_ip" { +resource "google_compute_global_address" "static_ip" { count = length(var.ip_names) name = "${var.project_id}-${var.ip_names[count.index]}" - region = var.region address_type = "EXTERNAL" } \ No newline at end of file diff --git a/rnd/infra/terraform/modules/static_ip/outputs.tf b/rnd/infra/terraform/modules/static_ip/outputs.tf index 2b03fe3623..27a9aa5804 100644 --- a/rnd/infra/terraform/modules/static_ip/outputs.tf +++ b/rnd/infra/terraform/modules/static_ip/outputs.tf @@ -1,9 +1,9 @@ output "ip_addresses" { description = "Map of created static IP addresses" - value = { for i, ip in google_compute_address.static_ip : var.ip_names[i] => ip.address } + value = { for i, ip in google_compute_global_address.static_ip : var.ip_names[i] => ip.address } } output "ip_names" { description = "List of full names of the created static IP addresses" - value = google_compute_address.static_ip[*].name + value = google_compute_global_address.static_ip[*].name } \ No newline at end of file diff --git a/rnd/infra/terraform/variables.tf b/rnd/infra/terraform/variables.tf index 80a0b77e7d..76da68cce5 100644 --- a/rnd/infra/terraform/variables.tf +++ b/rnd/infra/terraform/variables.tf @@ -99,3 +99,15 @@ variable "role_bindings" { type = map(list(string)) default = {} } + +variable "pods_ip_cidr_range" { + description = "The IP address range for pods" + type = string + default = "10.1.0.0/16" +} + +variable "services_ip_cidr_range" { + description = "The IP address range for services" + type = string + default = "10.2.0.0/20" +} \ No newline at end of file