diff --git a/.github/workflows/run-backend-bdd-tests.yml b/.github/workflows/run-backend-bdd-tests.yml index 7b54aa44e8..d789f7c1d8 100644 --- a/.github/workflows/run-backend-bdd-tests.yml +++ b/.github/workflows/run-backend-bdd-tests.yml @@ -51,6 +51,9 @@ jobs: echo "ACME_DEVELOPMENT_MODE=true" >> .env echo "ACME_DEVELOPMENT_HTTP01_CHALLENGE_HOST_OVERRIDES={\"localhost\": \"host.docker.internal:8087\", \"infisical.com\": \"host.docker.internal:8087\", \"example.com\": \"host.docker.internal:8087\"}" >> .env echo "BDD_NOCK_API_ENABLED=true" >> .env + # use Technitium DNS server for BDD tests + echo "ACME_DNS_RESOLVE_RESOLVER_SERVERS_HOST_ENABLED=true" >> .env + echo "ACME_DNS_RESOLVER_SERVERS=technitium" >> .env # Skip upstream validation, otherwise the ACME client for the upstream will try to # validate the DNS records, which will fail because the DNS records are not actually created. echo "ACME_SKIP_UPSTREAM_VALIDATION=true" >> .env diff --git a/backend/bdd/features/environment.py b/backend/bdd/features/environment.py index 52fda3ecab..8ed1410e01 100644 --- a/backend/bdd/features/environment.py +++ b/backend/bdd/features/environment.py @@ -23,6 +23,9 @@ CERT_CA_ID = os.environ.get("CERT_CA_ID") CERT_TEMPLATE_ID = os.environ.get("CERT_TEMPLATE_ID") AUTH_TOKEN = os.environ.get("INFISICAL_TOKEN") BOOTSTRAP_INFISICAL = int(os.environ.get("BOOTSTRAP_INFISICAL", 0)) +TECHNITIUM_URL = os.environ.get("TECHNITIUM_URL", "http://localhost:5380") +TECHNITIUM_USER = os.environ.get("TECHNITIUM_USER", "admin") +TECHNITIUM_PASSWORD = os.environ.get("TECHNITIUM_PASSWORD", "infisical") # Called mostly from a CI to setup the new Infisical instance to get it ready for BDD tests @@ -188,6 +191,9 @@ def before_all(context: Context): base_vars = { "BASE_URL": BASE_URL, "PEBBLE_URL": PEBBLE_URL, + "TECHNITIUM_URL": TECHNITIUM_URL, + "TECHNITIUM_USER": TECHNITIUM_USER, + "TECHNITIUM_PASSWORD": TECHNITIUM_PASSWORD, } if BOOTSTRAP_INFISICAL: details = bootstrap_infisical(context) @@ -206,6 +212,7 @@ def before_all(context: Context): } context._initial_vars = vars context.http_client = httpx.Client(base_url=BASE_URL) + context.technitium_http_client = httpx.Client(base_url=TECHNITIUM_URL) def before_scenario(context: Context, scenario: typing.Any): diff --git a/backend/bdd/features/pki/acme/auth.feature b/backend/bdd/features/pki/acme/auth.feature index 757a182c87..ef087ee074 100644 --- a/backend/bdd/features/pki/acme/auth.feature +++ b/backend/bdd/features/pki/acme/auth.feature @@ -19,13 +19,17 @@ Feature: Authorization And the value order.authorizations[0].body with jq ".challenges | map(pick(.type, .status)) | sort_by(.type)" should be equal to json """ [ + { + "type": "dns-01", + "status": "pending" + }, { "type": "http-01", "status": "pending" } ] """ - And the value order.authorizations[0].body with jq ".challenges | map(.status) | sort" should be equal to ["pending"] + And the value order.authorizations[0].body with jq ".challenges | map(.status) | sort" should be equal to ["pending", "pending"] And the value order.authorizations[0].body with jq ".identifier" should be equal to json """ { diff --git a/backend/bdd/features/pki/acme/challenge.feature b/backend/bdd/features/pki/acme/challenge.feature index d02eabe817..0d673c567c 100644 --- a/backend/bdd/features/pki/acme/challenge.feature +++ b/backend/bdd/features/pki/acme/challenge.feature @@ -1,6 +1,6 @@ Feature: Challenge - Scenario: Validate challenge + Scenario: Validate challenge with HTTP-01 Given I have an ACME cert profile as "acme_profile" When I have an ACME client connecting to "{BASE_URL}/api/v1/cert-manager/acme/profiles/{acme_profile.id}/directory" Then I register a new ACME account with email fangpen@infisical.com and EAB key id "{acme_profile.eab_kid}" with secret "{acme_profile.eab_secret}" as acme_account @@ -22,6 +22,28 @@ Feature: Challenge And I parse the full-chain certificate from order finalized_order as cert And the value cert with jq ".subject.common_name" should be equal to "localhost" + Scenario: Validate challenge with DNS-01 + Given I have an ACME cert profile as "acme_profile" + When I have an ACME client connecting to "{BASE_URL}/api/v1/cert-manager/acme/profiles/{acme_profile.id}/directory" + Then I register a new ACME account with email fangpen@infisical.com and EAB key id "{acme_profile.eab_kid}" with secret "{acme_profile.eab_secret}" as acme_account + When I create certificate signing request as csr + Then I add names to certificate signing request csr + """ + { + "COMMON_NAME": "example.com" + } + """ + And I create a RSA private key pair as cert_key + And I sign the certificate signing request csr with private key cert_key and output it as csr_pem in PEM format + And I submit the certificate signing request PEM csr_pem certificate order to the ACME server as order + And I select challenge with type dns-01 for domain example.com from order in order as challenge + Then I add domain example.com challenge response DNS records for challenge + And I tell ACME server that challenge is ready to be verified + And I poll and finalize the ACME order order as finalized_order + And the value finalized_order.body with jq ".status" should be equal to "valid" + And I parse the full-chain certificate from order finalized_order as cert + And the value cert with jq ".subject.common_name" should be equal to "example.com" + Scenario: Validate challenge with retry Given I have an ACME cert profile as "acme_profile" When I have an ACME client connecting to "{BASE_URL}/api/v1/cert-manager/acme/profiles/{acme_profile.id}/directory" diff --git a/backend/bdd/features/steps/pki_acme.py b/backend/bdd/features/steps/pki_acme.py index 1ce839638f..a847cbadba 100644 --- a/backend/bdd/features/steps/pki_acme.py +++ b/backend/bdd/features/steps/pki_acme.py @@ -266,9 +266,7 @@ def step_impl(context: Context, ca_id: str, template_id: str, profile_var: str): ) -@given( - 'I create an ACME profile with config as "{profile_var}"' -) +@given('I create an ACME profile with config as "{profile_var}"') def step_impl(context: Context, profile_var: str): profile_slug = faker.slug() jwt_token = context.vars["AUTH_TOKEN"] @@ -1030,6 +1028,58 @@ def step_impl(context: Context, var_path: str, hostname: str): serve_challenges(context=context, challenges=[challenge]) +@then("I add domain {domain} challenge response DNS records for {var_path}") +def step_impl(context: Context, domain: str, var_path: str): + client = context.technitium_http_client + challenge = eval_var(context, var_path, as_json=False) + + zone = domain + domain = f"{challenge.chall.LABEL}.{domain}" + value = challenge.chall.validation(context.acme_client.net.key) + + resp = client.post( + "/api/user/login", + data={ + "user": context.vars["TECHNITIUM_USER"], + "pass": context.vars["TECHNITIUM_PASSWORD"], + }, + ) + resp.raise_for_status() + + token = resp.json()["token"] + resp = client.post( + "/api/zones/create", + params=dict( + token=token, + zone=zone, + type="Primary", + ), + ) + resp.raise_for_status() + error_msg = resp.json().get("errorMessage") + if error_msg is not None and not error_msg.startswith("Zone already exists:"): + raise RuntimeError(f"Unexpected error while creating zone {zone}: {error_msg}") + + resp = client.post( + "/api/zones/records/add", + params=dict( + token=token, + zone=zone, + domain=domain, + type="TXT", + text=value, + ), + ) + resp.raise_for_status() + error_msg = resp.json().get("errorMessage") + if error_msg is not None and not error_msg.startswith( + "Cannot add record: record already exists" + ): + raise RuntimeError( + f"Unexpected error while creating TXT record {domain} for zone {zone}: {error_msg}" + ) + + @then("I tell ACME server that {var_path} is ready to be verified") def step_impl(context: Context, var_path: str): challenge = eval_var(context, var_path, as_json=False) diff --git a/backend/src/ee/services/pki-acme/pki-acme-challenge-service.ts b/backend/src/ee/services/pki-acme/pki-acme-challenge-service.ts index 06ad692365..510d05c044 100644 --- a/backend/src/ee/services/pki-acme/pki-acme-challenge-service.ts +++ b/backend/src/ee/services/pki-acme/pki-acme-challenge-service.ts @@ -1,7 +1,10 @@ +import { resolve4, Resolver } from "node:dns/promises"; + import axios, { AxiosError } from "axios"; import { TPkiAcmeChallenges } from "@app/db/schemas/pki-acme-challenges"; import { getConfig } from "@app/lib/config/env"; +import { crypto } from "@app/lib/crypto/cryptography"; import { BadRequestError, NotFoundError } from "@app/lib/errors"; import { isPrivateIp } from "@app/lib/ip/ipRange"; import { logger } from "@app/lib/logger"; @@ -17,6 +20,7 @@ import { } from "./pki-acme-errors"; import { AcmeAuthStatus, AcmeChallengeStatus, AcmeChallengeType } from "./pki-acme-schemas"; import { TPkiAcmeChallengeServiceFactory } from "./pki-acme-types"; +import { isValidIp } from "@app/lib/ip"; type TPkiAcmeChallengeServiceFactoryDep = { acmeChallengeDAL: Pick< @@ -35,6 +39,9 @@ export const pkiAcmeChallengeServiceFactory = ({ auditLogService }: TPkiAcmeChallengeServiceFactoryDep): TPkiAcmeChallengeServiceFactory => { const appCfg = getConfig(); + + type ChallengeWithAuth = NonNullable>>; + const markChallengeAsReady = async (challengeId: string): Promise => { return acmeChallengeDAL.transaction(async (tx) => { logger.info({ challengeId }, "Validating ACME challenge response"); @@ -55,20 +62,163 @@ export const pkiAcmeChallengeServiceFactory = ({ message: `ACME auth status is ${challenge.auth.status} instead of ${AcmeAuthStatus.Pending}` }); } - - // TODO: support other challenge types here. Currently only HTTP-01 is supported - if (challenge.type !== AcmeChallengeType.HTTP_01) { - throw new BadRequestError({ message: "Only HTTP-01 challenges are supported for now" }); - } const host = challenge.auth.identifierValue; // check if host is a private ip address if (isPrivateIp(host)) { throw new BadRequestError({ message: "Private IP addresses are not allowed" }); } + if (challenge.type !== AcmeChallengeType.HTTP_01 && challenge.type !== AcmeChallengeType.DNS_01) { + throw new BadRequestError({ message: "Only HTTP-01 or DNS-01 challenges are supported for now" }); + } return acmeChallengeDAL.updateById(challengeId, { status: AcmeChallengeStatus.Processing }, tx); }); }; + const validateHttp01Challenge = async (challenge: ChallengeWithAuth): Promise => { + let host = challenge.auth.identifierValue; + if (appCfg.isAcmeDevelopmentMode && appCfg.ACME_DEVELOPMENT_HTTP01_CHALLENGE_HOST_OVERRIDES[host]) { + host = appCfg.ACME_DEVELOPMENT_HTTP01_CHALLENGE_HOST_OVERRIDES[host]; + logger.warn( + { srcHost: challenge.auth.identifierValue, dstHost: host }, + "Using ACME development HTTP-01 challenge host override" + ); + } + const challengeUrl = new URL(`/.well-known/acme-challenge/${challenge.auth.token}`, `http://${host}`); + logger.info({ challengeUrl }, "Performing ACME HTTP-01 challenge validation"); + + // TODO: read config from the profile to get the timeout instead + const timeoutMs = 10 * 1000; // 10 seconds + // Notice: well, we are in a transaction, ideally we should not hold transaction and perform + // a long running operation for long time. But assuming we are not performing a tons of + // challenge validation at the same time, it should be fine. + const challengeResponse = await axios.get(challengeUrl.toString(), { + // In case if we override the host in the development mode, still provide the original host in the header + // to help the upstream server to validate the request + headers: { Host: challenge.auth.identifierValue }, + timeout: timeoutMs, + responseType: "text", + validateStatus: () => true + }); + + if (challengeResponse.status !== 200) { + throw new AcmeIncorrectResponseError({ + message: `ACME challenge response is not 200: ${challengeResponse.status}` + }); + } + + const challengeResponseBody: string = challengeResponse.data; + const thumbprint = challenge.auth.account.publicKeyThumbprint; + const expectedChallengeResponseBody = `${challenge.auth.token}.${thumbprint}`; + + if (challengeResponseBody.trimEnd() !== expectedChallengeResponseBody) { + throw new AcmeIncorrectResponseError({ message: "ACME HTTP-01 challenge response is not correct" }); + } + }; + + const validateDns01Challenge = async (challenge: ChallengeWithAuth): Promise => { + const resolver = new Resolver(); + if (appCfg.ACME_DNS_RESOLVER_SERVERS.length > 0) { + const servers = appCfg.ACME_DNS_RESOLVE_RESOLVER_SERVERS_HOST_ENABLED + ? await Promise.all( + appCfg.ACME_DNS_RESOLVER_SERVERS.map(async (server) => { + if (isValidIp(server)) { + return server; + } + const ips = await resolve4(server); + return ips[0]; + }) + ) + : appCfg.ACME_DNS_RESOLVER_SERVERS; + resolver.setServers(servers); + } + + const recordName = `_acme-challenge.${challenge.auth.identifierValue}`; + const records = await resolver.resolveTxt(recordName); + const recordValues = records.map((chunks) => chunks.join("")); + + const thumbprint = challenge.auth.account.publicKeyThumbprint; + const keyAuthorization = `${challenge.auth.token}.${thumbprint}`; + const digest = crypto.nativeCrypto.createHash("sha256").update(keyAuthorization).digest(); + const expectedChallengeResponseValue = Buffer.from(digest).toString("base64url"); + + if (!recordValues.some((recordValue) => recordValue.trim() === expectedChallengeResponseValue)) { + throw new AcmeIncorrectResponseError({ message: "ACME DNS-01 challenge response is not correct" }); + } + }; + + const handleChallengeValidationError = async ( + exp: unknown, + challenge: ChallengeWithAuth, + challengeId: string, + retryCount: number + ): Promise => { + let finalAttempt = false; + if (retryCount >= 2) { + logger.error( + exp, + `Last attempt to validate ACME challenge response failed, marking ${challengeId} challenge as invalid` + ); + // This is the last attempt to validate the challenge response, if it fails, we mark the challenge as invalid + await acmeChallengeDAL.markAsInvalidCascadeById(challengeId); + finalAttempt = true; + } + + try { + // Properly type and inspect the error + if (axios.isAxiosError(exp)) { + const axiosError = exp as AxiosError; + const errorCode = axiosError.code; + const errorMessage = axiosError.message; + + if (errorCode === "ECONNREFUSED" || errorMessage.includes("ECONNREFUSED")) { + throw new AcmeConnectionError({ message: "Connection refused" }); + } + if (errorCode === "ENOTFOUND" || errorMessage.includes("ENOTFOUND")) { + throw new AcmeDnsFailureError({ message: "Hostname could not be resolved (DNS failure)" }); + } + if (errorCode === "ECONNRESET" || errorMessage.includes("ECONNRESET")) { + throw new AcmeConnectionError({ message: "Connection reset by peer" }); + } + if (errorCode === "ECONNABORTED" || errorMessage.includes("timeout")) { + logger.error(exp, "Connection timed out while validating ACME challenge response"); + throw new AcmeConnectionError({ message: "Connection timed out" }); + } + logger.error(exp, "Unknown error validating ACME challenge response"); + throw new AcmeServerInternalError({ message: "Unknown error validating ACME challenge response" }); + } + if (exp instanceof Error) { + if ((exp as unknown as { code?: string })?.code === "ENOTFOUND") { + throw new AcmeDnsFailureError({ message: "Hostname could not be resolved (DNS failure)" }); + } + logger.error(exp, "Error validating ACME challenge response"); + throw exp; + } + logger.error(exp, "Unknown error validating ACME challenge response"); + throw new AcmeServerInternalError({ message: "Unknown error validating ACME challenge response" }); + } catch (outterExp) { + await auditLogService.createAuditLog({ + projectId: challenge.auth.account.project.id, + actor: { + type: ActorType.ACME_ACCOUNT, + metadata: { + profileId: challenge.auth.account.profileId, + accountId: challenge.auth.account.id + } + }, + event: { + type: finalAttempt ? EventType.FAIL_ACME_CHALLENGE : EventType.ATTEMPT_ACME_CHALLENGE, + metadata: { + challengeId, + type: challenge.type as AcmeChallengeType, + retryCount, + errorMessage: exp instanceof Error ? exp.message : "Unknown error" + } + } + }); + throw outterExp; + } + }; + const validateChallengeResponse = async (challengeId: string, retryCount: number): Promise => { logger.info({ challengeId, retryCount }, "Validating ACME challenge response"); const challenge = await acmeChallengeDAL.findByIdForChallengeValidation(challengeId); @@ -80,41 +230,16 @@ export const pkiAcmeChallengeServiceFactory = ({ message: `ACME challenge is ${challenge.status} instead of ${AcmeChallengeStatus.Processing}` }); } - let host = challenge.auth.identifierValue; - if (appCfg.isAcmeDevelopmentMode && appCfg.ACME_DEVELOPMENT_HTTP01_CHALLENGE_HOST_OVERRIDES[host]) { - host = appCfg.ACME_DEVELOPMENT_HTTP01_CHALLENGE_HOST_OVERRIDES[host]; - logger.warn( - { srcHost: challenge.auth.identifierValue, dstHost: host }, - "Using ACME development HTTP-01 challenge host override" - ); - } - const challengeUrl = new URL(`/.well-known/acme-challenge/${challenge.auth.token}`, `http://${host}`); - logger.info({ challengeUrl }, "Performing ACME HTTP-01 challenge validation"); + try { - // TODO: read config from the profile to get the timeout instead - const timeoutMs = 10 * 1000; // 10 seconds - // Notice: well, we are in a transaction, ideally we should not hold transaction and perform - // a long running operation for long time. But assuming we are not performing a tons of - // challenge validation at the same time, it should be fine. - const challengeResponse = await axios.get(challengeUrl.toString(), { - // In case if we override the host in the development mode, still provide the original host in the header - // to help the upstream server to validate the request - headers: { Host: challenge.auth.identifierValue }, - timeout: timeoutMs, - responseType: "text", - validateStatus: () => true - }); - if (challengeResponse.status !== 200) { - throw new AcmeIncorrectResponseError({ - message: `ACME challenge response is not 200: ${challengeResponse.status}` - }); - } - const challengeResponseBody: string = challengeResponse.data; - const thumbprint = challenge.auth.account.publicKeyThumbprint; - const expectedChallengeResponseBody = `${challenge.auth.token}.${thumbprint}`; - if (challengeResponseBody.trimEnd() !== expectedChallengeResponseBody) { - throw new AcmeIncorrectResponseError({ message: "ACME challenge response is not correct" }); + if (challenge.type === AcmeChallengeType.HTTP_01) { + await validateHttp01Challenge(challenge); + } else if (challenge.type === AcmeChallengeType.DNS_01) { + await validateDns01Challenge(challenge); + } else { + throw new BadRequestError({ message: `Unsupported challenge type: ${challenge.type}` }); } + logger.info({ challengeId }, "ACME challenge response is correct, marking challenge as valid"); await acmeChallengeDAL.markAsValidCascadeById(challengeId); await auditLogService.createAuditLog({ @@ -135,67 +260,7 @@ export const pkiAcmeChallengeServiceFactory = ({ } }); } catch (exp) { - let finalAttempt = false; - if (retryCount >= 2) { - logger.error( - exp, - `Last attempt to validate ACME challenge response failed, marking ${challengeId} challenge as invalid` - ); - // This is the last attempt to validate the challenge response, if it fails, we mark the challenge as invalid - await acmeChallengeDAL.markAsInvalidCascadeById(challengeId); - finalAttempt = true; - } - try { - // Properly type and inspect the error - if (axios.isAxiosError(exp)) { - const axiosError = exp as AxiosError; - const errorCode = axiosError.code; - const errorMessage = axiosError.message; - - if (errorCode === "ECONNREFUSED" || errorMessage.includes("ECONNREFUSED")) { - throw new AcmeConnectionError({ message: "Connection refused" }); - } - if (errorCode === "ENOTFOUND" || errorMessage.includes("ENOTFOUND")) { - throw new AcmeDnsFailureError({ message: "Hostname could not be resolved (DNS failure)" }); - } - if (errorCode === "ECONNRESET" || errorMessage.includes("ECONNRESET")) { - throw new AcmeConnectionError({ message: "Connection reset by peer" }); - } - if (errorCode === "ECONNABORTED" || errorMessage.includes("timeout")) { - logger.error(exp, "Connection timed out while validating ACME challenge response"); - throw new AcmeConnectionError({ message: "Connection timed out" }); - } - logger.error(exp, "Unknown error validating ACME challenge response"); - throw new AcmeServerInternalError({ message: "Unknown error validating ACME challenge response" }); - } - if (exp instanceof Error) { - logger.error(exp, "Error validating ACME challenge response"); - throw exp; - } - logger.error(exp, "Unknown error validating ACME challenge response"); - throw new AcmeServerInternalError({ message: "Unknown error validating ACME challenge response" }); - } catch (outterExp) { - await auditLogService.createAuditLog({ - projectId: challenge.auth.account.project.id, - actor: { - type: ActorType.ACME_ACCOUNT, - metadata: { - profileId: challenge.auth.account.profileId, - accountId: challenge.auth.account.id - } - }, - event: { - type: finalAttempt ? EventType.FAIL_ACME_CHALLENGE : EventType.ATTEMPT_ACME_CHALLENGE, - metadata: { - challengeId, - type: challenge.type as AcmeChallengeType, - retryCount, - errorMessage: exp instanceof Error ? exp.message : "Unknown error" - } - } - }); - throw outterExp; - } + await handleChallengeValidationError(exp, challenge, challengeId, retryCount); } }; diff --git a/backend/src/ee/services/pki-acme/pki-acme-service.ts b/backend/src/ee/services/pki-acme/pki-acme-service.ts index 705cd6c392..c217211a02 100644 --- a/backend/src/ee/services/pki-acme/pki-acme-service.ts +++ b/backend/src/ee/services/pki-acme/pki-acme-service.ts @@ -707,15 +707,17 @@ export const pkiAcmeServiceFactory = ({ tx ); if (!skipDnsOwnershipVerification) { - // TODO: support other challenge types here. Currently only HTTP-01 is supported. - await acmeChallengeDAL.create( - { - authId: auth.id, - status: AcmeChallengeStatus.Pending, - type: AcmeChallengeType.HTTP_01 - }, - tx - ); + for (const challengeType of [AcmeChallengeType.HTTP_01, AcmeChallengeType.DNS_01]) { + // eslint-disable-next-line no-await-in-loop + await acmeChallengeDAL.create( + { + authId: auth.id, + status: AcmeChallengeStatus.Pending, + type: challengeType + }, + tx + ); + } } return auth; }) diff --git a/backend/src/lib/config/env.ts b/backend/src/lib/config/env.ts index c47506514e..a034a7b136 100644 --- a/backend/src/lib/config/env.ts +++ b/backend/src/lib/config/env.ts @@ -119,6 +119,16 @@ const envSchema = z }) .default("{}") ), + ACME_DNS_RESOLVER_SERVERS: zpStr( + z + .string() + .optional() + .transform((val) => { + if (!val) return []; + return val.split(","); + }) + ), + ACME_DNS_RESOLVE_RESOLVER_SERVERS_HOST_ENABLED: zodStrBool.default("false").optional(), DNS_MADE_EASY_SANDBOX_ENABLED: zodStrBool.default("false").optional(), // smtp options SMTP_HOST: zpStr(z.string().optional()), diff --git a/docker-compose.bdd.yml b/docker-compose.bdd.yml index b736838677..ae3f8ef6bd 100644 --- a/docker-compose.bdd.yml +++ b/docker-compose.bdd.yml @@ -94,6 +94,13 @@ services: volumes: - ./backend/bdd/pebble/:/var/data/pebble:ro + technitium: + image: technitium/dns-server:14.2.0 + ports: + - "5380:5380/tcp" + environment: + - DNS_SERVER_ADMIN_PASSWORD=infisical + volumes: postgres-data: driver: local