Merge pull request #5043 from Infisical/PKI-50-add-dns-challenge-for-acme

feature(api): add dns challenge for acme
This commit is contained in:
Fang-Pen Lin
2025-12-17 14:35:42 -08:00
committed by GitHub
9 changed files with 283 additions and 113 deletions

View File

@@ -51,6 +51,9 @@ jobs:
echo "ACME_DEVELOPMENT_MODE=true" >> .env
echo "ACME_DEVELOPMENT_HTTP01_CHALLENGE_HOST_OVERRIDES={\"localhost\": \"host.docker.internal:8087\", \"infisical.com\": \"host.docker.internal:8087\", \"example.com\": \"host.docker.internal:8087\"}" >> .env
echo "BDD_NOCK_API_ENABLED=true" >> .env
# use Technitium DNS server for BDD tests
echo "ACME_DNS_RESOLVE_RESOLVER_SERVERS_HOST_ENABLED=true" >> .env
echo "ACME_DNS_RESOLVER_SERVERS=technitium" >> .env
# Skip upstream validation, otherwise the ACME client for the upstream will try to
# validate the DNS records, which will fail because the DNS records are not actually created.
echo "ACME_SKIP_UPSTREAM_VALIDATION=true" >> .env

View File

@@ -23,6 +23,9 @@ CERT_CA_ID = os.environ.get("CERT_CA_ID")
CERT_TEMPLATE_ID = os.environ.get("CERT_TEMPLATE_ID")
AUTH_TOKEN = os.environ.get("INFISICAL_TOKEN")
BOOTSTRAP_INFISICAL = int(os.environ.get("BOOTSTRAP_INFISICAL", 0))
TECHNITIUM_URL = os.environ.get("TECHNITIUM_URL", "http://localhost:5380")
TECHNITIUM_USER = os.environ.get("TECHNITIUM_USER", "admin")
TECHNITIUM_PASSWORD = os.environ.get("TECHNITIUM_PASSWORD", "infisical")
# Called mostly from a CI to setup the new Infisical instance to get it ready for BDD tests
@@ -188,6 +191,9 @@ def before_all(context: Context):
base_vars = {
"BASE_URL": BASE_URL,
"PEBBLE_URL": PEBBLE_URL,
"TECHNITIUM_URL": TECHNITIUM_URL,
"TECHNITIUM_USER": TECHNITIUM_USER,
"TECHNITIUM_PASSWORD": TECHNITIUM_PASSWORD,
}
if BOOTSTRAP_INFISICAL:
details = bootstrap_infisical(context)
@@ -206,6 +212,7 @@ def before_all(context: Context):
}
context._initial_vars = vars
context.http_client = httpx.Client(base_url=BASE_URL)
context.technitium_http_client = httpx.Client(base_url=TECHNITIUM_URL)
def before_scenario(context: Context, scenario: typing.Any):

View File

@@ -19,13 +19,17 @@ Feature: Authorization
And the value order.authorizations[0].body with jq ".challenges | map(pick(.type, .status)) | sort_by(.type)" should be equal to json
"""
[
{
"type": "dns-01",
"status": "pending"
},
{
"type": "http-01",
"status": "pending"
}
]
"""
And the value order.authorizations[0].body with jq ".challenges | map(.status) | sort" should be equal to ["pending"]
And the value order.authorizations[0].body with jq ".challenges | map(.status) | sort" should be equal to ["pending", "pending"]
And the value order.authorizations[0].body with jq ".identifier" should be equal to json
"""
{

View File

@@ -1,6 +1,6 @@
Feature: Challenge
Scenario: Validate challenge
Scenario: Validate challenge with HTTP-01
Given I have an ACME cert profile as "acme_profile"
When I have an ACME client connecting to "{BASE_URL}/api/v1/cert-manager/acme/profiles/{acme_profile.id}/directory"
Then I register a new ACME account with email fangpen@infisical.com and EAB key id "{acme_profile.eab_kid}" with secret "{acme_profile.eab_secret}" as acme_account
@@ -22,6 +22,28 @@ Feature: Challenge
And I parse the full-chain certificate from order finalized_order as cert
And the value cert with jq ".subject.common_name" should be equal to "localhost"
Scenario: Validate challenge with DNS-01
Given I have an ACME cert profile as "acme_profile"
When I have an ACME client connecting to "{BASE_URL}/api/v1/cert-manager/acme/profiles/{acme_profile.id}/directory"
Then I register a new ACME account with email fangpen@infisical.com and EAB key id "{acme_profile.eab_kid}" with secret "{acme_profile.eab_secret}" as acme_account
When I create certificate signing request as csr
Then I add names to certificate signing request csr
"""
{
"COMMON_NAME": "example.com"
}
"""
And I create a RSA private key pair as cert_key
And I sign the certificate signing request csr with private key cert_key and output it as csr_pem in PEM format
And I submit the certificate signing request PEM csr_pem certificate order to the ACME server as order
And I select challenge with type dns-01 for domain example.com from order in order as challenge
Then I add domain example.com challenge response DNS records for challenge
And I tell ACME server that challenge is ready to be verified
And I poll and finalize the ACME order order as finalized_order
And the value finalized_order.body with jq ".status" should be equal to "valid"
And I parse the full-chain certificate from order finalized_order as cert
And the value cert with jq ".subject.common_name" should be equal to "example.com"
Scenario: Validate challenge with retry
Given I have an ACME cert profile as "acme_profile"
When I have an ACME client connecting to "{BASE_URL}/api/v1/cert-manager/acme/profiles/{acme_profile.id}/directory"

View File

@@ -266,9 +266,7 @@ def step_impl(context: Context, ca_id: str, template_id: str, profile_var: str):
)
@given(
'I create an ACME profile with config as "{profile_var}"'
)
@given('I create an ACME profile with config as "{profile_var}"')
def step_impl(context: Context, profile_var: str):
profile_slug = faker.slug()
jwt_token = context.vars["AUTH_TOKEN"]
@@ -1030,6 +1028,58 @@ def step_impl(context: Context, var_path: str, hostname: str):
serve_challenges(context=context, challenges=[challenge])
@then("I add domain {domain} challenge response DNS records for {var_path}")
def step_impl(context: Context, domain: str, var_path: str):
client = context.technitium_http_client
challenge = eval_var(context, var_path, as_json=False)
zone = domain
domain = f"{challenge.chall.LABEL}.{domain}"
value = challenge.chall.validation(context.acme_client.net.key)
resp = client.post(
"/api/user/login",
data={
"user": context.vars["TECHNITIUM_USER"],
"pass": context.vars["TECHNITIUM_PASSWORD"],
},
)
resp.raise_for_status()
token = resp.json()["token"]
resp = client.post(
"/api/zones/create",
params=dict(
token=token,
zone=zone,
type="Primary",
),
)
resp.raise_for_status()
error_msg = resp.json().get("errorMessage")
if error_msg is not None and not error_msg.startswith("Zone already exists:"):
raise RuntimeError(f"Unexpected error while creating zone {zone}: {error_msg}")
resp = client.post(
"/api/zones/records/add",
params=dict(
token=token,
zone=zone,
domain=domain,
type="TXT",
text=value,
),
)
resp.raise_for_status()
error_msg = resp.json().get("errorMessage")
if error_msg is not None and not error_msg.startswith(
"Cannot add record: record already exists"
):
raise RuntimeError(
f"Unexpected error while creating TXT record {domain} for zone {zone}: {error_msg}"
)
@then("I tell ACME server that {var_path} is ready to be verified")
def step_impl(context: Context, var_path: str):
challenge = eval_var(context, var_path, as_json=False)

View File

@@ -1,7 +1,10 @@
import { resolve4, Resolver } from "node:dns/promises";
import axios, { AxiosError } from "axios";
import { TPkiAcmeChallenges } from "@app/db/schemas/pki-acme-challenges";
import { getConfig } from "@app/lib/config/env";
import { crypto } from "@app/lib/crypto/cryptography";
import { BadRequestError, NotFoundError } from "@app/lib/errors";
import { isPrivateIp } from "@app/lib/ip/ipRange";
import { logger } from "@app/lib/logger";
@@ -17,6 +20,7 @@ import {
} from "./pki-acme-errors";
import { AcmeAuthStatus, AcmeChallengeStatus, AcmeChallengeType } from "./pki-acme-schemas";
import { TPkiAcmeChallengeServiceFactory } from "./pki-acme-types";
import { isValidIp } from "@app/lib/ip";
type TPkiAcmeChallengeServiceFactoryDep = {
acmeChallengeDAL: Pick<
@@ -35,6 +39,9 @@ export const pkiAcmeChallengeServiceFactory = ({
auditLogService
}: TPkiAcmeChallengeServiceFactoryDep): TPkiAcmeChallengeServiceFactory => {
const appCfg = getConfig();
type ChallengeWithAuth = NonNullable<Awaited<ReturnType<typeof acmeChallengeDAL.findByIdForChallengeValidation>>>;
const markChallengeAsReady = async (challengeId: string): Promise<TPkiAcmeChallenges> => {
return acmeChallengeDAL.transaction(async (tx) => {
logger.info({ challengeId }, "Validating ACME challenge response");
@@ -55,20 +62,163 @@ export const pkiAcmeChallengeServiceFactory = ({
message: `ACME auth status is ${challenge.auth.status} instead of ${AcmeAuthStatus.Pending}`
});
}
// TODO: support other challenge types here. Currently only HTTP-01 is supported
if (challenge.type !== AcmeChallengeType.HTTP_01) {
throw new BadRequestError({ message: "Only HTTP-01 challenges are supported for now" });
}
const host = challenge.auth.identifierValue;
// check if host is a private ip address
if (isPrivateIp(host)) {
throw new BadRequestError({ message: "Private IP addresses are not allowed" });
}
if (challenge.type !== AcmeChallengeType.HTTP_01 && challenge.type !== AcmeChallengeType.DNS_01) {
throw new BadRequestError({ message: "Only HTTP-01 or DNS-01 challenges are supported for now" });
}
return acmeChallengeDAL.updateById(challengeId, { status: AcmeChallengeStatus.Processing }, tx);
});
};
const validateHttp01Challenge = async (challenge: ChallengeWithAuth): Promise<void> => {
let host = challenge.auth.identifierValue;
if (appCfg.isAcmeDevelopmentMode && appCfg.ACME_DEVELOPMENT_HTTP01_CHALLENGE_HOST_OVERRIDES[host]) {
host = appCfg.ACME_DEVELOPMENT_HTTP01_CHALLENGE_HOST_OVERRIDES[host];
logger.warn(
{ srcHost: challenge.auth.identifierValue, dstHost: host },
"Using ACME development HTTP-01 challenge host override"
);
}
const challengeUrl = new URL(`/.well-known/acme-challenge/${challenge.auth.token}`, `http://${host}`);
logger.info({ challengeUrl }, "Performing ACME HTTP-01 challenge validation");
// TODO: read config from the profile to get the timeout instead
const timeoutMs = 10 * 1000; // 10 seconds
// Notice: well, we are in a transaction, ideally we should not hold transaction and perform
// a long running operation for long time. But assuming we are not performing a tons of
// challenge validation at the same time, it should be fine.
const challengeResponse = await axios.get<string>(challengeUrl.toString(), {
// In case if we override the host in the development mode, still provide the original host in the header
// to help the upstream server to validate the request
headers: { Host: challenge.auth.identifierValue },
timeout: timeoutMs,
responseType: "text",
validateStatus: () => true
});
if (challengeResponse.status !== 200) {
throw new AcmeIncorrectResponseError({
message: `ACME challenge response is not 200: ${challengeResponse.status}`
});
}
const challengeResponseBody: string = challengeResponse.data;
const thumbprint = challenge.auth.account.publicKeyThumbprint;
const expectedChallengeResponseBody = `${challenge.auth.token}.${thumbprint}`;
if (challengeResponseBody.trimEnd() !== expectedChallengeResponseBody) {
throw new AcmeIncorrectResponseError({ message: "ACME HTTP-01 challenge response is not correct" });
}
};
const validateDns01Challenge = async (challenge: ChallengeWithAuth): Promise<void> => {
const resolver = new Resolver();
if (appCfg.ACME_DNS_RESOLVER_SERVERS.length > 0) {
const servers = appCfg.ACME_DNS_RESOLVE_RESOLVER_SERVERS_HOST_ENABLED
? await Promise.all(
appCfg.ACME_DNS_RESOLVER_SERVERS.map(async (server) => {
if (isValidIp(server)) {
return server;
}
const ips = await resolve4(server);
return ips[0];
})
)
: appCfg.ACME_DNS_RESOLVER_SERVERS;
resolver.setServers(servers);
}
const recordName = `_acme-challenge.${challenge.auth.identifierValue}`;
const records = await resolver.resolveTxt(recordName);
const recordValues = records.map((chunks) => chunks.join(""));
const thumbprint = challenge.auth.account.publicKeyThumbprint;
const keyAuthorization = `${challenge.auth.token}.${thumbprint}`;
const digest = crypto.nativeCrypto.createHash("sha256").update(keyAuthorization).digest();
const expectedChallengeResponseValue = Buffer.from(digest).toString("base64url");
if (!recordValues.some((recordValue) => recordValue.trim() === expectedChallengeResponseValue)) {
throw new AcmeIncorrectResponseError({ message: "ACME DNS-01 challenge response is not correct" });
}
};
const handleChallengeValidationError = async (
exp: unknown,
challenge: ChallengeWithAuth,
challengeId: string,
retryCount: number
): Promise<never> => {
let finalAttempt = false;
if (retryCount >= 2) {
logger.error(
exp,
`Last attempt to validate ACME challenge response failed, marking ${challengeId} challenge as invalid`
);
// This is the last attempt to validate the challenge response, if it fails, we mark the challenge as invalid
await acmeChallengeDAL.markAsInvalidCascadeById(challengeId);
finalAttempt = true;
}
try {
// Properly type and inspect the error
if (axios.isAxiosError(exp)) {
const axiosError = exp as AxiosError;
const errorCode = axiosError.code;
const errorMessage = axiosError.message;
if (errorCode === "ECONNREFUSED" || errorMessage.includes("ECONNREFUSED")) {
throw new AcmeConnectionError({ message: "Connection refused" });
}
if (errorCode === "ENOTFOUND" || errorMessage.includes("ENOTFOUND")) {
throw new AcmeDnsFailureError({ message: "Hostname could not be resolved (DNS failure)" });
}
if (errorCode === "ECONNRESET" || errorMessage.includes("ECONNRESET")) {
throw new AcmeConnectionError({ message: "Connection reset by peer" });
}
if (errorCode === "ECONNABORTED" || errorMessage.includes("timeout")) {
logger.error(exp, "Connection timed out while validating ACME challenge response");
throw new AcmeConnectionError({ message: "Connection timed out" });
}
logger.error(exp, "Unknown error validating ACME challenge response");
throw new AcmeServerInternalError({ message: "Unknown error validating ACME challenge response" });
}
if (exp instanceof Error) {
if ((exp as unknown as { code?: string })?.code === "ENOTFOUND") {
throw new AcmeDnsFailureError({ message: "Hostname could not be resolved (DNS failure)" });
}
logger.error(exp, "Error validating ACME challenge response");
throw exp;
}
logger.error(exp, "Unknown error validating ACME challenge response");
throw new AcmeServerInternalError({ message: "Unknown error validating ACME challenge response" });
} catch (outterExp) {
await auditLogService.createAuditLog({
projectId: challenge.auth.account.project.id,
actor: {
type: ActorType.ACME_ACCOUNT,
metadata: {
profileId: challenge.auth.account.profileId,
accountId: challenge.auth.account.id
}
},
event: {
type: finalAttempt ? EventType.FAIL_ACME_CHALLENGE : EventType.ATTEMPT_ACME_CHALLENGE,
metadata: {
challengeId,
type: challenge.type as AcmeChallengeType,
retryCount,
errorMessage: exp instanceof Error ? exp.message : "Unknown error"
}
}
});
throw outterExp;
}
};
const validateChallengeResponse = async (challengeId: string, retryCount: number): Promise<void> => {
logger.info({ challengeId, retryCount }, "Validating ACME challenge response");
const challenge = await acmeChallengeDAL.findByIdForChallengeValidation(challengeId);
@@ -80,41 +230,16 @@ export const pkiAcmeChallengeServiceFactory = ({
message: `ACME challenge is ${challenge.status} instead of ${AcmeChallengeStatus.Processing}`
});
}
let host = challenge.auth.identifierValue;
if (appCfg.isAcmeDevelopmentMode && appCfg.ACME_DEVELOPMENT_HTTP01_CHALLENGE_HOST_OVERRIDES[host]) {
host = appCfg.ACME_DEVELOPMENT_HTTP01_CHALLENGE_HOST_OVERRIDES[host];
logger.warn(
{ srcHost: challenge.auth.identifierValue, dstHost: host },
"Using ACME development HTTP-01 challenge host override"
);
}
const challengeUrl = new URL(`/.well-known/acme-challenge/${challenge.auth.token}`, `http://${host}`);
logger.info({ challengeUrl }, "Performing ACME HTTP-01 challenge validation");
try {
// TODO: read config from the profile to get the timeout instead
const timeoutMs = 10 * 1000; // 10 seconds
// Notice: well, we are in a transaction, ideally we should not hold transaction and perform
// a long running operation for long time. But assuming we are not performing a tons of
// challenge validation at the same time, it should be fine.
const challengeResponse = await axios.get<string>(challengeUrl.toString(), {
// In case if we override the host in the development mode, still provide the original host in the header
// to help the upstream server to validate the request
headers: { Host: challenge.auth.identifierValue },
timeout: timeoutMs,
responseType: "text",
validateStatus: () => true
});
if (challengeResponse.status !== 200) {
throw new AcmeIncorrectResponseError({
message: `ACME challenge response is not 200: ${challengeResponse.status}`
});
}
const challengeResponseBody: string = challengeResponse.data;
const thumbprint = challenge.auth.account.publicKeyThumbprint;
const expectedChallengeResponseBody = `${challenge.auth.token}.${thumbprint}`;
if (challengeResponseBody.trimEnd() !== expectedChallengeResponseBody) {
throw new AcmeIncorrectResponseError({ message: "ACME challenge response is not correct" });
if (challenge.type === AcmeChallengeType.HTTP_01) {
await validateHttp01Challenge(challenge);
} else if (challenge.type === AcmeChallengeType.DNS_01) {
await validateDns01Challenge(challenge);
} else {
throw new BadRequestError({ message: `Unsupported challenge type: ${challenge.type}` });
}
logger.info({ challengeId }, "ACME challenge response is correct, marking challenge as valid");
await acmeChallengeDAL.markAsValidCascadeById(challengeId);
await auditLogService.createAuditLog({
@@ -135,67 +260,7 @@ export const pkiAcmeChallengeServiceFactory = ({
}
});
} catch (exp) {
let finalAttempt = false;
if (retryCount >= 2) {
logger.error(
exp,
`Last attempt to validate ACME challenge response failed, marking ${challengeId} challenge as invalid`
);
// This is the last attempt to validate the challenge response, if it fails, we mark the challenge as invalid
await acmeChallengeDAL.markAsInvalidCascadeById(challengeId);
finalAttempt = true;
}
try {
// Properly type and inspect the error
if (axios.isAxiosError(exp)) {
const axiosError = exp as AxiosError;
const errorCode = axiosError.code;
const errorMessage = axiosError.message;
if (errorCode === "ECONNREFUSED" || errorMessage.includes("ECONNREFUSED")) {
throw new AcmeConnectionError({ message: "Connection refused" });
}
if (errorCode === "ENOTFOUND" || errorMessage.includes("ENOTFOUND")) {
throw new AcmeDnsFailureError({ message: "Hostname could not be resolved (DNS failure)" });
}
if (errorCode === "ECONNRESET" || errorMessage.includes("ECONNRESET")) {
throw new AcmeConnectionError({ message: "Connection reset by peer" });
}
if (errorCode === "ECONNABORTED" || errorMessage.includes("timeout")) {
logger.error(exp, "Connection timed out while validating ACME challenge response");
throw new AcmeConnectionError({ message: "Connection timed out" });
}
logger.error(exp, "Unknown error validating ACME challenge response");
throw new AcmeServerInternalError({ message: "Unknown error validating ACME challenge response" });
}
if (exp instanceof Error) {
logger.error(exp, "Error validating ACME challenge response");
throw exp;
}
logger.error(exp, "Unknown error validating ACME challenge response");
throw new AcmeServerInternalError({ message: "Unknown error validating ACME challenge response" });
} catch (outterExp) {
await auditLogService.createAuditLog({
projectId: challenge.auth.account.project.id,
actor: {
type: ActorType.ACME_ACCOUNT,
metadata: {
profileId: challenge.auth.account.profileId,
accountId: challenge.auth.account.id
}
},
event: {
type: finalAttempt ? EventType.FAIL_ACME_CHALLENGE : EventType.ATTEMPT_ACME_CHALLENGE,
metadata: {
challengeId,
type: challenge.type as AcmeChallengeType,
retryCount,
errorMessage: exp instanceof Error ? exp.message : "Unknown error"
}
}
});
throw outterExp;
}
await handleChallengeValidationError(exp, challenge, challengeId, retryCount);
}
};

View File

@@ -707,15 +707,17 @@ export const pkiAcmeServiceFactory = ({
tx
);
if (!skipDnsOwnershipVerification) {
// TODO: support other challenge types here. Currently only HTTP-01 is supported.
await acmeChallengeDAL.create(
{
authId: auth.id,
status: AcmeChallengeStatus.Pending,
type: AcmeChallengeType.HTTP_01
},
tx
);
for (const challengeType of [AcmeChallengeType.HTTP_01, AcmeChallengeType.DNS_01]) {
// eslint-disable-next-line no-await-in-loop
await acmeChallengeDAL.create(
{
authId: auth.id,
status: AcmeChallengeStatus.Pending,
type: challengeType
},
tx
);
}
}
return auth;
})

View File

@@ -119,6 +119,16 @@ const envSchema = z
})
.default("{}")
),
ACME_DNS_RESOLVER_SERVERS: zpStr(
z
.string()
.optional()
.transform((val) => {
if (!val) return [];
return val.split(",");
})
),
ACME_DNS_RESOLVE_RESOLVER_SERVERS_HOST_ENABLED: zodStrBool.default("false").optional(),
DNS_MADE_EASY_SANDBOX_ENABLED: zodStrBool.default("false").optional(),
// smtp options
SMTP_HOST: zpStr(z.string().optional()),

View File

@@ -94,6 +94,13 @@ services:
volumes:
- ./backend/bdd/pebble/:/var/data/pebble:ro
technitium:
image: technitium/dns-server:14.2.0
ports:
- "5380:5380/tcp"
environment:
- DNS_SERVER_ADMIN_PASSWORD=infisical
volumes:
postgres-data:
driver: local