revert - match start

This commit is contained in:
Jack Gilcrest
2025-05-08 16:38:05 -06:00
parent 296289dc53
commit d482913b43
10 changed files with 146 additions and 53 deletions

View File

@@ -290,7 +290,7 @@ pub fn generate_noir_code(
code.push_str(&format!(" capture_group_ids,\n"));
code.push_str(&format!(" capture_group_starts,\n"));
code.push_str(&format!(
" capture_group_start_indices[{}] - match_start as Field,\n",
" capture_group_start_indices[{}],\n",
capture_group_id - 1
));
code.push_str(&format!(" );\n\n"));

View File

@@ -12,7 +12,7 @@ codegen_regex() {
--decomposed-regex-path "./noir/templates/${template_name}/${template_name}.json" \
--output-file-path ./noir/templates/${template_name} \
--template-name "$template_name_pascal" \
--noir
--proving-framework noir
mv ./noir/templates/${template_name}/${template_name}_regex.nr ./noir/src/common/${template_name}_regex.nr
sed -i 's/zkregex/crate/g' ./noir/src/common/${template_name}_regex.nr

View File

@@ -61,6 +61,19 @@ fn check_accept_state(
accept_state_reached
}
/**
* BodyHashRegex matching function
* Regex: (?:\r\n|^)dkim-signature:(?:[a-z]+=[^;]+; )+bh=([a-zA-Z0-9+/=]+);
* @param in_haystack - The input haystack to search from
* @param match_start - The start index in the haystack for the subarray to match from
* @param match_length - The length of the subarray to extract from haystack
* @param current_states - The current states of the NFA at each index in the match subarray
* @param next_states - The next states of the NFA at each index in the match subarray
* @param capture_group_ids - The ids of the capture groups in the match subarray
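* @param capture_group_starts - The start positions of the capture groups in the match subarray (one entry per index of the subarray, MAX_MATCH_LEN entries)
* @param capture_group_start_indices - The start indices of the capture groups in the match subarray (one entry per capture group, NUM_CAPTURE_GROUPS entries)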
* @return - tuple of substring captures as dictated by the regular expression
*/
pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
in_haystack: [u8; MAX_HAYSTACK_LEN],
match_start: u32,
@@ -71,7 +84,6 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
capture_group_starts: [Field; MAX_MATCH_LEN],
capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS],
) -> (BoundedVec<u8, CAPTURE_1_MAX_LENGTH>) {
// regex:"(?:\r\n|^)dkim-signature:(?:[a-z]+=[^;]+; )+bh=([a-zA-Z0-9+/=]+);"
// resize haystack
let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length);
@@ -100,13 +112,15 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
);
}
assert(reached_end_state == 0, "Did not reach a valid end state");
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
(capture_1)
}

View File

@@ -61,6 +61,19 @@ fn check_accept_state(
accept_state_reached
}
/**
* EmailAddrRegex matching function
* Regex: ([A-Za-z0-9!#$%&\'*+=?\\-\\^_`{|}~./@]+@[A-Za-z0-9.\\-]+)
* @param in_haystack - The input haystack to search from
* @param match_start - The start index in the haystack for the subarray to match from
* @param match_length - The length of the subarray to extract from haystack
* @param current_states - The current states of the NFA at each index in the match subarray
* @param next_states - The next states of the NFA at each index in the match subarray
* @param capture_group_ids - The ids of the capture groups in the match subarray
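* @param capture_group_starts - The start positions of the capture groups in the match subarray (one entry per index of the subarray, MAX_MATCH_LEN entries)
* @param capture_group_start_indices - The start indices of the capture groups in the match subarray (one entry per capture group, NUM_CAPTURE_GROUPS entries)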
* @return - tuple of substring captures as dictated by the regular expression
*/
pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
in_haystack: [u8; MAX_HAYSTACK_LEN],
match_start: u32,
@@ -71,7 +84,6 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
capture_group_starts: [Field; MAX_MATCH_LEN],
capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS],
) -> (BoundedVec<u8, CAPTURE_1_MAX_LENGTH>) {
// regex:"([A-Za-z0-9!#$%&'*+=?\\-\\^_`{|}~./@]+@[A-Za-z0-9.\\-]+)"
// resize haystack
let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length);
@@ -100,13 +112,15 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
);
}
assert(reached_end_state == 0, "Did not reach a valid end state");
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
(capture_1)
}

View File

@@ -61,6 +61,19 @@ fn check_accept_state(
accept_state_reached
}
/**
* EmailDomainRegex matching function
* Regex: [A-Za-z0-9!#$%&\'*+=?\\-\\^_`{|}~./]+@([A-Za-z0-9.\\-@]+)
* @param in_haystack - The input haystack to search from
* @param match_start - The start index in the haystack for the subarray to match from
* @param match_length - The length of the subarray to extract from haystack
* @param current_states - The current states of the NFA at each index in the match subarray
* @param next_states - The next states of the NFA at each index in the match subarray
* @param capture_group_ids - The ids of the capture groups in the match subarray
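* @param capture_group_starts - The start positions of the capture groups in the match subarray (one entry per index of the subarray, MAX_MATCH_LEN entries)
* @param capture_group_start_indices - The start indices of the capture groups in the match subarray (one entry per capture group, NUM_CAPTURE_GROUPS entries)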
* @return - tuple of substring captures as dictated by the regular expression
*/
pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
in_haystack: [u8; MAX_HAYSTACK_LEN],
match_start: u32,
@@ -71,7 +84,6 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
capture_group_starts: [Field; MAX_MATCH_LEN],
capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS],
) -> (BoundedVec<u8, CAPTURE_1_MAX_LENGTH>) {
// regex:"[A-Za-z0-9!#$%&'*+=?\\-\\^_`{|}~./]+@([A-Za-z0-9.\\-@]+)"
// resize haystack
let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length);
@@ -100,13 +112,15 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
);
}
assert(reached_end_state == 0, "Did not reach a valid end state");
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
(capture_1)
}

View File

@@ -58,6 +58,15 @@ fn check_accept_state(
accept_state_reached
}
/**
* SimpleRegex matching function
* Regex: a*b
* @param in_haystack - The input haystack to search from
* @param match_start - The start index in the haystack for the subarray to match from
* @param match_length - The length of the subarray to extract from haystack
* @param current_states - The current states of the NFA at each index in the match subarray
* @param next_states - The next states of the NFA at each index in the match subarray
*/
pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
in_haystack: [u8; MAX_HAYSTACK_LEN],
match_start: u32,
@@ -65,7 +74,6 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
current_states: [Field; MAX_MATCH_LEN],
next_states: [Field; MAX_MATCH_LEN],
) {
// regex:"a*b"
// resize haystack
let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length);
@@ -92,5 +100,6 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
);
}
assert(reached_end_state == 0, "Did not reach a valid end state");
}

View File

@@ -61,6 +61,19 @@ fn check_accept_state(
accept_state_reached
}
/**
* SubjectAllRegex matching function
* Regex: (?:\r\n|^)subject:([a-z]+)\r\n
* @param in_haystack - The input haystack to search from
* @param match_start - The start index in the haystack for the subarray to match from
* @param match_length - The length of the subarray to extract from haystack
* @param current_states - The current states of the NFA at each index in the match subarray
* @param next_states - The next states of the NFA at each index in the match subarray
* @param capture_group_ids - The ids of the capture groups in the match subarray
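* @param capture_group_starts - The start positions of the capture groups in the match subarray (one entry per index of the subarray, MAX_MATCH_LEN entries)
* @param capture_group_start_indices - The start indices of the capture groups in the match subarray (one entry per capture group, NUM_CAPTURE_GROUPS entries)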
* @return - tuple of substring captures as dictated by the regular expression
*/
pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
in_haystack: [u8; MAX_HAYSTACK_LEN],
match_start: u32,
@@ -71,7 +84,6 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
capture_group_starts: [Field; MAX_MATCH_LEN],
capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS],
) -> (BoundedVec<u8, CAPTURE_1_MAX_LENGTH>) {
// regex:"(?:\r\n|^)subject:([a-z]+)\r\n"
// resize haystack
let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length);
@@ -100,13 +112,15 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
);
}
assert(reached_end_state == 0, "Did not reach a valid end state");
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
(capture_1)
}

View File

@@ -41,7 +41,7 @@ fn check_start_state(start_state: Field) {
*/
fn check_accept_state(
next_state: Field,
haystack_index: Field,
haystack_index: Field,
asserted_match_length: Field
) -> Field {
// check if the next state is an accept state
@@ -61,6 +61,19 @@ fn check_accept_state(
accept_state_reached
}
/**
* TimestampRegex matching function
* Regex: (?:\r\n|^)dkim-signature:(?:[a-z]+=[^;]+; )+t=([0-9]+);
* @param in_haystack - The input haystack to search from
* @param match_start - The start index in the haystack for the subarray to match from
* @param match_length - The length of the subarray to extract from haystack
* @param current_states - The current states of the NFA at each index in the match subarray
* @param next_states - The next states of the NFA at each index in the match subarray
* @param capture_group_ids - The ids of the capture groups in the match subarray
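* @param capture_group_starts - The start positions of the capture groups in the match subarray (one entry per index of the subarray, MAX_MATCH_LEN entries)
* @param capture_group_start_indices - The start indices of the capture groups in the match subarray (one entry per capture group, NUM_CAPTURE_GROUPS entries)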
* @return - tuple of substring captures as dictated by the regular expression
*/
pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
in_haystack: [u8; MAX_HAYSTACK_LEN],
match_start: u32,
@@ -71,9 +84,9 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
capture_group_starts: [Field; MAX_MATCH_LEN],
capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS],
) -> (BoundedVec<u8, CAPTURE_1_MAX_LENGTH>) {
// regex:"(?:\r\n|^)dkim-signature:(?:[a-z]+=[^;]+; )+t=([0-9]+);"
// resize haystack
let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length);
let mut reached_end_state = 1;
check_start_state(current_states[0]);
for i in 0..MAX_MATCH_LEN-1 {
@@ -99,13 +112,15 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
);
}
assert(reached_end_state == 0, "Did not reach a valid end state");
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
(capture_1)
}

View File

@@ -61,6 +61,19 @@ fn check_accept_state(
accept_state_reached
}
/**
* ToAllRegex matching function
* Regex: (?:\r\n|^)to:([^\r\n]+)\r\n
* @param in_haystack - The input haystack to search from
* @param match_start - The start index in the haystack for the subarray to match from
* @param match_length - The length of the subarray to extract from haystack
* @param current_states - The current states of the NFA at each index in the match subarray
* @param next_states - The next states of the NFA at each index in the match subarray
* @param capture_group_ids - The ids of the capture groups in the match subarray
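* @param capture_group_starts - The start positions of the capture groups in the match subarray (one entry per index of the subarray, MAX_MATCH_LEN entries)
* @param capture_group_start_indices - The start indices of the capture groups in the match subarray (one entry per capture group, NUM_CAPTURE_GROUPS entries)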
* @return - tuple of substring captures as dictated by the regular expression
*/
pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
in_haystack: [u8; MAX_HAYSTACK_LEN],
match_start: u32,
@@ -71,7 +84,6 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
capture_group_starts: [Field; MAX_MATCH_LEN],
capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS],
) -> (BoundedVec<u8, CAPTURE_1_MAX_LENGTH>) {
// regex:"(?:\r\n|^)to:([^\r\n]+)\r\n"
// resize haystack
let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length);
@@ -100,13 +112,15 @@ pub fn regex_match<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
);
}
assert(reached_end_state == 0, "Did not reach a valid end state");
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
// Capture Group 1
let capture_1 = capture_substring::<MAX_MATCH_LEN, CAPTURE_1_MAX_LENGTH, 1>(
haystack,
capture_group_ids,
capture_group_starts,
capture_group_start_indices[0],
);
(capture_1)
}

View File

@@ -128,7 +128,6 @@ fn substring_from_mask<let MAX_HAYSTACK_LEN: u32, let MAX_MATCH_LEN: u32>(
start_index: Field,
) -> BoundedVec<u8, MAX_MATCH_LEN> {
let substring = unsafe { __substring_from_mask(haystack, mask, start_index) };
println(f"substring: {substring}");
let mut length = 0;
// check mask[start_index - 1] = 0