diff --git a/compiler/src/nfa/codegen/noir.rs b/compiler/src/nfa/codegen/noir.rs index d433189..b406200 100644 --- a/compiler/src/nfa/codegen/noir.rs +++ b/compiler/src/nfa/codegen/noir.rs @@ -290,7 +290,7 @@ pub fn generate_noir_code( code.push_str(&format!(" capture_group_ids,\n")); code.push_str(&format!(" capture_group_starts,\n")); code.push_str(&format!( - " capture_group_start_indices[{}] - match_start as Field,\n", + " capture_group_start_indices[{}],\n", capture_group_id - 1 )); code.push_str(&format!(" );\n\n")); diff --git a/noir/scripts/gen_regex.sh b/noir/scripts/gen_regex.sh index 5839740..07007ba 100755 --- a/noir/scripts/gen_regex.sh +++ b/noir/scripts/gen_regex.sh @@ -12,7 +12,7 @@ codegen_regex() { --decomposed-regex-path "./noir/templates/${template_name}/${template_name}.json" \ --output-file-path ./noir/templates/${template_name} \ --template-name "$template_name_pascal" \ - --noir + --proving-framework noir mv ./noir/templates/${template_name}/${template_name}_regex.nr ./noir/src/common/${template_name}_regex.nr sed -i 's/zkregex/crate/g' ./noir/src/common/${template_name}_regex.nr diff --git a/noir/src/common/body_hash_regex.nr b/noir/src/common/body_hash_regex.nr index 127ca4a..28e011d 100644 --- a/noir/src/common/body_hash_regex.nr +++ b/noir/src/common/body_hash_regex.nr @@ -61,6 +61,19 @@ fn check_accept_state( accept_state_reached } +/** + * BodyHashRegex matching function + * Regex: (?:\r\n|^)dkim-signature:(?:[a-z]+=[^;]+; )+bh=([a-zA-Z0-9+/=]+); + * @param in_haystack - The input haystack to search from + * @param match_start - The start index in the haystack for the subarray to match from + * @param match_length - The length of the subarray to extract from haystack + * @param current_states - The current states of the NFA at each index in the match subarray + * @param next_states - The next states of the NFA at each index in the match subarray + * @param capture_group_ids - The ids of the capture groups in the match subarray + * @param capture_group_starts - The start positions of the capture groups in the match subarray + * @param capture_group_start_indices - The start indices of the capture groups in the match subarray + * @return - tuple of substring captures as dictated by the regular expression + */ pub fn regex_match( in_haystack: [u8; MAX_HAYSTACK_LEN], match_start: u32, @@ -71,7 +84,6 @@ pub fn regex_match( capture_group_starts: [Field; MAX_MATCH_LEN], capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS], ) -> (BoundedVec) { - // regex:"(?:\r\n|^)dkim-signature:(?:[a-z]+=[^;]+; )+bh=([a-zA-Z0-9+/=]+);" // resize haystack let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length); @@ -100,13 +112,15 @@ pub fn regex_match( ); } assert(reached_end_state == 0, "Did not reach a valid end state"); - // Capture Group 1 - let capture_1 = capture_substring::( - haystack, - capture_group_ids, - capture_group_starts, - capture_group_start_indices[0], - ); + + // Capture Group 1 + let capture_1 = capture_substring::( + haystack, + capture_group_ids, + capture_group_starts, + capture_group_start_indices[0], + ); + (capture_1) } diff --git a/noir/src/common/email_addr_regex.nr b/noir/src/common/email_addr_regex.nr index 0ac618b..dbd67f8 100644 --- a/noir/src/common/email_addr_regex.nr +++ b/noir/src/common/email_addr_regex.nr @@ -61,6 +61,19 @@ fn check_accept_state( accept_state_reached } +/** + * EmailAddrRegex matching function + * Regex: ([A-Za-z0-9!#$%&\'*+=?\\-\\^_`{|}~./@]+@[A-Za-z0-9.\\-]+) + * @param in_haystack - The input haystack to search from + * @param match_start - The start index in the haystack for the subarray to match from + * @param match_length - The length of the subarray to extract from haystack + * @param current_states - The current states of the NFA at each index in the match subarray + * @param next_states - The next states of the NFA at each index in the match subarray + * @param capture_group_ids - The ids of the capture groups in the match subarray + * @param capture_group_starts - The start positions of the capture groups in the match subarray + * @param capture_group_start_indices - The start indices of the capture groups in the match subarray + * @return - tuple of substring captures as dictated by the regular expression + */ pub fn regex_match( in_haystack: [u8; MAX_HAYSTACK_LEN], match_start: u32, @@ -71,7 +84,6 @@ pub fn regex_match( capture_group_starts: [Field; MAX_MATCH_LEN], capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS], ) -> (BoundedVec) { - // regex:"([A-Za-z0-9!#$%&'*+=?\\-\\^_`{|}~./@]+@[A-Za-z0-9.\\-]+)" // resize haystack let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length); @@ -100,13 +112,15 @@ pub fn regex_match( ); } assert(reached_end_state == 0, "Did not reach a valid end state"); - // Capture Group 1 - let capture_1 = capture_substring::( - haystack, - capture_group_ids, - capture_group_starts, - capture_group_start_indices[0], - ); + + // Capture Group 1 + let capture_1 = capture_substring::( + haystack, + capture_group_ids, + capture_group_starts, + capture_group_start_indices[0], + ); + (capture_1) } diff --git a/noir/src/common/email_domain_regex.nr b/noir/src/common/email_domain_regex.nr index 7a86c1d..23fd3d6 100644 --- a/noir/src/common/email_domain_regex.nr +++ b/noir/src/common/email_domain_regex.nr @@ -61,6 +61,19 @@ fn check_accept_state( accept_state_reached } +/** + * EmailDomainRegex matching function + * Regex: [A-Za-z0-9!#$%&\'*+=?\\-\\^_`{|}~./]+@([A-Za-z0-9.\\-@]+) + * @param in_haystack - The input haystack to search from + * @param match_start - The start index in the haystack for the subarray to match from + * @param match_length - The length of the subarray to extract from haystack + * @param current_states - The current states of the NFA at each index in the match subarray + * @param next_states - The next states of the NFA at each index in the match subarray + * @param capture_group_ids - The ids of the capture groups in the match subarray + * @param capture_group_starts - The start positions of the capture groups in the match subarray + * @param capture_group_start_indices - The start indices of the capture groups in the match subarray + * @return - tuple of substring captures as dictated by the regular expression + */ pub fn regex_match( in_haystack: [u8; MAX_HAYSTACK_LEN], match_start: u32, @@ -71,7 +84,6 @@ pub fn regex_match( capture_group_starts: [Field; MAX_MATCH_LEN], capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS], ) -> (BoundedVec) { - // regex:"[A-Za-z0-9!#$%&'*+=?\\-\\^_`{|}~./]+@([A-Za-z0-9.\\-@]+)" // resize haystack let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length); @@ -100,13 +112,15 @@ pub fn regex_match( ); } assert(reached_end_state == 0, "Did not reach a valid end state"); - // Capture Group 1 - let capture_1 = capture_substring::( - haystack, - capture_group_ids, - capture_group_starts, - capture_group_start_indices[0], - ); + + // Capture Group 1 + let capture_1 = capture_substring::( + haystack, + capture_group_ids, + capture_group_starts, + capture_group_start_indices[0], + ); + (capture_1) } diff --git a/noir/src/common/simple_regex.nr b/noir/src/common/simple_regex.nr index 274640c..820a202 100644 --- a/noir/src/common/simple_regex.nr +++ b/noir/src/common/simple_regex.nr @@ -58,6 +58,15 @@ fn check_accept_state( accept_state_reached } +/** + * SimpleRegex matching function + * Regex: a*b + * @param in_haystack - The input haystack to search from + * @param match_start - The start index in the haystack for the subarray to match from + * @param match_length - The length of the subarray to extract from haystack + * @param current_states - The current states of the NFA at each index in the match subarray + * @param next_states - The next states of the NFA at each index in the match subarray + */ pub fn regex_match( in_haystack: [u8; MAX_HAYSTACK_LEN], match_start: u32, @@ -65,7 +74,6 @@ pub fn regex_match( current_states: [Field; MAX_MATCH_LEN], next_states: [Field; MAX_MATCH_LEN], ) { - // regex:"a*b" // resize haystack let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length); @@ -92,5 +100,6 @@ pub fn regex_match( ); } assert(reached_end_state == 0, "Did not reach a valid end state"); + } diff --git a/noir/src/common/subject_all_regex.nr b/noir/src/common/subject_all_regex.nr index 99ba59f..c7611c7 100644 --- a/noir/src/common/subject_all_regex.nr +++ b/noir/src/common/subject_all_regex.nr @@ -61,6 +61,19 @@ fn check_accept_state( accept_state_reached } +/** + * SubjectAllRegex matching function + * Regex: (?:\r\n|^)subject:([a-z]+)\r\n + * @param in_haystack - The input haystack to search from + * @param match_start - The start index in the haystack for the subarray to match from + * @param match_length - The length of the subarray to extract from haystack + * @param current_states - The current states of the NFA at each index in the match subarray + * @param next_states - The next states of the NFA at each index in the match subarray + * @param capture_group_ids - The ids of the capture groups in the match subarray + * @param capture_group_starts - The start positions of the capture groups in the match subarray + * @param capture_group_start_indices - The start indices of the capture groups in the match subarray + * @return - tuple of substring captures as dictated by the regular expression + */ pub fn regex_match( in_haystack: [u8; MAX_HAYSTACK_LEN], match_start: u32, @@ -71,7 +84,6 @@ pub fn regex_match( capture_group_starts: [Field; MAX_MATCH_LEN], capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS], ) -> (BoundedVec) { - // regex:"(?:\r\n|^)subject:([a-z]+)\r\n" // resize haystack let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length); @@ -100,13 +112,15 @@ pub fn regex_match( ); } assert(reached_end_state == 0, "Did not reach a valid end state"); - // Capture Group 1 - let capture_1 = capture_substring::( - haystack, - capture_group_ids, - capture_group_starts, - capture_group_start_indices[0], - ); + + // Capture Group 1 + let capture_1 = capture_substring::( + haystack, + capture_group_ids, + capture_group_starts, + capture_group_start_indices[0], + ); + (capture_1) } diff --git a/noir/src/common/timestamp_regex.nr b/noir/src/common/timestamp_regex.nr index 0fdac84..73c385d 100644 --- a/noir/src/common/timestamp_regex.nr +++ b/noir/src/common/timestamp_regex.nr @@ -41,7 +41,7 @@ fn check_start_state(start_state: Field) { */ fn check_accept_state( next_state: Field, - haystack_index: Field, + haystack_index: Field, asserted_match_length: Field ) -> Field { // check if the next state is an accept state @@ -61,6 +61,19 @@ fn check_accept_state( accept_state_reached } +/** + * TimestampRegex matching function + * Regex: (?:\r\n|^)dkim-signature:(?:[a-z]+=[^;]+; )+t=([0-9]+); + * @param in_haystack - The input haystack to search from + * @param match_start - The start index in the haystack for the subarray to match from + * @param match_length - The length of the subarray to extract from haystack + * @param current_states - The current states of the NFA at each index in the match subarray + * @param next_states - The next states of the NFA at each index in the match subarray + * @param capture_group_ids - The ids of the capture groups in the match subarray + * @param capture_group_starts - The start positions of the capture groups in the match subarray + * @param capture_group_start_indices - The start indices of the capture groups in the match subarray + * @return - tuple of substring captures as dictated by the regular expression + */ pub fn regex_match( in_haystack: [u8; MAX_HAYSTACK_LEN], match_start: u32, @@ -71,9 +84,9 @@ pub fn regex_match( capture_group_starts: [Field; MAX_MATCH_LEN], capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS], ) -> (BoundedVec) { - // regex:"(?:\r\n|^)dkim-signature:(?:[a-z]+=[^;]+; )+t=([0-9]+);" // resize haystack let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length); + let mut reached_end_state = 1; check_start_state(current_states[0]); for i in 0..MAX_MATCH_LEN-1 { @@ -99,13 +112,15 @@ pub fn regex_match( ); } assert(reached_end_state == 0, "Did not reach a valid end state"); - // Capture Group 1 - let capture_1 = capture_substring::( - haystack, - capture_group_ids, - capture_group_starts, - capture_group_start_indices[0], - ); + + // Capture Group 1 + let capture_1 = capture_substring::( + haystack, + capture_group_ids, + capture_group_starts, + capture_group_start_indices[0], + ); + (capture_1) } diff --git a/noir/src/common/to_all_regex.nr b/noir/src/common/to_all_regex.nr index 71dccce..6f6f331 100644 --- a/noir/src/common/to_all_regex.nr +++ b/noir/src/common/to_all_regex.nr @@ -61,6 +61,19 @@ fn check_accept_state( accept_state_reached } +/** + * ToAllRegex matching function + * Regex: (?:\r\n|^)to:([^\r\n]+)\r\n + * @param in_haystack - The input haystack to search from + * @param match_start - The start index in the haystack for the subarray to match from + * @param match_length - The length of the subarray to extract from haystack + * @param current_states - The current states of the NFA at each index in the match subarray + * @param next_states - The next states of the NFA at each index in the match subarray + * @param capture_group_ids - The ids of the capture groups in the match subarray + * @param capture_group_starts - The start positions of the capture groups in the match subarray + * @param capture_group_start_indices - The start indices of the capture groups in the match subarray + * @return - tuple of substring captures as dictated by the regular expression + */ pub fn regex_match( in_haystack: [u8; MAX_HAYSTACK_LEN], match_start: u32, @@ -71,7 +84,6 @@ pub fn regex_match( capture_group_starts: [Field; MAX_MATCH_LEN], capture_group_start_indices: [Field; NUM_CAPTURE_GROUPS], ) -> (BoundedVec) { - // regex:"(?:\r\n|^)to:([^\r\n]+)\r\n" // resize haystack let haystack: [u8; MAX_MATCH_LEN] = select_subarray(in_haystack, match_start, match_length); @@ -100,13 +112,15 @@ pub fn regex_match( ); } assert(reached_end_state == 0, "Did not reach a valid end state"); - // Capture Group 1 - let capture_1 = capture_substring::( - haystack, - capture_group_ids, - capture_group_starts, - capture_group_start_indices[0], - ); + + // Capture Group 1 + let capture_1 = capture_substring::( + haystack, + capture_group_ids, + capture_group_starts, + capture_group_start_indices[0], + ); + (capture_1) } diff --git a/noir/src/utils/captures.nr b/noir/src/utils/captures.nr index 72e0e08..5687f63 100644 --- a/noir/src/utils/captures.nr +++ b/noir/src/utils/captures.nr @@ -128,7 +128,6 @@ fn substring_from_mask( start_index: Field, ) -> BoundedVec { let substring = unsafe { __substring_from_mask(haystack, mask, start_index) }; - println(f"substring: {substring}"); let mut length = 0; // check mask[start_index - 1] = 0