mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
extract_code can detect single-line code now (#2)
* extract_code can detect single-line code now * Add comments for RE * Add test case and adjust UNKNOWN behavior * Remove tmp test files * Update autogen/code_utils.py --------- Co-authored-by: Chi Wang <wang.chi@microsoft.com>
This commit is contained in:
@@ -34,24 +34,43 @@ def infer_lang(code):
|
||||
return "python"
|
||||
|
||||
|
||||
def extract_code(text: str, pattern: str = CODE_BLOCK_PATTERN) -> List[Tuple[str, str]]:
|
||||
def extract_code(
|
||||
text: str, pattern: str = CODE_BLOCK_PATTERN, detect_single_line_code: bool = False
|
||||
) -> List[Tuple[str, str]]:
|
||||
"""Extract code from a text.
|
||||
|
||||
Args:
|
||||
text (str): The text to extract code from.
|
||||
pattern (Optional, str): The regular expression pattern for finding the code block.
|
||||
pattern (str, optional): The regular expression pattern for finding the
|
||||
code block. Defaults to CODE_BLOCK_PATTERN.
|
||||
detect_single_line_code (bool, optional): Enable the new feature for
|
||||
extracting single line code. Defaults to False.
|
||||
|
||||
Returns:
|
||||
list: A list of tuples, each containing the language and the code.
|
||||
If there is no code block in the input text, the language would be "unknown".
|
||||
If there is a code block but the language is not specified, the language would be "".
|
||||
"""
|
||||
# Use a regular expression to find all the code blocks
|
||||
match = re.findall(pattern, text, flags=re.DOTALL)
|
||||
# match = re.search(pattern, text, flags=re.DOTALL)
|
||||
# If a match is found, return the code
|
||||
# if match:
|
||||
# return match.group(2), match.group(1)
|
||||
# If no code block is found, return the whole text
|
||||
return match if match else [(UNKNOWN, text)]
|
||||
if not detect_single_line_code:
|
||||
match = re.findall(pattern, text, flags=re.DOTALL)
|
||||
return match if match else [(UNKNOWN, text)]
|
||||
|
||||
# Extract both multi-line and single-line code blocks; the two alternatives are separated by the | operator
|
||||
# `{3}(\w+)?\s*([\s\S]*?)`{3}: Matches multi-line code blocks.
|
||||
# The (\w+)? matches the language, where the ? indicates it is optional.
|
||||
# `([^`]+)`: Matches inline code.
|
||||
code_pattern = re.compile(r"`{3}(\w+)?\s*([\s\S]*?)`{3}|`([^`]+)`")
|
||||
code_blocks = code_pattern.findall(text)
|
||||
|
||||
# Extract the individual code blocks and languages from the matched groups
|
||||
extracted = []
|
||||
for lang, group1, group2 in code_blocks:
|
||||
if group1:
|
||||
extracted.append((lang.strip(), group1.strip()))
|
||||
elif group2:
|
||||
extracted.append(("", group2.strip()))
|
||||
|
||||
return extracted
|
||||
|
||||
|
||||
# _FIND_CODE_SYS_MSG = [
|
||||
|
||||
@@ -161,10 +161,23 @@ Example:
|
||||
```
|
||||
print("hello extract code")
|
||||
```
|
||||
"""
|
||||
""",
|
||||
detect_single_line_code=False,
|
||||
)
|
||||
print(codeblocks)
|
||||
|
||||
codeblocks2 = extract_code(
|
||||
"""
|
||||
Example:
|
||||
```
|
||||
print("hello extract code")
|
||||
```
|
||||
""",
|
||||
detect_single_line_code=True,
|
||||
)
|
||||
assert codeblocks2 == codeblocks
|
||||
# import pdb; pdb.set_trace()
|
||||
|
||||
codeblocks = extract_code(
|
||||
"""
|
||||
Example:
|
||||
@@ -190,6 +203,15 @@ print(f"Text: {text}")
|
||||
codeblocks = extract_code("no code block")
|
||||
assert len(codeblocks) == 1 and codeblocks[0] == (UNKNOWN, "no code block")
|
||||
|
||||
# Disable single line code detection
|
||||
line = "Run `source setup.sh` from terminal"
|
||||
codeblocks = extract_code(line, detect_single_line_code=False)
|
||||
assert len(codeblocks) == 1 and codeblocks[0] == (UNKNOWN, line)
|
||||
|
||||
# Enable single line code detection
|
||||
codeblocks = extract_code("Run `source setup.sh` from terminal", detect_single_line_code=True)
|
||||
assert len(codeblocks) == 1 and codeblocks[0] == ("", "source setup.sh")
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
sys.platform in ["darwin", "win32"],
|
||||
|
||||
Reference in New Issue
Block a user