Fix: Update the markdown_validator example

Issue #183 reported that the markdown_validator example structure needed to be updated. This commit includes:
- Adding a config/ directory for the YAML configuration files (agents.yaml, tasks.yaml).
- Adding a dedicated tools/ folder for helper utilities, such as markdownTools.py.
- Adding the crew.py module, which contains the logic for the MarkDownValidatorCrew.
- Adding a main.py file, which now serves as the entry point for the project and includes the run and train functions.
BasharAssaf
2024-10-23 20:43:56 +03:00
parent 660c7dbdab
commit c4e8cb9030
12 changed files with 5719 additions and 1021 deletions
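
The new layout mirrors the standard CrewAI project scaffold. A rough sketch of how the pieces are assumed to fit together (module paths are taken from the diffs below; README.md is only an example argument):

# main.py is the entry point: it loads the crew defined in crew.py, which reads
# config/agents.yaml and config/tasks.yaml and registers the markdown_validation_tool
# from tools/markdownTools.py.
from markdown_validator.crew import MarkDownValidatorCrew

result = MarkDownValidatorCrew().crew().kickoff(inputs={"filename": "README.md"})
print(result)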

View File

@@ -1,8 +1,2 @@
# Using OpenAI's API
OPENAI_API_KEY="sk-..."
MODEL_NAME="gpt-3.5-turbo"
# Using LLM Studio's API
# MODEL_NAME='oh-2.5m7b-q51'
# OPENAI_API_BASE_URL="http://localhost:8000/v1"
# OPENAI_API_KEY=local
SERPER_API_KEY=key_here
OPENAI_API_KEY=key_here
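
The new .env keys above are presumably read at startup via python-dotenv, which is already a project dependency; the updated main.py later in this diff loads the OpenAI settings this way. A minimal sketch, with the consumer of SERPER_API_KEY assumed rather than shown in this hunk:

import os
from dotenv import load_dotenv

load_dotenv()  # reads the .env file from the current working directory

openai_key = os.environ.get("OPENAI_API_KEY")
serper_key = os.environ.get("SERPER_API_KEY")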

View File

@@ -1,32 +0,0 @@
import os
import sys
from langchain.tools import tool
from pymarkdown.api import PyMarkdownApi, PyMarkdownApiException


@tool("markdown_validation_tool")
def markdown_validation_tool(file_path: str) -> str:
    """
    A tool to review files for markdown syntax errors.

    Parameters:
    - file_path: The path to the markdown file to be reviewed.

    Returns:
    - validation_results: A list of validation results
      and suggestions on how to fix them.
    """

    print("\n\nValidating Markdown syntax...\n\n" + file_path)

    scan_result = None
    try:
        if not (os.path.exists(file_path)):
            return "Could not validate file. The provided file path does not exist."

        scan_result = PyMarkdownApi().scan_path(file_path.rstrip().lstrip())
        results = str(scan_result)
        return results  # Return the reviewed document
    except PyMarkdownApiException as this_exception:
        print(f"API Exception: {this_exception}", file=sys.stderr)
        return f"API Exception: {str(this_exception)}"

View File

@@ -9,10 +9,12 @@ This example uses the OpenAI API to call a model. This can be through a locally
=======
- **Configure Environment**: Rename `.env.example` to `.env` and set up the environment variables for the model, the endpoint URL, and the API key.
- **Install Dependencies**: Run `poetry install --no-root`.
- **Install Dependencies**: Run `poetry lock`.
- **Execute the Script**: Run `python main.py README.md` to see a list of recommended changes to this document.
## Details & Explanation
- **Running the Script**: Execute `python main.py <path to markdown file>`. The script will leverage the CrewAI framework to process the specified file and return a list of changes.
- **Running the Script**: Execute `poetry run markdown_validator {filename}`. The script will leverage the CrewAI framework to process the specified file and return a list of changes.
- **Running the Script with agent training**: Execute `poetry run train {number_of_iterations} {filename}`. The script will leverage the CrewAI framework to process the specified file and return a list of changes, and updates the changes according to the user's feedback.
## License
This project is released under the MIT License.
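
For reference, the `poetry run train {number_of_iterations} {filename}` command above maps onto the train() entry point shown later in this diff; a roughly equivalent Python call, with 2 and README.md as placeholder arguments:

from markdown_validator.crew import MarkDownValidatorCrew

# Same effect as `poetry run train 2 README.md`, per the train() function in main.py below.
MarkDownValidatorCrew().crew().train(n_iterations=2, filename="README.md")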

View File

@@ -1,86 +0,0 @@
import sys
from crewai import Agent, Task
import os
from dotenv import load_dotenv
from langchain.tools import tool
from langchain.chat_models.openai import ChatOpenAI
from pymarkdown.api import PyMarkdownApi, PyMarkdownApiException
from MarkdownTools import markdown_validation_tool

load_dotenv()

defalut_llm = ChatOpenAI(openai_api_base=os.environ.get("OPENAI_API_BASE_URL", "https://api.openai.com/v1"),
                         openai_api_key=os.environ.get("OPENAI_API_KEY"),
                         temperature=0.1,
                         model_name=os.environ.get("MODEL_NAME", "gpt-3.5-turbo"),
                         top_p=0.3)


def process_markdown_document(filename):
    """
    Processes a markdown document by reviewing its syntax validation
    results and providing feedback on necessary changes.

    Args:
        filename (str): The path to the markdown file to be processed.

    Returns:
        str: The list of recommended changes to make to the document.
    """

    # Define general agent
    general_agent = Agent(role='Requirements Manager',
                          goal="""Provide a detailed list of the markdown
                          linting results. Give a summary with actionable
                          tasks to address the validation results. Write your
                          response as if you were handing it to a developer
                          to fix the issues.
                          DO NOT provide examples of how to fix the issues or
                          recommend other tools to use.""",
                          backstory="""You are an expert business analyst
                          and software QA specialist. You provide high quality,
                          thorough, insightful and actionable feedback via
                          detailed list of changes and actionable tasks.""",
                          allow_delegation=False,
                          verbose=True,
                          tools=[markdown_validation_tool],
                          llm=defalut_llm)

    # Define Tasks Using Crew Tools
    syntax_review_task = Task(description=f"""
            Use the markdown_validation_tool to review
            the file(s) at this path: {filename}

            Be sure to pass only the file path to the markdown_validation_tool.
            Use the following format to call the markdown_validation_tool:
            Do I need to use a tool? Yes
            Action: markdown_validation_tool
            Action Input: {filename}

            Get the validation results from the tool
            and then summarize it into a list of changes
            the developer should make to the document.
            DO NOT recommend ways to update the document.
            DO NOT change any of the content of the document or
            add content to it. It is critical to your task to
            only respond with a list of changes.

            If you already know the answer or if you do not need
            to use a tool, return it as your Final Answer.""",
            agent=general_agent)

    updated_markdown = syntax_review_task.execute()

    return updated_markdown


# If called directly from the command line take the first argument as the filename
if __name__ == "__main__":
    if len(sys.argv) > 1:
        filename = sys.argv[1]
        processed_document = process_markdown_document(filename)
        print(processed_document)

File diff suppressed because it is too large.

View File

@@ -1,12 +1,11 @@
[tool.poetry]
name = "markdown-validation-crew"
name = "markdown_validator"
version = "0.1.0"
description = ""
authors = ["ITLackey <itlackey@gmail.com>"]
[tool.poetry.dependencies]
python = ">=3.10.0,<3.12"
crewai = "^0.11.0"
crewai = "^0.75.0"
python-dotenv = "1.0.0"
markdown = "3.4.3"
pymarkdownlnt = "0.9.15"
@@ -16,6 +15,9 @@ pymarkdownlnt = "0.9.15"
useLibraryCodeForTypes = true
exclude = [".cache"]
[tool.poetry.scripts]
markdown_validator = "markdown_validator.main:run"
train = "markdown_validator.main:train"
[tool.ruff]
# https://beta.ruff.rs/docs/configuration/
select = ['E', 'W', 'F', 'I', 'B', 'C4', 'ARG', 'SIM']
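
The [tool.poetry.scripts] entries above bind the console commands to package functions, so `poetry run markdown_validator README.md` resolves to run() in markdown_validator/main.py (shown later in this diff), which reads the file path from sys.argv. A rough, illustration-only equivalent:

import sys
from markdown_validator.main import run

sys.argv = ["markdown_validator", "README.md"]  # run() takes the file path from sys.argv[1]
print(run())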

View File

@@ -0,0 +1,11 @@
Requirements_Manager:
  role: >
    Requirements Manager
  goal: >
    Provide a detailed list of the markdown linting results.
    Give a summary with actionable tasks to address the validation results.
    Write your response as if you were handing it to a developer to fix the issues.
    DO NOT provide examples of how to fix the issues or recommend other tools to use.
  backstory: >
    You are an expert business analyst and software QA specialist.
    You provide high quality, thorough, insightful, and actionable feedback via a detailed list of changes and actionable tasks.

View File

@@ -0,0 +1,19 @@
syntax_review_task:
  description: >
    Use the markdown_validation_tool to review the file(s) at this path: {filename}.
    Be sure to pass only the file path to the markdown_validation_tool.
    Use the following format to call the markdown_validation_tool:
    Do I need to use a tool? Yes
    Action: markdown_validation_tool
    Action Input: {filename}
    Get the validation results from the tool and then summarize it into a list of changes
    the developer should make to the document.
    DO NOT recommend ways to update the document.
    DO NOT change any of the content of the document or add content to it.
    It is critical to your task to only respond with a list of changes.
    If you already know the answer or if you do not need to use a tool,
    return it as your Final Answer.
  expected_output: >
    A list of changes the developer should make to the document based on the markdown validation results.
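
Note that the {filename} placeholder in this task description is interpolated at kickoff time from the inputs dictionary, so the key name used in main.py has to match it; a minimal sketch (the file path is only an example):

from markdown_validator.crew import MarkDownValidatorCrew

# CrewAI substitutes {filename} in tasks.yaml with inputs["filename"] when the crew starts.
MarkDownValidatorCrew().crew().kickoff(inputs={"filename": "README.md"})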

View File

@@ -0,0 +1,36 @@
from crewai import Agent, Crew, Process, Task
from crewai.project import CrewBase, agent, crew, task

from markdown_validator.tools.markdownTools import markdown_validation_tool


@CrewBase
class MarkDownValidatorCrew():
    """MarkDownValidatorCrew crew"""
    agents_config = 'config/agents.yaml'
    tasks_config = 'config/tasks.yaml'

    @agent
    def RequirementsManager(self) -> Agent:
        return Agent(
            config=self.agents_config['Requirements_Manager'],
            tools=[markdown_validation_tool],
            allow_delegation=False,
            verbose=False
        )

    @task
    def syntax_review_task(self) -> Task:
        return Task(
            config=self.tasks_config['syntax_review_task'],
            agent=self.RequirementsManager()
        )

    @crew
    def crew(self) -> Crew:
        """Creates the MarkDownValidatorCrew crew"""
        return Crew(
            agents=self.agents,
            tasks=self.tasks,
            process=Process.sequential,
            verbose=False,
        )

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python
import sys
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

from markdown_validator.crew import MarkDownValidatorCrew

# Load environment variables from .env file
load_dotenv()

# Initialize the OpenAI LLM
default_llm = ChatOpenAI(
    openai_api_base=os.environ.get("OPENAI_API_BASE_URL", "https://api.openai.com/v1"),
    openai_api_key=os.environ.get("OPENAI_API_KEY"),
    temperature=0.1,
    model_name=os.environ.get("MODEL_NAME", "gpt-4o-mini"),
    top_p=0.3
)


def run():
    """
    Run the markdown validation crew to analyze the markdown file.
    """
    # Get the input markdown file from command line arguments
    inputs = {
        'query': 'Please provide the markdown file to analyze:',
        'filename': sys.argv[1] if len(sys.argv) > 1 else None,  # Expect 'filename' key
    }

    # Check if the markdown file path is provided
    if inputs['filename']:
        print(f"Starting markdown validation for file: {inputs['filename']}")
        crewResult = MarkDownValidatorCrew().crew().kickoff(inputs=inputs)
        print("Markdown validation completed")
        return crewResult
    else:
        raise ValueError("Error: No markdown file provided. Please provide a file path as a command-line argument.")


def train():
    """
    Train the markdown validator crew for a given number of iterations.
    """
    # Get the number of iterations and markdown file path from command line arguments
    inputs = {
        'query': 'Training the markdown validation model.',
        'filename': sys.argv[2] if len(sys.argv) > 2 else None,  # Expect 'filename' key
    }

    # Check if the markdown file path is provided
    if inputs['filename']:
        try:
            print(f"Starting training for file: {inputs['filename']}")
            MarkDownValidatorCrew().crew().train(n_iterations=int(sys.argv[1]), filename=inputs['filename'])
            print("Training completed successfully.")
        except Exception as e1:
            raise Exception(f"An error occurred while training the crew: {e1}")
    else:
        raise ValueError(
            "Error: No markdown file provided for training. Please provide the number of iterations and a file path.")


if __name__ == "__main__":
    print("## Welcome to Markdown Validator Crew")
    print('-------------------------------------')

    try:
        result = run()
        print("\n\n########################")
        print("## Validation Report")
        print("########################\n")
        print(f"Final Recommendations: {result}")
    except Exception as e:
        print(f"An error occurred: {e}")

View File

@@ -0,0 +1,54 @@
import os
from langchain.tools import tool
from pymarkdown.api import PyMarkdownApi, PyMarkdownApiException


@tool("markdown_validation_tool")
def markdown_validation_tool(file_path: str) -> str:
    """
    A tool to review files for markdown syntax errors.

    Parameters:
    - file_path: The path to the markdown file to be reviewed.

    Returns:
    - validation_results: A formatted string of validation results or summary.
    """
    try:
        if not os.path.exists(file_path):
            return "Error: The provided file path does not exist."

        # Perform the markdown scan
        scan_result = PyMarkdownApi().scan_path(file_path.strip())

        # Always return formatted scan results
        return format_scan_result(scan_result)

    except PyMarkdownApiException as this_exception:
        return f"API Exception: {str(this_exception)}"


def format_scan_result(scan_result) -> str:
    """
    Format the PyMarkdownApi scan result.

    Parameters:
    - scan_result: The result from the PyMarkdownApi scan.

    Returns:
    - A formatted string summarizing the issues found or a simple success message.
    """
    if not scan_result.scan_failures:
        return "No markdown validation issues found."

    # Format only essential information
    output = []
    for failure in scan_result.scan_failures:
        output.append(
            f"File: {failure.scan_file}, Line: {failure.line_number}, "
            f"Rule: {failure.rule_id} ({failure.rule_name}) - {failure.rule_description}"
        )

    return "\n".join(output)