Fix: Update the markdown_validator example

Issue #183 reported that the markdown_validator example structure needed to be updated. This commit includes:
- Adding a config/ directory for the YAML configuration files (agents.yaml, tasks.yaml).
- Adding a dedicated tools/ folder for helper utilities, such as markdownTools.py.
- Adding the crew.py module, which contains the logic for the MarkDownValidatorCrew.
- Adding a main.py file, which now serves as the entry point for the project and includes the run and train functions.
BasharAssaf
2024-10-23 20:43:56 +03:00
parent 660c7dbdab
commit c4e8cb9030
12 changed files with 5719 additions and 1021 deletions
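
The new layout mirrors the standard CrewAI project scaffold. A rough sketch of how the pieces are assumed to fit together (module paths are taken from the diffs below; README.md is only an example argument):

# main.py is the entry point: it loads the crew defined in crew.py, which reads
# config/agents.yaml and config/tasks.yaml and registers the markdown_validation_tool
# from tools/markdownTools.py.
from markdown_validator.crew import MarkDownValidatorCrew

result = MarkDownValidatorCrew().crew().kickoff(inputs={"filename": "README.md"})
print(result)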

View File

@@ -1,8 +1,2 @@
# Using OpenAI's API
OPENAI_API_KEY="sk-..."
MODEL_NAME="gpt-3.5-turbo"
# Using LLM Studio's API
# MODEL_NAME='oh-2.5m7b-q51'
# OPENAI_API_BASE_URL="http://localhost:8000/v1"
# OPENAI_API_KEY=local
SERPER_API_KEY=key_here
OPENAI_API_KEY=key_here
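
The new .env keys above are presumably read at startup via python-dotenv, which is already a project dependency; the updated main.py later in this diff loads the OpenAI settings this way. A minimal sketch, with the consumer of SERPER_API_KEY assumed rather than shown in this hunk:

import os
from dotenv import load_dotenv

load_dotenv()  # reads the .env file from the current working directory

openai_key = os.environ.get("OPENAI_API_KEY")
serper_key = os.environ.get("SERPER_API_KEY")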

View File

@@ -1,32 +0,0 @@
import os
import sys
from langchain.tools import tool
from pymarkdown.api import PyMarkdownApi, PyMarkdownApiException


@tool("markdown_validation_tool")
def markdown_validation_tool(file_path: str) -> str:
    """
    A tool to review files for markdown syntax errors.

    Parameters:
    - file_path: The path to the markdown file to be reviewed.

    Returns:
    - validation_results: A list of validation results
      and suggestions on how to fix them.
    """

    print("\n\nValidating Markdown syntax...\n\n" + file_path)

    scan_result = None
    try:
        if not (os.path.exists(file_path)):
            return "Could not validate file. The provided file path does not exist."

        scan_result = PyMarkdownApi().scan_path(file_path.rstrip().lstrip())
        results = str(scan_result)
        return results  # Return the reviewed document
    except PyMarkdownApiException as this_exception:
        print(f"API Exception: {this_exception}", file=sys.stderr)
        return f"API Exception: {str(this_exception)}"

View File

@@ -9,10 +9,12 @@ This example uses the OpenAI API to call a model. This can be through a locally
=======
- **Configure Environment**: Rename `.env.example` to `.env` and set up the environment variables for the model, the endpoint URL, and the API key.
- **Install Dependencies**: Run `poetry install --no-root`.
- **Install Dependencies**: Run `poetry lock`.
- **Execute the Script**: Run `python main.py README.md` to see a list of recommended changes to this document.
## Details & Explanation
- **Running the Script**: Execute `python main.py <path to markdown file>`. The script will leverage the CrewAI framework to process the specified file and return a list of changes.
- **Running the Script**: Execute `poetry run markdown_validator {filename}`. The script will leverage the CrewAI framework to process the specified file and return a list of changes.
- **Running the Script with agent training**: Execute `poetry run train {number_of_iterations} {filename}`. The script will leverage the CrewAI framework to process the specified file and return a list of changes, and updates the changes according to the user's feedback.
## License
This project is released under the MIT License.
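
For reference, the `poetry run train {number_of_iterations} {filename}` command above maps onto the train() entry point shown later in this diff; a roughly equivalent Python call, with 2 and README.md as placeholder arguments:

from markdown_validator.crew import MarkDownValidatorCrew

# Same effect as `poetry run train 2 README.md`, per the train() function in main.py below.
MarkDownValidatorCrew().crew().train(n_iterations=2, filename="README.md")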

View File

@@ -1,86 +0,0 @@
import sys
from crewai import Agent, Task
import os
from dotenv import load_dotenv
from langchain.tools import tool
from langchain.chat_models.openai import ChatOpenAI
from pymarkdown.api import PyMarkdownApi, PyMarkdownApiException
from MarkdownTools import markdown_validation_tool

load_dotenv()

defalut_llm = ChatOpenAI(openai_api_base=os.environ.get("OPENAI_API_BASE_URL", "https://api.openai.com/v1"),
                         openai_api_key=os.environ.get("OPENAI_API_KEY"),
                         temperature=0.1,
                         model_name=os.environ.get("MODEL_NAME", "gpt-3.5-turbo"),
                         top_p=0.3)


def process_markdown_document(filename):
    """
    Processes a markdown document by reviewing its syntax validation
    results and providing feedback on necessary changes.

    Args:
        filename (str): The path to the markdown file to be processed.

    Returns:
        str: The list of recommended changes to make to the document.
    """

    # Define general agent
    general_agent = Agent(role='Requirements Manager',
                          goal="""Provide a detailed list of the markdown
                          linting results. Give a summary with actionable
                          tasks to address the validation results. Write your
                          response as if you were handing it to a developer
                          to fix the issues.
                          DO NOT provide examples of how to fix the issues or
                          recommend other tools to use.""",
                          backstory="""You are an expert business analyst
                          and software QA specialist. You provide high quality,
                          thorough, insightful and actionable feedback via
                          detailed list of changes and actionable tasks.""",
                          allow_delegation=False,
                          verbose=True,
                          tools=[markdown_validation_tool],
                          llm=defalut_llm)

    # Define Tasks Using Crew Tools
    syntax_review_task = Task(description=f"""
            Use the markdown_validation_tool to review
            the file(s) at this path: {filename}

            Be sure to pass only the file path to the markdown_validation_tool.
            Use the following format to call the markdown_validation_tool:
            Do I need to use a tool? Yes
            Action: markdown_validation_tool
            Action Input: {filename}

            Get the validation results from the tool
            and then summarize it into a list of changes
            the developer should make to the document.
            DO NOT recommend ways to update the document.
            DO NOT change any of the content of the document or
            add content to it. It is critical to your task to
            only respond with a list of changes.

            If you already know the answer or if you do not need
            to use a tool, return it as your Final Answer.""",
            agent=general_agent)

    updated_markdown = syntax_review_task.execute()

    return updated_markdown


# If called directly from the command line take the first argument as the filename
if __name__ == "__main__":
    if len(sys.argv) > 1:
        filename = sys.argv[1]
        processed_document = process_markdown_document(filename)
        print(processed_document)

File diff suppressed because it is too large.

View File

@@ -1,12 +1,11 @@
[tool.poetry]
name = "markdown-validation-crew"
name = "markdown_validator"
version = "0.1.0"
description = ""
authors = ["ITLackey <itlackey@gmail.com>"]
[tool.poetry.dependencies]
python = ">=3.10.0,<3.12"
crewai = "^0.11.0"
crewai = "^0.75.0"
python-dotenv = "1.0.0"
markdown = "3.4.3"
pymarkdownlnt = "0.9.15"
@@ -16,6 +15,9 @@ pymarkdownlnt = "0.9.15"
useLibraryCodeForTypes = true
exclude = [".cache"]
[tool.poetry.scripts]
markdown_validator = "markdown_validator.main:run"
train = "markdown_validator.main:train"
[tool.ruff]
# https://beta.ruff.rs/docs/configuration/
select = ['E', 'W', 'F', 'I', 'B', 'C4', 'ARG', 'SIM']
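
The [tool.poetry.scripts] entries above bind the console commands to package functions, so `poetry run markdown_validator README.md` resolves to run() in markdown_validator/main.py (shown later in this diff), which reads the file path from sys.argv. A rough, illustration-only equivalent:

import sys
from markdown_validator.main import run

sys.argv = ["markdown_validator", "README.md"]  # run() takes the file path from sys.argv[1]
print(run())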

View File

@@ -0,0 +1,11 @@
Requirements_Manager:
  role: >
    Requirements Manager
  goal: >
    Provide a detailed list of the markdown linting results.
    Give a summary with actionable tasks to address the validation results.
    Write your response as if you were handing it to a developer to fix the issues.
    DO NOT provide examples of how to fix the issues or recommend other tools to use.
  backstory: >
    You are an expert business analyst and software QA specialist.
    You provide high quality, thorough, insightful, and actionable feedback via a detailed list of changes and actionable tasks.

View File

@@ -0,0 +1,19 @@
syntax_review_task:
  description: >
    Use the markdown_validation_tool to review the file(s) at this path: {filename}.
    Be sure to pass only the file path to the markdown_validation_tool.
    Use the following format to call the markdown_validation_tool:
    Do I need to use a tool? Yes
    Action: markdown_validation_tool
    Action Input: {filename}
    Get the validation results from the tool and then summarize it into a list of changes
    the developer should make to the document.
    DO NOT recommend ways to update the document.
    DO NOT change any of the content of the document or add content to it.
    It is critical to your task to only respond with a list of changes.
    If you already know the answer or if you do not need to use a tool,
    return it as your Final Answer.
  expected_output: >
    A list of changes the developer should make to the document based on the markdown validation results.
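
Note that the {filename} placeholder in this task description is interpolated at kickoff time from the inputs dictionary, so the key name used in main.py has to match it; a minimal sketch (the file path is only an example):

from markdown_validator.crew import MarkDownValidatorCrew

# CrewAI substitutes {filename} in tasks.yaml with inputs["filename"] when the crew starts.
MarkDownValidatorCrew().crew().kickoff(inputs={"filename": "README.md"})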

View File

@@ -0,0 +1,36 @@
from crewai import Agent, Crew, Process, Task
from crewai.project import CrewBase, agent, crew, task

from markdown_validator.tools.markdownTools import markdown_validation_tool


@CrewBase
class MarkDownValidatorCrew():
    """MarkDownValidatorCrew crew"""
    agents_config = 'config/agents.yaml'
    tasks_config = 'config/tasks.yaml'

    @agent
    def RequirementsManager(self) -> Agent:
        return Agent(
            config=self.agents_config['Requirements_Manager'],
            tools=[markdown_validation_tool],
            allow_delegation=False,
            verbose=False
        )

    @task
    def syntax_review_task(self) -> Task:
        return Task(
            config=self.tasks_config['syntax_review_task'],
            agent=self.RequirementsManager()
        )

    @crew
    def crew(self) -> Crew:
        """Creates the MarkDownValidatorCrew crew"""
        return Crew(
            agents=self.agents,
            tasks=self.tasks,
            process=Process.sequential,
            verbose=False,
        )

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python
import sys
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

from markdown_validator.crew import MarkDownValidatorCrew

# Load environment variables from .env file
load_dotenv()

# Initialize the OpenAI LLM
default_llm = ChatOpenAI(
    openai_api_base=os.environ.get("OPENAI_API_BASE_URL", "https://api.openai.com/v1"),
    openai_api_key=os.environ.get("OPENAI_API_KEY"),
    temperature=0.1,
    model_name=os.environ.get("MODEL_NAME", "gpt-4o-mini"),
    top_p=0.3
)


def run():
    """
    Run the markdown validation crew to analyze the markdown file.
    """
    # Get the input markdown file from command line arguments
    inputs = {
        'query': 'Please provide the markdown file to analyze:',
        'filename': sys.argv[1] if len(sys.argv) > 1 else None,  # Expect 'filename' key
    }

    # Check if the markdown file path is provided
    if inputs['filename']:
        print(f"Starting markdown validation for file: {inputs['filename']}")
        crewResult = MarkDownValidatorCrew().crew().kickoff(inputs=inputs)
        print("Markdown validation completed")
        return crewResult
    else:
        raise ValueError("Error: No markdown file provided. Please provide a file path as a command-line argument.")


def train():
    """
    Train the markdown validator crew for a given number of iterations.
    """
    # Get the number of iterations and markdown file path from command line arguments
    inputs = {
        'query': 'Training the markdown validation model.',
        'filename': sys.argv[2] if len(sys.argv) > 2 else None,  # Expect 'filename' key
    }

    # Check if the markdown file path is provided
    if inputs['filename']:
        try:
            print(f"Starting training for file: {inputs['filename']}")
            MarkDownValidatorCrew().crew().train(n_iterations=int(sys.argv[1]), filename=inputs['filename'])
            print("Training completed successfully.")
        except Exception as e1:
            raise Exception(f"An error occurred while training the crew: {e1}")
    else:
        raise ValueError(
            "Error: No markdown file provided for training. Please provide the number of iterations and a file path.")


if __name__ == "__main__":
    print("## Welcome to Markdown Validator Crew")
    print('-------------------------------------')

    try:
        result = run()
        print("\n\n########################")
        print("## Validation Report")
        print("########################\n")
        print(f"Final Recommendations: {result}")
    except Exception as e:
        print(f"An error occurred: {e}")

View File

@@ -0,0 +1,54 @@
import os
from langchain.tools import tool
from pymarkdown.api import PyMarkdownApi, PyMarkdownApiException


@tool("markdown_validation_tool")
def markdown_validation_tool(file_path: str) -> str:
    """
    A tool to review files for markdown syntax errors.

    Parameters:
    - file_path: The path to the markdown file to be reviewed.

    Returns:
    - validation_results: A formatted string of validation results or summary.
    """
    try:
        if not os.path.exists(file_path):
            return "Error: The provided file path does not exist."

        # Perform the markdown scan
        scan_result = PyMarkdownApi().scan_path(file_path.strip())

        # Always return formatted scan results
        return format_scan_result(scan_result)

    except PyMarkdownApiException as this_exception:
        return f"API Exception: {str(this_exception)}"


def format_scan_result(scan_result) -> str:
    """
    Format the PyMarkdownApi scan result.

    Parameters:
    - scan_result: The result from the PyMarkdownApi scan.

    Returns:
    - A formatted string summarizing the issues found or a simple success message.
    """
    if not scan_result.scan_failures:
        return "No markdown validation issues found."

    # Format only essential information
    output = []
    for failure in scan_result.scan_failures:
        output.append(
            f"File: {failure.scan_file}, Line: {failure.line_number}, "
            f"Rule: {failure.rule_id} ({failure.rule_name}) - {failure.rule_description}"
        )

    return "\n".join(output)