Merge pull request #16 from itlackey/feature/markdown_validator

Added a markdown validation tool example
2026-01-10 22:38:00 -05:00 · 2024-01-12 13:35:50 -03:00
parent 087b8ca5c5 a643411f29
commit e167392d8e
7 changed files with 1735 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -9,4 +9,5 @@ By [@joaomdmoura](https://x.com/joaomdmoura).
 - [Trip Planner](https://github.com/joaomdmoura/crewAI-examples/tree/main/trip_planner)
 - [Stock Analysis](https://github.com/joaomdmoura/crewAI-examples/tree/main/stock_analysis)
 - [Landing Page Generator](https://github.com/joaomdmoura/crewAI-examples/tree/main/landing_page_generator)
- [Create Instagram Post](https://github.com/joaomdmoura/crewAI-examples/tree/main/instagram_post)
+- [Create Instagram Post](https://github.com/joaomdmoura/crewAI-examples/tree/main/instagram_post)
+- [Markdown Validator](https://github.com/joaomdmoura/crewAI-examples/tree/main/markdown_validator)
--- a/markdown_validator/.env.example
+++ b/markdown_validator/.env.example
@@ -0,0 +1,3 @@
+MODEL_NAME='oh-2.5m7b-q51'
+OPENAI_API_BASE_URL="http://localhost:8000/v1" # LM Studio
+OPENAI_API_KEY=local
--- a/markdown_validator/.gitignore
+++ b/markdown_validator/.gitignore
@@ -0,0 +1,4 @@
+.env
+.DS_Store
+__pycache__
+.venv
--- a/markdown_validator/README.md
+++ b/markdown_validator/README.md
@@ -0,0 +1,17 @@
+# AI Crew for Reviewing Markdown Syntax
+
+## Introduction
+This project is an example using the CrewAI framework to automate the process of reviewing a markdown file for syntax issues. A general assistant leverages a custom tool to get a list of markdown linting errors. It then summarizes those errors into a list of changes to make to the document.
+
+## Running the Script
+This example uses the OpenAI API to call a model. This can be through a locally hosted solution like LM Studio, or the OpenAI API endpoint with your API key.
+
+- **Configure Environment**: Copy `.env.example` to `.env` and set the environment variables for the model, endpoint URL, and API key.
+- **Install Dependencies**: Run `poetry install --no-root`.
+- **Execute the Script**: Run `python main.py README.md` to see a list of recommended changes to this document.
+
+## Details & Explanation
+- **Running the Script**: Execute `python main.py <path to markdown file>`. The script will leverage the CrewAI framework to process the specified file and return a list of changes.
+
+## License
+This project is released under the MIT License.
--- a/markdown_validator/main.py
+++ b/markdown_validator/main.py
@@ -0,0 +1,179 @@
+import sys
+from crewai import Agent, Task
+import os
+from dotenv import load_dotenv
+from langchain.tools import tool
+from langchain.chat_models.openai import ChatOpenAI
+from pymarkdown.api import PyMarkdownApi, PyMarkdownApiException
+
+load_dotenv()
+
+default_model_name = os.environ.get("MODEL_NAME", "gpt-3.5-turbo")
+
+
+defalut_llm = ChatOpenAI(openai_api_base=os.environ.get("OPENAI_API_BASE_URL", "https://api.openai.com/v1"),
+                        openai_api_key=os.environ.get("OPENAI_API_KEY"),
+                        temperature=0.1,                        
+                        model_name=os.environ.get("MODEL_NAME", "gpt-3.5-turbo"),
+                        top_p=0.3)
+
+
+@tool("markdown_validation_tool")
+def markdown_validation_tool(file_path: str) -> str:
+    """
+    A tool to review files for markdown syntax errors.
+
+    Parameters:
+    - file_path: The path to the markdown file to be reviewed.
+
+    Returns:
+    - validation_results: A list of validation results 
+    and suggestions on how to fix them.
+    """
+    
+    print("\n\nValidating Markdown syntax...\n\n" + file_path)
+
+    scan_result = None
+    try:
+        scan_result = PyMarkdownApi().scan_path(file_path)
+        results = str(scan_result)
+        print(results)
+        syntax_validator_agent = Agent(role='Syntax Validator',
+                                backstory="""You are an expert markdown validator. 
+								You are an expert in formatting and structure. 
+								You following formatting guidelines strictly.""",
+                                goal="""
+                                Provide a detailed list of the provided markdown 
+                                linting results. Give a summary with actionable 
+								tasks to address the validation results. Write your 
+								response as if you were handing it to a developer 
+								to fix the issues.
+                                DO NOT provide examples of how to fix the issues.
+								""", 
+                                allow_delegation=False, 
+                                verbose=True,
+                                llm=defalut_llm)
+
+        fix_syntax_task = Task(description="""Give a detailed list of the 
+                               validation results below. Be sure to to include 
+                               suggestions on how to fix the issues.
+                               \n\nValidation Results:\n\n""" + results, 
+                            agent=syntax_validator_agent)
+            
+        updated_markdown = fix_syntax_task.execute()
+
+        return updated_markdown  # Return the reviewed document
+    except PyMarkdownApiException as this_exception:
+        print(f"API Exception: {this_exception}", file=sys.stderr)
+        return f"API Exception: {str(this_exception)}"
+    
+
+
+def process_markdown_document(filename):
+    """
+    Processes a markdown document by reviewing its syntax validation 
+    results and providing feedback on necessary changes.
+
+    Args:
+        filename (str): The path to the markdown file to be processed.
+
+    Returns:
+        str: The list of recommended changes to make to the document.
+
+    """
+
+    # Define general agent
+    general_agent  = Agent(role='Requirements Manager',
+                    goal="""To use the available tools to provide 
+					execellent feedback to the team members.""",
+                    backstory="""You are an expert business analyst 
+					and software QA specialist. You provide high quality, 
+                    thorough, insightful and actionable feedback.""",
+                    allow_delegation=False, 
+                    verbose=True,
+                    tools=[markdown_validation_tool],
+                    llm=defalut_llm)
+
+
+    # Define Tasks Using Crew Tools
+    syntax_review_task = Task(description=f"""
+			Use the markdown_validation_tool to review 
+			the file(s) at this path: {filename}
+            
+			Be sure to pass only the file path to the markdown_validation_tool.
+			Use the following format to call the markdown_validation_tool:
+			Do I need to use a tool? Yes
+			Action: markdown_validation_tool
+			Action Input: {filename}
+
+			Collect the final answer from the syntax review tool 
+			and then summarize it into a list of changes
+			the developer should make to the document.
+			
+			If you already know the answer or if you do not need 
+			to use a tool, return it as your Final Answer.""",
+             agent=general_agent)
+    
+    updated_markdown = syntax_review_task.execute()
+
+    return updated_markdown
+
+
+processed_document = process_markdown_document("README.md")
+print(processed_document)
+
+# If called directly from the command line take the first argument as the filename
+if __name__ == "__main__":
+
+    if len(sys.argv) > 1:
+        filename = sys.argv[1]
+        processed_document = process_markdown_document(filename)
+        print(processed_document)
+
+
+
+
+### Example Validation Results
+
+## ikawrakow/open-hermes-2.5-mistral-7b-quantized-gguf/oh-2.5-m7b-q51.gguf
+        
+# model_name="oh-2.5m7b-q51",
+# temperature=0.1,           
+# top_p=0.3
+
+# Here is a list of changes that the developer should make to the README.md file based 
+# on the validation results from the markdown_validation_tool:
+
+# 1. Add a # at the beginning of the first line to make it a 
+# level 1 heading (e.g., "# My Project").
+# 2. Break line 3 into two or more shorter lines, 
+# as it is currently too long (127 characters).
+# 3. Break line 7 into two or more shorter lines, 
+# as it is currently too long (94 characters).
+# 4. Break line 44 into multiple shorter lines or rephrase the content to 
+# make it more concise, as it is extremely long (234 characters).
+# 5. Repeat steps 1-4 for the remaining PyMarkdownScanFailure 
+# entries in the validation results list.
+# 6. Ensure that the README.md file follows proper Markdown syntax 
+# and is well-structured, with appropriate headings, paragraphs, and lists as needed.
+# 7. Add a brief introduction to the project at the beginning of the README.md file, 
+# explaining its purpose and any key features or functionalities.
+# 8. Review the overall readability and clarity of the README.md file, 
+# making adjustments as necessary to ensure it is easy for others to understand and navigate.
+
+## TheBloke/dolphin-2.6-mistral-7b-dpo.Q4_K_M.gguf        
+# model_name="dolphin-2.6-mistral-dpo-7b-q4_k_m",
+# temperature=0.1,           
+# top_p=0.3
+        
+# The markdown validation tool has identified three issues in your README.md file. 
+# Here's a summary of the changes you should make:
+
+# 1. Add a first-line heading, such as `# Heading`, at the beginning of the README.md 
+# file to comply with Rule ID: MD041.
+# 2. Break long lines into shorter ones to ensure they don't exceed 80 characters. 
+# You can use soft wraps or add line breaks where necessary. 
+# This will help you adhere to Rule ID: MD013.
+# 3. Review each line in the README.md file and ensure they don't exceed 80 characters. 
+# If necessary, break up long lines or rephrase sentences to fit within the character 
+# limit. This will also help you comply with Rule ID: MD013.
--- a/markdown_validator/poetry.lock
+++ b/markdown_validator/poetry.lock
--- a/markdown_validator/pyproject.toml
+++ b/markdown_validator/pyproject.toml
@@ -0,0 +1,26 @@
+[tool.poetry]
+name = "markdown-validation-crew"
+version = "0.1.0"
+description = ""
+authors = ["ITLackey <itlackey@gmail.com>"]
+
+[tool.poetry.dependencies]
+python = ">=3.10.0,<3.12"
+crewai = "0.1.24"
+python-dotenv = "1.0.0"
+markdown = "3.4.3"
+pymarkdownlnt = "0.9.15"
+
+[tool.pyright]
+# https://github.com/microsoft/pyright/blob/main/docs/configuration.md
+useLibraryCodeForTypes = true
+exclude = [".cache"]
+
+[tool.ruff]
+# https://beta.ruff.rs/docs/configuration/
+select = ['E', 'W', 'F', 'I', 'B', 'C4', 'ARG', 'SIM']
+ignore = ['W291', 'W292', 'W293']
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"