Mirror of https://github.com/Significant-Gravitas/AutoGPT.git (synced 2026-02-12 15:55:03 -05:00)
Compare commits: 3 commits on branch fix/claude

| Author | SHA1 | Date |
|---|---|---|
| aarushikan | e651781e01 | |
| aarushikan | e43af9066d | |
| aarushikan | 614cfbe94f | |
rnd/autogpt_server/autogpt_server/blocks/finetune.py (new normal file, 103 lines)
@@ -0,0 +1,103 @@
import tempfile
from types import SimpleNamespace

import openai

from autogpt_server.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from autogpt_server.data.model import BlockSecret, SecretField


class FinetuneBlock(Block):
    class Input(BlockSchema):
        api_key: BlockSecret = SecretField(key="openai_api_key", description="OpenAI API key")
        model: str = "gpt-3.5-turbo"
        training_data: str
        validation_split: float = 0.2
        n_epochs: int = 3
        batch_size: int = 1
        learning_rate_multiplier: float = 0.3

    class Output(BlockSchema):
        job_id: str
        status: str
        error: str

    def __init__(self):
        super().__init__(
            # NOTE: this ID contains non-hex characters and is therefore not a valid
            # UUID; block IDs are normally uuid4 strings.
            id="b9a8c7d6-e5f4-3g2h-1i0j-k9l8m7n6o5p4",
            description="Create and start an OpenAI fine-tuning job with JSONL formatted data",
            categories={BlockCategory.LLM, BlockCategory.TRAINING},
            input_schema=FinetuneBlock.Input,
            output_schema=FinetuneBlock.Output,
            test_input={
                "api_key": "sk-test123",
                "model": "gpt-3.5-turbo",
                "training_data": '{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What\'s the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn\'t know that already."}]}\n{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote \'Romeo and Juliet\'?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}',
            },
            test_output=[("job_id", "ft-abc123"), ("status", "created")],
            # The mock must expose .id and .status attributes because run() reads
            # them off the returned job object.
            test_mock={
                "create_fine_tuning_job": lambda *args, **kwargs: SimpleNamespace(
                    id="ft-abc123", status="created"
                )
            },
        )

    @staticmethod
    def split_data(data: str, validation_split: float) -> tuple[str, str]:
        # Split the JSONL data line-wise: the first (1 - validation_split) fraction
        # of lines becomes the training set, the remainder the validation set.
        lines = data.strip().split('\n')
        split_index = int(len(lines) * (1 - validation_split))
        return '\n'.join(lines[:split_index]), '\n'.join(lines[split_index:])

    @staticmethod
    def create_temp_file(data: str) -> str:
        # Write the JSONL payload to a temporary .jsonl file and return its path.
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.jsonl') as temp_file:
            temp_file.write(data)
            return temp_file.name

    @staticmethod
    def create_fine_tuning_job(
        api_key: str,
        model: str,
        training_data: str,
        validation_data: str,
        n_epochs: int,
        learning_rate_multiplier: float,
        batch_size: int,
    ):
        # Returns the fine-tuning job object from the OpenAI SDK.
        openai.api_key = api_key

        # Upload the training data as a file.
        training_file_path = FinetuneBlock.create_temp_file(training_data)
        with open(training_file_path, "rb") as f:
            training_file = openai.files.create(file=f, purpose='fine-tune')

        job_params = {
            "training_file": training_file.id,
            "model": model,
            "hyperparameters": {
                "n_epochs": n_epochs,
                "batch_size": batch_size,
                "learning_rate_multiplier": learning_rate_multiplier,
            },
        }

        # Upload the validation data only if the split produced any.
        if validation_data:
            validation_file_path = FinetuneBlock.create_temp_file(validation_data)
            with open(validation_file_path, "rb") as f:
                validation_file = openai.files.create(file=f, purpose='fine-tune')
            job_params["validation_file"] = validation_file.id

        job = openai.fine_tuning.jobs.create(**job_params)
        return job

    def run(self, input_data: Input) -> BlockOutput:
        try:
            # Split data into training and validation sets
            training_data, validation_data = self.split_data(
                input_data.training_data, input_data.validation_split
            )

            job = self.create_fine_tuning_job(
                api_key=input_data.api_key.get_secret_value(),
                model=input_data.model,
                training_data=training_data,
                validation_data=validation_data,
                n_epochs=input_data.n_epochs,
                learning_rate_multiplier=input_data.learning_rate_multiplier,
                batch_size=input_data.batch_size,
            )

            yield "job_id", job.id
            yield "status", job.status
        except Exception as e:
            yield "error", str(e)
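For context, a minimal sketch of how FinetuneBlock might be exercised on its own, outside the graph executor. This is not part of the diff: the sample JSONL examples, the raw api_key string, and constructing Input directly are illustrative assumptions; in real use the BlockSecret field would normally be resolved from the secret store.

```python
# Illustrative sketch only (not part of this diff). Assumes Input can be built
# directly and that a raw API key string is accepted for the BlockSecret field.
import json

from autogpt_server.blocks.finetune import FinetuneBlock

# Two chat-format training examples; each JSONL line is one example.
examples = [
    {
        "messages": [
            {"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."},
            {"role": "user", "content": "What's the capital of France?"},
            {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."},
        ]
    },
    {
        "messages": [
            {"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."},
            {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"},
            {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"},
        ]
    },
]
training_data = "\n".join(json.dumps(example) for example in examples)

block = FinetuneBlock()

# run() is a generator yielding (output_name, value) pairs: ("job_id", ...) and
# ("status", ...) on success, or ("error", ...) on failure.
for name, value in block.run(
    FinetuneBlock.Input(
        api_key="sk-...",  # placeholder; a real OpenAI key is required
        training_data=training_data,
        validation_split=0.2,
        n_epochs=3,
    )
):
    print(name, value)
```

With validation_split=0.2 and only two example lines, split_data puts one line in the training set and one in the validation set, since int(2 * 0.8) == 1.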
@@ -22,6 +22,7 @@ class BlockCategory(Enum):
     TEXT = "Block that processes text data."
     SEARCH = "Block that searches or extracts information from the internet."
     BASIC = "Block that performs basic operations."
+    TRAINING = "Block that performs finetuning actions"
     INPUT_OUTPUT = "Block that interacts with input/output of the graph."

     def dict(self) -> dict[str, str]:
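For readability, this is roughly how the touched portion of BlockCategory reads after the change. Only members visible in the hunk context are shown; the real enum defines additional members (FinetuneBlock above also references BlockCategory.LLM), and the body of dict() is unchanged by this diff, so it is omitted here.

```python
# Sketch of the hunk context after the change; other enum members and the body of
# dict() are omitted because they are not shown in this diff.
from enum import Enum


class BlockCategory(Enum):
    TEXT = "Block that processes text data."
    SEARCH = "Block that searches or extracts information from the internet."
    BASIC = "Block that performs basic operations."
    TRAINING = "Block that performs finetuning actions"
    INPUT_OUTPUT = "Block that interacts with input/output of the graph."

    def dict(self) -> dict[str, str]:
        ...
```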