Compare commits

...

53 Commits

Author SHA1 Message Date
Zamil Majdy ab3a62995f Revert 2024-09-17 21:00:47 -05:00
Zamil Majdy f2e9a8463d Move shutil in 2024-09-17 20:55:20 -05:00
Zamil Majdy 107148749b Move venv in 2024-09-17 20:51:56 -05:00
Zamil Majdy 17f1d33ed3 Revert 2024-09-17 20:46:01 -05:00
Zamil Majdy b3a0fc538a Revert 2024-09-17 20:39:29 -05:00
Zamil Majdy e818bbf859 Merge branch 'master' into ntindle/samples 2024-09-17 18:30:23 -05:00
Nicholas Tindle 069ec89691 Update test_manager.py 2024-09-17 10:07:41 -05:00
Nicholas Tindle 84d490bcb1 Merge branch 'master' into ntindle/samples 2024-09-17 09:46:22 -05:00
Nicholas Tindle 9368956d5d Update test_manager.py 2024-09-16 14:53:28 -05:00
Nicholas Tindle 2c3bde0c53 Merge branch 'master' into ntindle/samples 2024-09-16 14:40:12 -05:00
Nicholas Tindle 104b56628e fix: merge oops 2024-09-16 14:39:23 -05:00
Nicholas Tindle 15ac526eee Merge branch 'master' into ntindle/samples 2024-09-15 10:26:49 -05:00
Nicholas Tindle 5f83e354b9 Merge branch 'master' into ntindle/samples 2024-09-13 23:35:34 -05:00
Nicholas Tindle 70ebf4d58b Update test_manager.py 2024-09-13 22:31:33 -05:00
Nicholas Tindle 6d0d264d99 feat(server): set timeout to 44 second 2024-09-13 18:09:16 -05:00
Nicholas Tindle 8e24b546a3 Merge branch 'master' into ntindle/samples 2024-09-12 23:57:23 -05:00
Nicholas Tindle d4838cdc45 Update test_manager.py 2024-09-12 23:51:01 -05:00
Nicholas Tindle acaca35498 Update test_manager.py 2024-09-12 23:43:58 -05:00
Nicholas Tindle 9ee0825f21 Update test_manager.py 2024-09-12 23:36:06 -05:00
Nicholas Tindle 5fde0f2c67 lets try 34 secondsd 2024-09-12 23:29:13 -05:00
Nicholas Tindle e3407fdfb4 Update test_manager.py 2024-09-12 23:02:08 -05:00
Nicholas Tindle b98e62cdef Update test_manager.py 2024-09-12 22:54:07 -05:00
Nicholas Tindle 4d82f78f04 Update test_manager.py 2024-09-12 22:47:36 -05:00
Nicholas Tindle c5d2586f6c Update test_manager.py 2024-09-12 22:34:56 -05:00
Nicholas Tindle 589c8d94ec feat: warning 2024-09-12 10:59:33 -05:00
Nicholas Tindle 136d258a46 Merge branch 'master' into ntindle/samples 2024-09-12 07:02:42 -05:00
Nicholas Tindle 92bcc39f4d Merge branch 'master' into ntindle/samples 2024-09-08 07:48:24 -05:00
Nicholas Tindle 5909697215 Merge branch 'master' into ntindle/samples 2024-09-06 22:47:56 -05:00
Nicholas Tindle bf34801a74 feat: longer timeout? 2024-09-06 22:05:16 -05:00
Nicholas Tindle 154eccb9af fix: longer for tests? 2024-09-06 21:56:25 -05:00
Nicholas Tindle 14f8a92c20 Merge branch 'master' into ntindle/samples 2024-09-06 21:48:22 -05:00
Nicholas Tindle 2c07c64ccf Update code.py 2024-09-05 18:16:34 -05:00
Nicholas Tindle ef21d359a6 Update code.py 2024-09-05 17:44:37 -05:00
Nicholas Tindle f4bd998fa2 Merge branch 'master' into ntindle/samples 2024-09-05 17:43:45 -05:00
Nicholas Tindle 4ebae90f62 Merge branch 'master' into ntindle/samples 2024-08-26 13:05:11 -05:00
Bentlybro 09d3768948 fix output to make pytest work 2024-08-17 21:37:32 +01:00
Nicholas Tindle 8c6adaeaa1 feat(server): linting and bug fix on llm 2024-08-16 19:22:12 -05:00
Nicholas Tindle dabd2e1610 Merge branch 'master' into ntindle/samples 2024-08-16 17:19:36 -07:00
Nicholas Tindle b228c4445e feat(server): much better execution of unified 2024-08-15 10:29:03 -05:00
Nicholas Tindle 05c9931c11 feat(server): more complicated blocks 2024-08-14 21:42:04 -05:00
Nicholas Tindle 9198a86c0e fix(server): no default was provided 2024-08-14 21:41:45 -05:00
Nicholas Tindle c8fedf3dad feat(server): even more advanced 2024-08-14 21:28:51 -05:00
Nicholas Tindle 0c7e1838cd feat(server): more advanced coding blocks 2024-08-14 21:20:35 -05:00
Nicholas Tindle 979d80cd17 feat(server): broken code exec lol 2024-08-14 21:06:59 -05:00
Nicholas Tindle 4f7ffd13e4 feat(server): timeouts on code 2024-08-14 21:01:25 -05:00
Nicholas Tindle b944e0f6da feat(server): code args 2024-08-14 20:57:38 -05:00
Nicholas Tindle 51aaaf6ddc fix(server): stratified sampling 2024-08-14 20:42:46 -05:00
Nicholas Tindle 3c662af1ba feat(server): allow yielding all data at once rather than row by row 2024-08-14 20:40:17 -05:00
Nicholas Tindle 17370116f6 feat(server): improve various sampling techniques 2024-08-14 20:39:48 -05:00
Nicholas Tindle d15049e9a7 Merge branch 'master' into ntindle/samples 2024-08-14 16:12:00 -05:00
Nicholas Tindle da4afd4530 fix(server): anthropic retry didn't work 2024-08-14 13:48:12 -05:00
Nicholas Tindle 7617aa6d1f Merge branch 'master' into ntindle/samples 2024-08-14 13:29:39 -05:00
Nicholas Tindle b190e1f2aa feat(server): sampling and code block 2024-08-14 00:13:15 -05:00
3 changed files with 328 additions and 20 deletions

View File

@@ -14,7 +14,8 @@ class ReadCsvBlock(Block):
         skip_columns: list[str] = []

     class Output(BlockSchema):
-        data: dict[str, str]
+        row: dict[str, str]
+        all_data: list[dict[str, str]]

     def __init__(self):
         super().__init__(
@@ -27,8 +28,15 @@ class ReadCsvBlock(Block):
                 "contents": "a, b, c\n1,2,3\n4,5,6",
             },
             test_output=[
-                ("data", {"a": "1", "b": "2", "c": "3"}),
-                ("data", {"a": "4", "b": "5", "c": "6"}),
+                ("row", {"a": "1", "b": "2", "c": "3"}),
+                ("row", {"a": "4", "b": "5", "c": "6"}),
+                (
+                    "all_data",
+                    [
+                        {"a": "1", "b": "2", "c": "3"},
+                        {"a": "4", "b": "5", "c": "6"},
+                    ],
+                ),
             ],
         )
@@ -53,8 +61,7 @@ class ReadCsvBlock(Block):
         for _ in range(input_data.skip_rows):
             next(reader)

-        # join the data with the header
-        for row in reader:
+        def process_row(row):
             data = {}
             for i, value in enumerate(row):
                 if i not in input_data.skip_columns:
@@ -62,4 +69,12 @@ class ReadCsvBlock(Block):
                     data[header[i]] = value.strip() if input_data.strip else value
                 else:
                     data[str(i)] = value.strip() if input_data.strip else value
-            yield "data", data
+            return data
+
+        all_data = []
+        for row in reader:
+            processed_row = process_row(row)
+            all_data.append(processed_row)
+            yield "row", processed_row
+        yield "all_data", all_data

View File

@@ -1,6 +1,7 @@
 import logging
 from enum import Enum
-from typing import List, NamedTuple
+from json import JSONDecodeError
+from typing import Any, List, NamedTuple

 import anthropic
 import ollama
@@ -93,7 +94,7 @@ class AIStructuredResponseGeneratorBlock(Block):
         )

     class Output(BlockSchema):
-        response: dict[str, str]
+        response: dict[str, Any]
         error: str

     def __init__(self):
@@ -139,16 +140,33 @@ class AIStructuredResponseGeneratorBlock(Block):
             )
             return response.choices[0].message.content or ""
         elif provider == "anthropic":
-            sysprompt = "".join([p["content"] for p in prompt if p["role"] == "system"])
-            usrprompt = [p for p in prompt if p["role"] == "user"]
+            system_messages = [p["content"] for p in prompt if p["role"] == "system"]
+            sysprompt = " ".join(system_messages)
+
+            messages = []
+            last_role = None
+            for p in prompt:
+                if p["role"] in ["user", "assistant"]:
+                    if p["role"] != last_role:
+                        messages.append({"role": p["role"], "content": p["content"]})
+                        last_role = p["role"]
+                    else:
+                        # If the role is the same as the last one, combine the content
+                        messages[-1]["content"] += "\n" + p["content"]
+
             client = anthropic.Anthropic(api_key=api_key)
-            response = client.messages.create(
-                model=model.value,
-                max_tokens=4096,
-                system=sysprompt,
-                messages=usrprompt,  # type: ignore
-            )
-            return response.content[0].text if response.content else ""
+            try:
+                response = client.messages.create(
+                    model=model.value,
+                    max_tokens=4096,
+                    system=sysprompt,
+                    messages=messages,
+                )
+                return response.content[0].text if response.content else ""
+            except anthropic.APIError as e:
+                error_message = f"Anthropic API error: {str(e)}"
+                logger.error(error_message)
+                raise ValueError(error_message)
         elif provider == "groq":
             client = Groq(api_key=api_key)
             response_format = {"type": "json_object"} if json_format else None
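
The new message loop exists because Anthropic's Messages API takes system text through a separate system parameter and, at the time of this change, did not accept consecutive messages with the same role. A standalone sketch of that normalization (the function name and sample conversation are illustrative, not from the diff):

def normalize_for_anthropic(prompt: list[dict[str, str]]) -> tuple[str, list[dict[str, str]]]:
    """Split out system text and merge consecutive same-role turns."""
    sysprompt = " ".join(p["content"] for p in prompt if p["role"] == "system")

    messages: list[dict[str, str]] = []
    last_role = None
    for p in prompt:
        if p["role"] not in ("user", "assistant"):
            continue  # system messages were folded into sysprompt above
        if p["role"] != last_role:
            messages.append({"role": p["role"], "content": p["content"]})
            last_role = p["role"]
        else:
            # Same role twice in a row: concatenate instead of appending
            messages[-1]["content"] += "\n" + p["content"]
    return sysprompt, messages

sysprompt, messages = normalize_for_anthropic([
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "First question."},
    {"role": "user", "content": "Second question."},
])
assert len(messages) == 1  # the two user turns were merged

The previous code also dropped assistant turns entirely (it only kept user messages), so multi-turn conversations lost half their context; the loop fixes that as well.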
@@ -199,14 +217,16 @@ class AIStructuredResponseGeneratorBlock(Block):
         prompt.append({"role": "user", "content": input_data.prompt})

-        def parse_response(resp: str) -> tuple[dict[str, str], str | None]:
+        def parse_response(resp: str) -> tuple[dict[str, Any], str | None]:
             try:
                 parsed = json.loads(resp)
+                if not isinstance(parsed, dict):
+                    return {}, f"Expected a dictionary, but got {type(parsed)}"
                 miss_keys = set(input_data.expected_format.keys()) - set(parsed.keys())
                 if miss_keys:
                     return parsed, f"Missing keys: {miss_keys}"
                 return parsed, None
-            except Exception as e:
+            except JSONDecodeError as e:
                 return {}, f"JSON decode error: {e}"

         logger.info(f"LLM request: {prompt}")
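
The hardened parse_response now separates three failure modes: unparseable text, valid JSON that is not an object, and an object missing expected keys; only genuine decode errors are swallowed, rather than a bare except Exception. A minimal standalone version (EXPECTED_KEYS stands in for input_data.expected_format.keys() and is an assumption of the sketch):

import json
from json import JSONDecodeError
from typing import Any

EXPECTED_KEYS = {"name", "age"}  # stand-in for input_data.expected_format.keys()

def parse_response(resp: str) -> tuple[dict[str, Any], str | None]:
    try:
        parsed = json.loads(resp)
        if not isinstance(parsed, dict):
            return {}, f"Expected a dictionary, but got {type(parsed)}"
        miss_keys = EXPECTED_KEYS - set(parsed.keys())
        if miss_keys:
            return parsed, f"Missing keys: {miss_keys}"
        return parsed, None
    except JSONDecodeError as e:
        return {}, f"JSON decode error: {e}"

assert parse_response('{"name": "a", "age": 1}') == ({"name": "a", "age": 1}, None)
assert "Expected a dictionary" in (parse_response("[1, 2]")[1] or "")
assert "JSON decode error" in (parse_response("not json")[1] or "")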
@@ -230,7 +250,16 @@ class AIStructuredResponseGeneratorBlock(Block):
         if input_data.expected_format:
             parsed_dict, parsed_error = parse_response(response_text)
             if not parsed_error:
-                yield "response", {k: str(v) for k, v in parsed_dict.items()}
+                yield "response", {
+                    k: (
+                        json.loads(v)
+                        if isinstance(v, str)
+                        and v.startswith("[")
+                        and v.endswith("]")
+                        else (", ".join(v) if isinstance(v, list) else v)
+                    )
+                    for k, v in parsed_dict.items()
+                }
                 return
         else:
             yield "response", {"response": response_text}

View File

@@ -0,0 +1,264 @@
import random
from collections import defaultdict
from enum import Enum
from typing import Any, Dict, List, Optional, Union

from autogpt_server.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from autogpt_server.data.model import SchemaField


class SamplingMethod(str, Enum):
    RANDOM = "random"
    SYSTEMATIC = "systematic"
    TOP = "top"
    BOTTOM = "bottom"
    STRATIFIED = "stratified"
    WEIGHTED = "weighted"
    RESERVOIR = "reservoir"
    CLUSTER = "cluster"


class DataSamplingBlock(Block):
    class Input(BlockSchema):
        data: Union[Dict[str, Any], List[Union[dict, List[Any]]]] = SchemaField(
            description="The dataset to sample from. Can be a single dictionary, a list of dictionaries, or a list of lists.",
            placeholder="{'id': 1, 'value': 'a'} or [{'id': 1, 'value': 'a'}, {'id': 2, 'value': 'b'}, ...]",
        )
        sample_size: int = SchemaField(
            description="The number of samples to take from the dataset.",
            placeholder="10",
            default=10,
        )
        sampling_method: SamplingMethod = SchemaField(
            description="The method to use for sampling.",
            default=SamplingMethod.RANDOM,
        )
        accumulate: bool = SchemaField(
            description="Whether to accumulate data before sampling.",
            default=False,
        )
        random_seed: Optional[int] = SchemaField(
            description="Seed for random number generator (optional).",
            default=None,
        )
        stratify_key: Optional[str] = SchemaField(
            description="Key to use for stratified sampling (required for stratified sampling).",
            default=None,
        )
        weight_key: Optional[str] = SchemaField(
            description="Key to use for weighted sampling (required for weighted sampling).",
            default=None,
        )
        cluster_key: Optional[str] = SchemaField(
            description="Key to use for cluster sampling (required for cluster sampling).",
            default=None,
        )

    class Output(BlockSchema):
        sampled_data: List[Union[dict, List[Any]]] = SchemaField(
            description="The sampled subset of the input data."
        )
        sample_indices: List[int] = SchemaField(
            description="The indices of the sampled data in the original dataset."
        )

    def __init__(self):
        super().__init__(
            id="4a448883-71fa-49cf-91cf-70d793bd7d87",
            description="This block samples data from a given dataset using various sampling methods.",
            categories={BlockCategory.LOGIC},
            input_schema=DataSamplingBlock.Input,
            output_schema=DataSamplingBlock.Output,
            test_input={
                "data": [
                    {"id": i, "value": chr(97 + i), "group": i % 3} for i in range(10)
                ],
                "sample_size": 3,
                "sampling_method": SamplingMethod.STRATIFIED,
                "accumulate": False,
                "random_seed": 42,
                "stratify_key": "group",
            },
            test_output=[
                (
                    "sampled_data",
                    [
                        {"id": 0, "value": "a", "group": 0},
                        {"id": 1, "value": "b", "group": 1},
                        {"id": 8, "value": "i", "group": 2},
                    ],
                ),
                ("sample_indices", [0, 1, 8]),
            ],
        )
        self.accumulated_data = []

    def run(self, input_data: Input) -> BlockOutput:
        if input_data.accumulate:
            if isinstance(input_data.data, dict):
                self.accumulated_data.append(input_data.data)
            elif isinstance(input_data.data, list):
                self.accumulated_data.extend(input_data.data)
            else:
                raise ValueError(f"Unsupported data type: {type(input_data.data)}")

            # If we don't have enough data yet, return without sampling
            if len(self.accumulated_data) < input_data.sample_size:
                return

            data_to_sample = self.accumulated_data
        else:
            # If not accumulating, use the input data directly
            data_to_sample = (
                input_data.data
                if isinstance(input_data.data, list)
                else [input_data.data]
            )

        if input_data.random_seed is not None:
            random.seed(input_data.random_seed)

        data_size = len(data_to_sample)

        if input_data.sample_size > data_size:
            raise ValueError(
                f"Sample size ({input_data.sample_size}) cannot be larger than the dataset size ({data_size})."
            )

        indices = []

        if input_data.sampling_method == SamplingMethod.RANDOM:
            indices = random.sample(range(data_size), input_data.sample_size)
        elif input_data.sampling_method == SamplingMethod.SYSTEMATIC:
            step = data_size // input_data.sample_size
            start = random.randint(0, step - 1)
            indices = list(range(start, data_size, step))[: input_data.sample_size]
        elif input_data.sampling_method == SamplingMethod.TOP:
            indices = list(range(input_data.sample_size))
        elif input_data.sampling_method == SamplingMethod.BOTTOM:
            indices = list(range(data_size - input_data.sample_size, data_size))
        elif input_data.sampling_method == SamplingMethod.STRATIFIED:
            if not input_data.stratify_key:
                raise ValueError(
                    "Stratify key must be provided for stratified sampling."
                )
            strata = defaultdict(list)
            for i, item in enumerate(data_to_sample):
                if isinstance(item, dict):
                    strata_value = item.get(input_data.stratify_key)
                elif hasattr(item, input_data.stratify_key):
                    strata_value = getattr(item, input_data.stratify_key)
                else:
                    raise ValueError(
                        f"Stratify key '{input_data.stratify_key}' not found in item {item}"
                    )
                if strata_value is None:
                    raise ValueError(
                        f"Stratify value for key '{input_data.stratify_key}' is None"
                    )
                strata[str(strata_value)].append(i)

            # Calculate the number of samples to take from each stratum
            stratum_sizes = {
                k: max(1, int(len(v) / data_size * input_data.sample_size))
                for k, v in strata.items()
            }

            # Adjust sizes to ensure we get exactly sample_size samples
            while sum(stratum_sizes.values()) != input_data.sample_size:
                if sum(stratum_sizes.values()) < input_data.sample_size:
                    stratum_sizes[
                        max(stratum_sizes, key=lambda k: stratum_sizes[k])
                    ] += 1
                else:
                    stratum_sizes[
                        max(stratum_sizes, key=lambda k: stratum_sizes[k])
                    ] -= 1

            for stratum, size in stratum_sizes.items():
                indices.extend(random.sample(strata[stratum], size))
        elif input_data.sampling_method == SamplingMethod.WEIGHTED:
            if not input_data.weight_key:
                raise ValueError("Weight key must be provided for weighted sampling.")
            weights = []
            for item in data_to_sample:
                if isinstance(item, dict):
                    weight = item.get(input_data.weight_key)
                elif hasattr(item, input_data.weight_key):
                    weight = getattr(item, input_data.weight_key)
                else:
                    raise ValueError(
                        f"Weight key '{input_data.weight_key}' not found in item {item}"
                    )
                if weight is None:
                    raise ValueError(
                        f"Weight value for key '{input_data.weight_key}' is None"
                    )
                try:
                    weights.append(float(weight))
                except ValueError:
                    raise ValueError(
                        f"Weight value '{weight}' cannot be converted to a number"
                    )
            if not weights:
                raise ValueError(
                    f"No valid weights found using key '{input_data.weight_key}'"
                )
            indices = random.choices(
                range(data_size), weights=weights, k=input_data.sample_size
            )
        elif input_data.sampling_method == SamplingMethod.RESERVOIR:
            indices = list(range(input_data.sample_size))
            for i in range(input_data.sample_size, data_size):
                j = random.randint(0, i)
                if j < input_data.sample_size:
                    indices[j] = i
        elif input_data.sampling_method == SamplingMethod.CLUSTER:
            if not input_data.cluster_key:
                raise ValueError("Cluster key must be provided for cluster sampling.")
            clusters = defaultdict(list)
            for i, item in enumerate(data_to_sample):
                if isinstance(item, dict):
                    cluster_value = item.get(input_data.cluster_key)
                elif hasattr(item, input_data.cluster_key):
                    cluster_value = getattr(item, input_data.cluster_key)
                else:
                    raise TypeError(
                        f"Item {item} does not have the cluster key '{input_data.cluster_key}'"
                    )
                clusters[str(cluster_value)].append(i)

            # Randomly select clusters until we have enough samples
            selected_clusters = []
            while (
                sum(len(clusters[c]) for c in selected_clusters)
                < input_data.sample_size
            ):
                available_clusters = [c for c in clusters if c not in selected_clusters]
                if not available_clusters:
                    break
                selected_clusters.append(random.choice(available_clusters))

            for cluster in selected_clusters:
                indices.extend(clusters[cluster])

            # If we have more samples than needed, randomly remove some
            if len(indices) > input_data.sample_size:
                indices = random.sample(indices, input_data.sample_size)
        else:
            raise ValueError(f"Unknown sampling method: {input_data.sampling_method}")

        sampled_data = [data_to_sample[i] for i in indices]

        # Clear accumulated data after sampling if accumulation is enabled
        if input_data.accumulate:
            self.accumulated_data = []

        yield "sampled_data", sampled_data
        yield "sample_indices", indices