fix eval api_key leak in metadata; fix llm config in run infer (#2998)

Author: Xingyao Wang
Authored: 2024-07-18 23:46:59 +08:00
Committed by: GitHub
Parent: 692fe21d60
Commit: cf910dfa9d
16 changed files with 34 additions and 15 deletions

View File

@@ -62,7 +62,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
     eval_output_dir = metadata.eval_output_dir
     if reset_logger:
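
The same one-argument fix repeats in every run-infer hunk below: the LLM constructor takes its configuration under the parameter name config, so the old llm_config= keyword no longer matches. A minimal sketch of the failure mode, using a stand-in class rather than the repository's actual LLM (names and values here are illustrative):

# Stand-in for the real LLM class: its __init__ names the parameter `config`.
class LLM:
    def __init__(self, config):
        self.config = config

llm_config = {'model': 'gpt-4o', 'api_key': '***'}  # placeholder config values

LLM(config=llm_config)        # matches the parameter name, works
# LLM(llm_config=llm_config)  # TypeError: unexpected keyword argument 'llm_config'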

View File

@@ -37,7 +37,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     inst_id = instance.instance_id
     question = instance.description

View File

@@ -87,7 +87,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     instance = BiocoderData(**instance)
     print(instance)
     workspace_dir_name = (

View File

@@ -126,7 +126,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     workspace_mount_path = os.path.join(
         config.workspace_mount_path, 'bird_eval_workspace'
     )

View File

@@ -31,7 +31,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     env_id = instance.instance_id
     # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
     if reset_logger:

View File

@@ -48,7 +48,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     # create process-specific workspace dir
     # we will create a workspace directory for EACH process
     # so that different agent don't interfere with each other.

View File

@@ -120,7 +120,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     old_workspace_mount_path = config.workspace_mount_path
     old_workspace_base = config.workspace_base
     try:

View File

@@ -108,7 +108,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     old_workspace_mount_path = config.workspace_mount_path
     old_workspace_base = config.workspace_base

View File

@@ -103,7 +103,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     old_workspace_mount_path = config.workspace_mount_path
     old_workspace_base = config.workspace_base

View File

@@ -41,7 +41,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     env_id = instance.id
     # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
     if reset_logger:

View File

@@ -67,7 +67,7 @@ ID2CONDA = {
 def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = True):
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     old_workspace_mount_path = config.workspace_mount_path
     old_workspace_base = config.workspace_base
     try:

View File

@@ -172,7 +172,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     workspace_mount_path = os.path.join(config.workspace_mount_path, '_eval_workspace')
     # create process-specific workspace dir

View File

@@ -35,7 +35,7 @@ AGENT_CLS_TO_INST_SUFFIX = {
 def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = True):
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     # create process-specific workspace dir
     # we will create a workspace directory for EACH process
     # so that different agent don't interfere with each other.

View File

@@ -29,6 +29,14 @@ class EvalMetadata(BaseModel):
     data_split: str | None = None
     details: dict[str, Any] | None = None
+
+    def model_dump_json(self, *args, **kwargs):
+        dumped = super().model_dump_json(*args, **kwargs)
+        dumped_dict = json.loads(dumped)
+        logger.debug(f'Dumped metadata: {dumped_dict}')
+        # avoid leaking sensitive information
+        dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
+        return json.dumps(dumped_dict)
 
 
 def codeact_user_response(
     state: State,
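
The hunk above is the api_key-leak fix named in the commit title: EvalMetadata overrides model_dump_json so the serialized llm_config is swapped for a sanitized copy before the metadata is written anywhere. A self-contained sketch of the same redaction pattern, assuming Pydantic v2 and an illustrative to_safe_dict helper that masks the secret (the field names and values below are placeholders, not the project's real config schema):

import json
from pydantic import BaseModel

class LLMConfig(BaseModel):
    model: str
    api_key: str | None = None

    def to_safe_dict(self):
        # Return a plain dict with the secret masked rather than serialized.
        d = self.model_dump()
        if d.get('api_key'):
            d['api_key'] = '******'
        return d

class EvalMetadata(BaseModel):
    agent_class: str
    llm_config: LLMConfig

    def model_dump_json(self, *args, **kwargs):
        dumped = super().model_dump_json(*args, **kwargs)
        dumped_dict = json.loads(dumped)
        # avoid leaking sensitive information
        dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
        return json.dumps(dumped_dict)

metadata = EvalMetadata(
    agent_class='CodeActAgent',
    llm_config=LLMConfig(model='gpt-4o', api_key='sk-placeholder'),
)
print(metadata.model_dump_json())  # llm_config.api_key prints as '******'

Redacting at serialization time keeps the in-memory config intact for the running evaluation, while anything persisted to disk or logs only ever sees the masked value.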

View File

@@ -42,7 +42,7 @@ def process_instance(
     reset_logger: bool = True,
 ):
     # Create the agent
-    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(llm_config=metadata.llm_config))
+    agent = Agent.get_cls(metadata.agent_class)(llm=LLM(config=metadata.llm_config))
     env_id = instance.id
     # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
     if reset_logger: