diff --git a/evaluation/swe_bench/README.md b/evaluation/swe_bench/README.md
index 958d6b7649..44efd1cb1f 100644
--- a/evaluation/swe_bench/README.md
+++ b/evaluation/swe_bench/README.md
@@ -51,6 +51,7 @@ sandbox_timeout = 120
 use_host_network = false
 run_as_devin = false
 enable_auto_lint = true
+max_budget_per_task = 4 # budget cap per task, in USD
 
 # TODO: Change these to the model you want to evaluate
 [eval_gpt4_1106_preview]
diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
index d89835559d..fdd073e933 100644
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -209,7 +209,7 @@ def process_instance(
     if reset_logger:
         # Set up logger
         log_file = os.path.join(
-            eval_output_dir, 'logs', f'instance_{instance.instance_id}.log'
+            eval_output_dir, 'infer_logs', f'instance_{instance.instance_id}.log'
         )
         # Remove all existing handlers from logger
         for handler in logger.handlers[:]: