extract raw data to analysis/raw-data

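Mostly so that listing the analysis directory in Jupyter doesn't take forever: the raw files now go into a "raw-data" subdirectory instead of directly into the analysis dir.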
2026-01-09 14:07:57 -05:00 · 2020-04-29 12:53:51 -04:00
parent b54cf6beb5
commit e66484fd03
2 changed files with 18 additions and 8 deletions
--- a/scripts/analyze.py
+++ b/scripts/analyze.py
@@ -33,8 +33,8 @@ def parse_args():
                        help='path to testground output zip or tgz file')

    extract_cmd.add_argument('--output-dir', '-o', dest='output_dir', default=None,
-                             help='path to write output files. default is to create a new dir based on zip filename')
-    extract_cmd.set_defaults(subcomment='extract')
+                             help='path to write output files. default is to create an "analysis" dir next to archive file')
+    extract_cmd.set_defaults(subcommand='extract')

    run_notebook_cmd = commands.add_parser('run_notebook',
                                           help='runs latest analysis notebook against extracted test data')
@@ -236,14 +236,16 @@ def extract_test_outputs(test_output_zip_path, output_dir=None, convert_to_panda
    if output_dir is None or output_dir == '':
        output_dir = os.path.join(os.path.dirname(test_output_zip_path), 'analysis')

-    mkdirp(output_dir)
-    aggregate_output(test_output_zip_path, output_dir)
-    run_tracestat(output_dir)
+    raw_output_dir = os.path.join(output_dir, 'raw-data')
+    mkdirp(raw_output_dir)
+    aggregate_output(test_output_zip_path, raw_output_dir)
+    run_tracestat(raw_output_dir)

    if convert_to_pandas:
        import notebook_helper
        print('converting data to pandas format...')
-        notebook_helper.to_pandas(output_dir, os.path.join(output_dir, 'pandas'))
+        pandas_dir = os.path.join(output_dir, 'pandas')
+        notebook_helper.to_pandas(raw_output_dir, pandas_dir)
    if prep_notebook:
        prepare_analysis_notebook(analysis_dir=output_dir)
    return output_dir
--- a/scripts/notebook_helper.py
+++ b/scripts/notebook_helper.py
@@ -181,9 +181,17 @@ def write_pandas(tables, output_dir):
 def load_pandas(analysis_dir):
    analysis_dir = os.path.abspath(analysis_dir)
    pandas_dir = os.path.join(analysis_dir, 'pandas')
+    raw_data_dir = os.path.join(analysis_dir, 'raw-data')
+
+    # if the raw-data dir doesn't exist, assume that we're running against an
+    # output directory that was extracted with an earlier version, which put
+    # the raw data in the "analysis" dir
+    if not os.path.exists(raw_data_dir):
+        raw_data_dir = analysis_dir
+
    if not os.path.exists(pandas_dir):
-        print('Cached pandas data not found. Converting analysis data from {} to pandas'.format(analysis_dir))
-        to_pandas(analysis_dir, pandas_dir)
+        print('Cached pandas data not found. Converting analysis data from {} to pandas'.format(raw_data_dir))
+        to_pandas(raw_data_dir, pandas_dir)

    tables = {}
    for f in os.listdir(pandas_dir):