extract raw data to analysis/raw-data

mostly so listing the directory in Jupyter doesn't take forever
This commit is contained in:
Yusef Napora
2020-04-29 12:53:51 -04:00
parent b54cf6beb5
commit e66484fd03
2 changed files with 18 additions and 8 deletions

View File

@@ -33,8 +33,8 @@ def parse_args():
help='path to testground output zip or tgz file')
extract_cmd.add_argument('--output-dir', '-o', dest='output_dir', default=None,
help='path to write output files. default is to create a new dir based on zip filename')
extract_cmd.set_defaults(subcomment='extract')
help='path to write output files. default is to create an "analysis" dir next to archive file')
extract_cmd.set_defaults(subcommand='extract')
run_notebook_cmd = commands.add_parser('run_notebook',
help='runs latest analysis notebook against extracted test data')
@@ -236,14 +236,16 @@ def extract_test_outputs(test_output_zip_path, output_dir=None, convert_to_panda
if output_dir is None or output_dir == '':
output_dir = os.path.join(os.path.dirname(test_output_zip_path), 'analysis')
mkdirp(output_dir)
aggregate_output(test_output_zip_path, output_dir)
run_tracestat(output_dir)
raw_output_dir = os.path.join(output_dir, 'raw-data')
mkdirp(raw_output_dir)
aggregate_output(test_output_zip_path, raw_output_dir)
run_tracestat(raw_output_dir)
if convert_to_pandas:
import notebook_helper
print('converting data to pandas format...')
notebook_helper.to_pandas(output_dir, os.path.join(output_dir, 'pandas'))
pandas_dir = os.path.join(output_dir, 'pandas')
notebook_helper.to_pandas(raw_output_dir, pandas_dir)
if prep_notebook:
prepare_analysis_notebook(analysis_dir=output_dir)
return output_dir

View File

@@ -181,9 +181,17 @@ def write_pandas(tables, output_dir):
def load_pandas(analysis_dir):
analysis_dir = os.path.abspath(analysis_dir)
pandas_dir = os.path.join(analysis_dir, 'pandas')
raw_data_dir = os.path.join(analysis_dir, 'raw-data')
# if the raw-data dir doesn't exist, assume that we're running against an
# output directory that was extracted with an earlier version, which put
# the raw data in the "analysis" dir
if not os.path.exists(raw_data_dir):
raw_data_dir = analysis_dir
if not os.path.exists(pandas_dir):
print('Cached pandas data not found. Converting analysis data from {} to pandas'.format(analysis_dir))
to_pandas(analysis_dir, pandas_dir)
print('Cached pandas data not found. Converting analysis data from {} to pandas'.format(raw_data_dir))
to_pandas(raw_data_dir, pandas_dir)
tables = {}
for f in os.listdir(pandas_dir):