diff --git a/.github/workflows/szdiff.yml b/.github/workflows/szdiff.yml
new file mode 100644
index 0000000000..64bdb83170
--- /dev/null
+++ b/.github/workflows/szdiff.yml
@@ -0,0 +1,53 @@
+name: Check Line Counts
+on:
+  pull_request:
+
+# Cancel the workflow in progress if a newer build is about to start.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  szdiff:
+    name: Core Library Line Difference
+    permissions:
+      contents: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout code from pr
+      uses: actions/checkout@v2
+      with:
+        ref: ${{ github.event.pull_request.head.ref }}
+        repository: ${{ github.event.pull_request.head.repo.full_name }}
+        path: pr
+    - name: Checkout code from base
+      uses: actions/checkout@v2
+      with:
+        ref: ${{ github.event.pull_request.base.sha }}
+        path: base
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.10'
+    - name: Count Lines Of Code
+      # The report is stored in the multi-line env variable `loc_content`,
+      # written to $GITHUB_ENV with the `name<<DELIMITER ... DELIMITER` syntax.
+      run: |
+        pip install tabulate
+        BASE="$GITHUB_WORKSPACE/base"
+        PR="$GITHUB_WORKSPACE/pr"
+        cp "$PR/sz.py" .
+        echo "loc_content<<EOF" >> "$GITHUB_ENV"
+        python sz.py "$BASE" "$PR" >> "$GITHUB_ENV"
+        echo "EOF" >> "$GITHUB_ENV"
+    - name: Comment Code Lines
+      continue-on-error: false
+      uses: marocchino/sticky-pull-request-comment@v2
+      with:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        header: LOC
+        ignore_empty: true
+        skip_unchanged: true
+        recreate: true
+        message: ${{ env.loc_content }}
diff --git a/sz.py b/sz.py
index 3514e9d32e..c70280c806 100755
--- a/sz.py
+++ b/sz.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
-import os
-from pathlib import Path
+import os, sys
 import token
 import tokenize
 import itertools
@@ -8,21 +7,70 @@ from tabulate import tabulate
 
 TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
 
-if __name__ == "__main__":
-  headers = ["Name", "Lines", "Tokens/Line"]
+def gen_stats(base_path="."):
+  """Collect per-file line and token statistics for the tinygrad package.
+
+  Walks `<base_path>/tinygrad`, tokenizes every `.py` file and counts only tokens
+  whose type is in TOKEN_WHITELIST. Returns a list of
+  `[relative_path, line_count, tokens_per_line]` rows; paths are relative to
+  `base_path` and always use "/" as separator.
+  """
   table = []
-  for path, subdirs, files in os.walk("tinygrad"):
+  for path, _, files in os.walk(os.path.join(base_path, "tinygrad")):
     for name in files:
       if not name.endswith(".py"): continue
-      filepath = Path(path) / name
+      filepath = os.path.join(path, name)
+      # normalize to "/" so the directory grouping in __main__ (rsplit("/")) also works on Windows
+      relfilepath = os.path.relpath(filepath, base_path).replace(os.sep, "/")
       with tokenize.open(filepath) as file_:
         tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST]
         token_count, line_count = len(tokens), len(set([t.start[0] for t in tokens]))
-        table.append([filepath.as_posix(), line_count, token_count/line_count])
+        # a file without any whitelisted token (e.g. an empty __init__.py) has zero lines: avoid ZeroDivisionError
+        table.append([relfilepath, line_count, token_count/line_count if line_count else 0.0])
+  return table
 
-  print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", floatfmt=".1f")+"\n")
+def gen_diff(table_old, table_new):
+  """Compare two gen_stats tables and return one row per file whose stats differ.
+
+  Each row is `[name, lines, lines_diff, tokens_per_line, tokens_per_line_diff]`.
+  A file present in only one table is compared against zeros, so added files get
+  positive diffs and deleted files negative ones. Files present in both tables
+  with identical line count and tokens/line are omitted.
+  """
+  # index both tables by file name once instead of rescanning them for every file
+  old = {name: (lines, ratio) for name, lines, ratio in table_old}
+  new = {name: (lines, ratio) for name, lines, ratio in table_new}
+  table = []
+  # sorted() keeps the row order deterministic; iteration order of a set of str varies between runs
+  for name in sorted(old.keys() | new.keys()):
+    old_lines, old_ratio = old.get(name, (0, 0))
+    new_lines, new_ratio = new.get(name, (0, 0))
+    if name in old and name in new and (old_lines, old_ratio) == (new_lines, new_ratio): continue
+    table.append([name, new_lines, new_lines-old_lines, new_ratio, new_ratio-old_ratio])
+  return table
 
-  for dir_name, group in itertools.groupby(sorted([(x[0].rsplit("/", 1)[0], x[1]) for x in table]), key=lambda x:x[0]):
-    print(f"{dir_name:30s} : {sum([x[1] for x in group]):6d}")
+def display_diff(diff):
+  """Return `diff` as a string, prefixed with "+" when it is strictly positive."""
+  return "+"+str(diff) if diff > 0 else str(diff)
 
-  print(f"\ntotal line count: {sum([x[1] for x in table])}")
+if __name__ == "__main__":
+  # usage: sz.py [BASE [PR]] -- with two paths print a markdown diff of PR against BASE, otherwise a plain listing
+  if len(sys.argv) == 3:
+    headers = ["Name", "Lines", "Diff", "Tokens/Line", "Diff"]
+    table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
+  else:
+    headers = ["Name", "Lines", "Tokens/Line"]
+    table = gen_stats(sys.argv[1] if len(sys.argv) == 2 else ".")
+
+  if table:
+    if len(sys.argv) == 3:
+      print("### Changes")
+      print("```")
+      print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", intfmt=(..., "d", "+d"), floatfmt=(..., ..., ..., ".1f", "+.1f"))+"\n")
+      print(f"\ntotal lines changes: {display_diff(sum([x[2] for x in table]))}")
+      print("```")
+    else:
+      print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", floatfmt=".1f")+"\n")
+      for dir_name, group in itertools.groupby(sorted([(x[0].rsplit("/", 1)[0], x[1]) for x in table]), key=lambda x:x[0]):
+        print(f"{dir_name:30s} : {sum([x[1] for x in group]):6d}")
+      print(f"\ntotal line count: {sum([x[1] for x in table])}")