refactor(analytics): move generate_views.py to backend, add poetry run analytics-setup/analytics-views scripts

This commit is contained in:
Zamil Majdy
2026-03-11 16:23:29 +07:00
parent 856f0d980d
commit 8aad333a45
4 changed files with 232 additions and 238 deletions

View File

@@ -1,2 +0,0 @@
views.sql
setup.sql

View File

@@ -1,236 +0,0 @@
#!/usr/bin/env python3
"""
AutoGPT Analytics — View Generator
====================================
Reads every .sql file in queries/ and registers it as a
CREATE OR REPLACE VIEW in the analytics schema.
Quick start
-----------
Step 1 — one-time setup (creates schema, role, grants).
Run the output in Supabase SQL Editor as the postgres superuser:
python generate_views.py --setup | psql <db-url>
# or copy-paste the printed SQL into the Supabase SQL Editor
Step 2 — create / refresh all 14 analytics views.
Credentials are auto-detected from backend/.env (DB_* vars);
override with --db-url or DATABASE_URL if needed:
python generate_views.py
Step 3 (optional) — set a password for the read-only role so
external tools (Supabase MCP, PostHog Data Warehouse) can connect:
ALTER ROLE analytics_readonly WITH PASSWORD 'your-password';
Usage
-----
# Print one-time setup SQL (schema, role, grants)
python generate_views.py --setup
# Dry-run: print all view SQL without executing
python generate_views.py --dry-run
# Apply to database (auto-reads backend/.env)
python generate_views.py
# Apply to database (explicit connection string)
python generate_views.py --db-url "postgresql://user:pass@host:5432/db"
# Apply only specific views (e.g. after editing one query)
python generate_views.py --only graph_execution,retention_login_weekly
Environment variables
---------------------
DATABASE_URL Postgres connection string (checked before backend/.env)
Notes
-----
- backend/.env DB_* vars are read automatically as a fallback.
- Safe to re-run: uses CREATE OR REPLACE VIEW.
- Looker, PostHog Data Warehouse, and Supabase MCP all read from the
same analytics.* views — no raw tables exposed.
"""
import argparse
import os
import sys
from pathlib import Path
QUERIES_DIR = Path(__file__).parent / "queries"
BACKEND_ENV = Path(__file__).parent.parent / "backend" / ".env"
SCHEMA = "analytics"
SETUP_SQL = """\
-- =============================================================
-- AutoGPT Analytics Schema Setup
-- Run ONCE in Supabase SQL Editor as the postgres superuser.
-- After this, run generate_views.py to create/refresh the views.
-- =============================================================
-- 1. Create the analytics schema
CREATE SCHEMA IF NOT EXISTS analytics;
-- 2. Create the read-only role (skip if already exists)
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'analytics_readonly') THEN
CREATE ROLE analytics_readonly WITH LOGIN PASSWORD 'CHANGE_ME';
END IF;
END
$$;
-- 3. Analytics schema grants only.
-- Views are created with security_invoker = false so they execute as their
-- owner (postgres). analytics_readonly never needs direct access to the
-- platform or auth schemas — it can only see analytics.* views.
GRANT USAGE ON SCHEMA analytics TO analytics_readonly;
GRANT SELECT ON ALL TABLES IN SCHEMA analytics TO analytics_readonly;
ALTER DEFAULT PRIVILEGES IN SCHEMA analytics
GRANT SELECT ON TABLES TO analytics_readonly;
"""
def load_db_url_from_backend_env() -> str | None:
    """Read DB_* vars from backend/.env and build a psycopg2 connection string.

    Returns None when backend/.env is missing or DB_PASS is unset, so the
    caller can fall back to other credential sources (or error out).
    """
    if not BACKEND_ENV.exists():
        return None
    env: dict[str, str] = {}
    for line in BACKEND_ENV.read_text().splitlines():
        line = line.strip()
        # Skip blanks, comments, and anything that isn't KEY=VALUE.
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        # strip optional surrounding quotes
        value = value.strip().strip('"').strip("'")
        env[key] = value
    # Defaults mirror a stock local Postgres setup.
    host = env.get("DB_HOST", "localhost")
    port = env.get("DB_PORT", "5432")
    user = env.get("DB_USER", "postgres")
    password = env.get("DB_PASS", "")
    dbname = env.get("DB_NAME", "postgres")
    if not password:
        return None
    # NOTE(review): user/password are not percent-encoded here; a DB_PASS
    # containing @ : / or % would yield an unparsable URL — confirm upstream.
    return f"postgresql://{user}:{password}@{host}:{port}/{dbname}"
def view_name_from_file(path: Path) -> str:
    """Derive the analytics view name from a query file: its stem, sans .sql."""
    return path.stem
def build_view_sql(name: str, query_body: str) -> str:
    """Wrap a raw SELECT body in a CREATE OR REPLACE VIEW statement for SCHEMA."""
    # Strip any trailing semicolons so we can wrap cleanly
    body = query_body.strip().rstrip(";")
    # security_invoker = false → view executes as its owner (postgres), not the
    # calling user. This lets analytics_readonly query views without needing
    # direct SELECT grants on the underlying platform / auth tables.
    return f"CREATE OR REPLACE VIEW {SCHEMA}.{name} WITH (security_invoker = false) AS\n{body};\n"
def generate_all(only: list[str] | None = None) -> list[tuple[str, str]]:
    """Return list of (view_name, sql) pairs, in alphabetical order.

    Args:
        only: optional allow-list of view names; files outside it are skipped.

    Exits with status 1 when queries/ contains no .sql files at all.
    """
    files = sorted(QUERIES_DIR.glob("*.sql"))
    if not files:
        print(f"No .sql files found in {QUERIES_DIR}", file=sys.stderr)
        sys.exit(1)
    result = []
    for f in files:
        name = view_name_from_file(f)
        if only and name not in only:
            continue
        body = f.read_text()
        result.append((name, build_view_sql(name, body)))
    return result
def apply_to_db(views: list[tuple[str, str]], db_url: str) -> None:
    """Execute every view DDL in one transaction, then refresh grants.

    Rolls everything back and exits with status 1 on the first failure, so a
    broken query never leaves the analytics schema half-updated.
    """
    # psycopg2 is imported lazily so dry-run paths work without the driver.
    try:
        import psycopg2
    except ImportError:
        print(
            "psycopg2 not installed. Run: pip install psycopg2-binary",
            file=sys.stderr,
        )
        sys.exit(1)
    conn = psycopg2.connect(db_url)
    conn.autocommit = False  # explicit commit below: all-or-nothing batch
    cur = conn.cursor()
    try:
        for name, sql in views:
            print(f" Creating view: {SCHEMA}.{name} ...", end=" ")
            cur.execute(sql)
            print("OK")
        # Also refresh grants so the readonly role can see new views
        cur.execute(
            f"GRANT SELECT ON ALL TABLES IN SCHEMA {SCHEMA} TO analytics_readonly;"
        )
        conn.commit()
        print(f"\n{len(views)} view(s) created/updated successfully.")
    except Exception as e:
        conn.rollback()
        print(f"\n✗ Error: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        cur.close()
        conn.close()
def main() -> None:
    """CLI entry point: parse flags, then print or apply the view SQL."""
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "--setup", action="store_true", help="Print one-time schema/role/grant SQL"
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="Print SQL without executing"
    )
    parser.add_argument(
        "--db-url", help="Postgres connection string (overrides DATABASE_URL)"
    )
    parser.add_argument(
        "--only", help="Comma-separated list of view names to update (default: all)"
    )
    args = parser.parse_args()
    # --setup only prints bootstrap SQL; it never touches the database.
    if args.setup:
        print(SETUP_SQL)
        return
    only = [v.strip() for v in args.only.split(",")] if args.only else None
    views = generate_all(only=only)
    if not views:
        print("No matching views found.")
        sys.exit(0)
    if args.dry_run:
        print(f"-- Generated by generate_views.py ({len(views)} views)\n")
        for name, sql in views:
            print(f"-- ── {name} ──────────────────────────────")
            print(sql)
        return
    # Credential resolution order: --db-url > DATABASE_URL > backend/.env.
    db_url = (
        args.db_url or os.environ.get("DATABASE_URL") or load_db_url_from_backend_env()
    )
    if not db_url:
        print(
            "No database URL found.\n"
            "Tried: --db-url, DATABASE_URL env var, and backend/.env (DB_* vars).\n"
            "Use --dry-run to just print the SQL.",
            file=sys.stderr,
        )
        sys.exit(1)
    print(f"Applying {len(views)} view(s) to database...")
    apply_to_db(views, db_url)
# Allow direct invocation: python generate_views.py [flags]
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,230 @@
#!/usr/bin/env python3
"""
AutoGPT Analytics — View Generator
====================================
Reads every .sql file in analytics/queries/ and registers it as a
CREATE OR REPLACE VIEW in the analytics schema.
Quick start (from autogpt_platform/backend/):
Step 1 — one-time setup (creates schema, role, grants):
poetry run analytics-setup
Step 2 — create / refresh all 14 analytics views:
poetry run analytics-views
Both commands auto-detect credentials from .env (DB_* vars).
Use --db-url to override.
Step 3 (optional) — set a password for the read-only role so
external tools (Supabase MCP, PostHog Data Warehouse) can connect.
Run in Supabase SQL Editor:
ALTER ROLE analytics_readonly WITH PASSWORD 'your-password';
Usage
-----
poetry run analytics-setup # apply setup to DB
poetry run analytics-setup --dry-run # print setup SQL only
poetry run analytics-views # apply all views to DB
poetry run analytics-views --dry-run # print all view SQL only
poetry run analytics-views --only graph_execution,retention_login_weekly
Environment variables
---------------------
DATABASE_URL Postgres connection string (checked before .env)
Notes
-----
- .env DB_* vars are read automatically as a fallback.
- Safe to re-run: uses CREATE OR REPLACE VIEW.
- Looker, PostHog Data Warehouse, and Supabase MCP all read from the
same analytics.* views — no raw tables exposed.
"""
import argparse
import os
import sys
from pathlib import Path
from urllib.parse import quote
QUERIES_DIR = Path(__file__).parent.parent / "analytics" / "queries"
ENV_FILE = Path(__file__).parent / ".env"
SCHEMA = "analytics"
SETUP_SQL = """\
-- =============================================================
-- AutoGPT Analytics Schema Setup
-- Run ONCE as the postgres superuser (e.g. via Supabase SQL Editor).
-- After this, run: poetry run analytics-views
-- =============================================================
-- 1. Create the analytics schema
CREATE SCHEMA IF NOT EXISTS analytics;
-- 2. Create the read-only role (skip if already exists)
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'analytics_readonly') THEN
CREATE ROLE analytics_readonly WITH LOGIN PASSWORD 'CHANGE_ME';
END IF;
END
$$;
-- 3. Analytics schema grants only.
-- Views use security_invoker = false so they execute as their
-- owner (postgres). analytics_readonly never needs direct access
-- to the platform or auth schemas.
GRANT USAGE ON SCHEMA analytics TO analytics_readonly;
GRANT SELECT ON ALL TABLES IN SCHEMA analytics TO analytics_readonly;
ALTER DEFAULT PRIVILEGES IN SCHEMA analytics
GRANT SELECT ON TABLES TO analytics_readonly;
"""
def load_db_url_from_env() -> str | None:
"""Read DB_* vars from .env and build a psycopg2 connection string."""
if not ENV_FILE.exists():
return None
env: dict[str, str] = {}
for line in ENV_FILE.read_text().splitlines():
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, _, value = line.partition("=")
env[key.strip()] = value.strip().strip('"').strip("'")
host = env.get("DB_HOST", "localhost")
port = env.get("DB_PORT", "5432")
user = env.get("DB_USER", "postgres")
password = env.get("DB_PASS", "")
dbname = env.get("DB_NAME", "postgres")
if not password:
return None
return f"postgresql://{user}:{password}@{host}:{port}/{dbname}"
def get_db_url(args: argparse.Namespace) -> str | None:
return args.db_url or os.environ.get("DATABASE_URL") or load_db_url_from_env()
def connect(db_url: str):
    """Return an open psycopg2 connection, or exit 1 if the driver is absent.

    psycopg2 is imported lazily so dry-run code paths never require the
    database driver to be installed.
    """
    try:
        import psycopg2
    except ImportError:
        print("psycopg2 not found. Run: poetry install", file=sys.stderr)
        sys.exit(1)
    return psycopg2.connect(db_url)
def run_sql(db_url: str, statements: list[tuple[str, str]]) -> None:
    """Execute a list of (label, sql) pairs in a single transaction.

    Prints per-statement progress; on any failure rolls back the entire batch
    and exits with status 1, so the schema is never left half-updated.
    """
    conn = connect(db_url)
    conn.autocommit = False  # explicit commit below: all-or-nothing batch
    cur = conn.cursor()
    try:
        for label, sql in statements:
            print(f" {label} ...", end=" ")
            cur.execute(sql)
            print("OK")
        conn.commit()
        print(f"\n{len(statements)} statement(s) applied.")
    except Exception as e:
        conn.rollback()
        print(f"\n✗ Error: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        cur.close()
        conn.close()
def build_view_sql(name: str, query_body: str) -> str:
body = query_body.strip().rstrip(";")
# security_invoker = false → view runs as its owner (postgres), not the
# caller, so analytics_readonly only needs analytics schema access.
return f"CREATE OR REPLACE VIEW {SCHEMA}.{name} WITH (security_invoker = false) AS\n{body};\n"
def load_views(only: list[str] | None = None) -> list[tuple[str, str]]:
    """Return [(label, sql)] for all views, in alphabetical order.

    When *only* is a non-empty list, views outside it are skipped; an empty
    queries directory is a hard error (exit 1).
    """
    files = sorted(QUERIES_DIR.glob("*.sql"))
    if not files:
        print(f"No .sql files found in {QUERIES_DIR}", file=sys.stderr)
        sys.exit(1)
    wanted = set(only) if only else None
    return [
        (f"view analytics.{f.stem}", build_view_sql(f.stem, f.read_text()))
        for f in files
        if wanted is None or f.stem in wanted
    ]
def no_db_url_error() -> None:
    """Explain on stderr how credentials are resolved, then abort with exit 1."""
    message = (
        "No database URL found.\n"
        "Tried: --db-url, DATABASE_URL env var, and .env (DB_* vars).\n"
        "Use --dry-run to just print the SQL."
    )
    print(message, file=sys.stderr)
    sys.exit(1)
def cmd_setup(args: argparse.Namespace) -> None:
    """Apply (or, with --dry-run, just print) the one-time schema/role/grant SQL."""
    if args.dry_run:
        print(SETUP_SQL)
        return
    db_url = get_db_url(args)
    if not db_url:
        no_db_url_error()
    assert db_url  # type-narrowing only; no_db_url_error() never returns
    print("Applying analytics setup...")
    run_sql(db_url, [("schema / role / grants", SETUP_SQL)])
def cmd_views(args: argparse.Namespace) -> None:
    """Create/refresh analytics views — all of them, or the --only subset."""
    only = [v.strip() for v in args.only.split(",")] if args.only else None
    views = load_views(only=only)
    if not views:
        # --only matched nothing: not an error, but nothing to do.
        print("No matching views found.")
        sys.exit(0)
    if args.dry_run:
        print(f"-- {len(views)} views\n")
        for label, sql in views:
            print(f"-- {label}")
            print(sql)
        return
    db_url = get_db_url(args)
    if not db_url:
        no_db_url_error()
    assert db_url  # type-narrowing only; no_db_url_error() never returns
    print(f"Applying {len(views)} view(s)...")
    # Append grant refresh so the readonly role sees any new views
    grant = f"GRANT SELECT ON ALL TABLES IN SCHEMA {SCHEMA} TO analytics_readonly;"
    run_sql(db_url, views + [("grant analytics_readonly", grant)])
def main_setup() -> None:
    """Console-script entry point for `poetry run analytics-setup`."""
    parser = argparse.ArgumentParser(description="Apply analytics schema setup to DB")
    parser.add_argument("--dry-run", action="store_true", help="Print SQL, don't execute")
    parser.add_argument("--db-url", help="Postgres connection string")
    args = parser.parse_args()
    cmd_setup(args)
def main_views() -> None:
    """Console-script entry point for `poetry run analytics-views`."""
    parser = argparse.ArgumentParser(description="Apply analytics views to DB")
    parser.add_argument("--dry-run", action="store_true", help="Print SQL, don't execute")
    parser.add_argument("--db-url", help="Postgres connection string")
    parser.add_argument("--only", help="Comma-separated view names to update")
    args = parser.parse_args()
    cmd_views(args)
if __name__ == "__main__":
    # Default: apply views (backwards-compatible with direct python invocation)
    main_views()

View File

@@ -120,6 +120,8 @@ ws = "backend.ws:main"
scheduler = "backend.scheduler:main"
notification = "backend.notification:main"
executor = "backend.exec:main"
analytics-setup = "generate_views:main_setup"
analytics-views = "generate_views:main_views"
copilot-executor = "backend.copilot.executor.__main__:main"
cli = "backend.cli:main"
format = "linter:format"