mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
refactor(analytics): move generate_views.py to backend, add poetry run analytics-setup/analytics-views scripts
This commit is contained in:
2
autogpt_platform/analytics/.gitignore
vendored
2
autogpt_platform/analytics/.gitignore
vendored
@@ -1,2 +0,0 @@
|
||||
views.sql
|
||||
setup.sql
|
||||
@@ -1,236 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AutoGPT Analytics — View Generator
|
||||
====================================
|
||||
Reads every .sql file in queries/ and registers it as a
|
||||
CREATE OR REPLACE VIEW in the analytics schema.
|
||||
|
||||
Quick start
|
||||
-----------
|
||||
Step 1 — one-time setup (creates schema, role, grants).
|
||||
Run the output in Supabase SQL Editor as the postgres superuser:
|
||||
|
||||
python generate_views.py --setup | psql <db-url>
|
||||
# or copy-paste the printed SQL into the Supabase SQL Editor
|
||||
|
||||
Step 2 — create / refresh all 14 analytics views.
|
||||
Credentials are auto-detected from backend/.env (DB_* vars);
|
||||
override with --db-url or DATABASE_URL if needed:
|
||||
|
||||
python generate_views.py
|
||||
|
||||
Step 3 (optional) — set a password for the read-only role so
|
||||
external tools (Supabase MCP, PostHog Data Warehouse) can connect:
|
||||
|
||||
ALTER ROLE analytics_readonly WITH PASSWORD 'your-password';
|
||||
|
||||
Usage
|
||||
-----
|
||||
# Print one-time setup SQL (schema, role, grants)
|
||||
python generate_views.py --setup
|
||||
|
||||
# Dry-run: print all view SQL without executing
|
||||
python generate_views.py --dry-run
|
||||
|
||||
# Apply to database (auto-reads backend/.env)
|
||||
python generate_views.py
|
||||
|
||||
# Apply to database (explicit connection string)
|
||||
python generate_views.py --db-url "postgresql://user:pass@host:5432/db"
|
||||
|
||||
# Apply only specific views (e.g. after editing one query)
|
||||
python generate_views.py --only graph_execution,retention_login_weekly
|
||||
|
||||
Environment variables
|
||||
---------------------
|
||||
DATABASE_URL Postgres connection string (checked before backend/.env)
|
||||
|
||||
Notes
|
||||
-----
|
||||
- backend/.env DB_* vars are read automatically as a fallback.
|
||||
- Safe to re-run: uses CREATE OR REPLACE VIEW.
|
||||
- Looker, PostHog Data Warehouse, and Supabase MCP all read from the
|
||||
same analytics.* views — no raw tables exposed.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Directory of per-view .sql query files; each file becomes one analytics view.
QUERIES_DIR = Path(__file__).parent / "queries"
# Backend .env used as a credentials fallback (DB_* vars).
BACKEND_ENV = Path(__file__).parent.parent / "backend" / ".env"
# Target Postgres schema for all generated views.
SCHEMA = "analytics"

# One-time bootstrap SQL: creates the analytics schema, a read-only login
# role, and the grants. Printed by --setup; must be run as a superuser.
SETUP_SQL = """\
-- =============================================================
-- AutoGPT Analytics Schema Setup
-- Run ONCE in Supabase SQL Editor as the postgres superuser.
-- After this, run generate_views.py to create/refresh the views.
-- =============================================================

-- 1. Create the analytics schema
CREATE SCHEMA IF NOT EXISTS analytics;

-- 2. Create the read-only role (skip if already exists)
DO $$
BEGIN
  IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'analytics_readonly') THEN
    CREATE ROLE analytics_readonly WITH LOGIN PASSWORD 'CHANGE_ME';
  END IF;
END
$$;

-- 3. Analytics schema grants only.
--    Views are created with security_invoker = false so they execute as their
--    owner (postgres). analytics_readonly never needs direct access to the
--    platform or auth schemas — it can only see analytics.* views.
GRANT USAGE ON SCHEMA analytics TO analytics_readonly;
GRANT SELECT ON ALL TABLES IN SCHEMA analytics TO analytics_readonly;
ALTER DEFAULT PRIVILEGES IN SCHEMA analytics
  GRANT SELECT ON TABLES TO analytics_readonly;
"""
|
||||
|
||||
|
||||
def load_db_url_from_backend_env() -> str | None:
    """Build a Postgres connection URL from DB_* vars in backend/.env.

    Returns None when backend/.env is missing or DB_PASS is unset, so the
    caller can fall through to other credential sources.
    """
    from urllib.parse import quote  # stdlib; deferred to keep module imports minimal

    if not BACKEND_ENV.exists():
        return None
    env: dict[str, str] = {}
    for line in BACKEND_ENV.read_text().splitlines():
        line = line.strip()
        # Skip blanks, comments, and lines without KEY=VALUE shape.
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        # strip optional surrounding quotes
        value = value.strip().strip('"').strip("'")
        env[key] = value
    host = env.get("DB_HOST", "localhost")
    port = env.get("DB_PORT", "5432")
    user = env.get("DB_USER", "postgres")
    password = env.get("DB_PASS", "")
    dbname = env.get("DB_NAME", "postgres")
    if not password:
        return None
    # Percent-encode credentials: a password containing @ : / or # would
    # otherwise corrupt the connection URL.
    return (
        f"postgresql://{quote(user, safe='')}:{quote(password, safe='')}"
        f"@{host}:{port}/{dbname}"
    )
|
||||
|
||||
|
||||
def view_name_from_file(path: Path) -> str:
    """Derive the analytics view name from a query file (filename sans suffix)."""
    return path.name.removesuffix(path.suffix)
|
||||
|
||||
|
||||
def build_view_sql(name: str, query_body: str) -> str:
    """Wrap a SELECT body in CREATE OR REPLACE VIEW DDL for the analytics schema."""
    # Drop surrounding whitespace and any trailing semicolons so the
    # statement can be wrapped cleanly.
    select_body = query_body.strip().rstrip(";")
    # security_invoker = false → the view runs with its owner's (postgres)
    # privileges, so analytics_readonly needs no grants on the underlying
    # platform / auth tables.
    header = f"CREATE OR REPLACE VIEW {SCHEMA}.{name} WITH (security_invoker = false) AS"
    return header + "\n" + select_body + ";\n"
|
||||
|
||||
|
||||
def generate_all(only: list[str] | None = None) -> list[tuple[str, str]]:
    """Return (view_name, sql) pairs for every query file, alphabetically.

    When *only* is given, restrict output to those view names. Exits with an
    error if the queries directory contains no .sql files at all.
    """
    sql_files = sorted(QUERIES_DIR.glob("*.sql"))
    if not sql_files:
        print(f"No .sql files found in {QUERIES_DIR}", file=sys.stderr)
        sys.exit(1)

    wanted = set(only) if only else None
    pairs: list[tuple[str, str]] = []
    for path in sql_files:
        name = view_name_from_file(path)
        if wanted is not None and name not in wanted:
            continue
        pairs.append((name, build_view_sql(name, path.read_text())))
    return pairs
|
||||
|
||||
|
||||
def apply_to_db(views: list[tuple[str, str]], db_url: str) -> None:
    """Execute each view's DDL against the database in a single transaction.

    Commits only after every view plus the grant refresh succeeds; on any
    failure rolls back, reports the error on stderr, and exits non-zero.
    """
    try:
        import psycopg2
    except ImportError:
        print(
            "psycopg2 not installed. Run: pip install psycopg2-binary",
            file=sys.stderr,
        )
        sys.exit(1)

    conn = psycopg2.connect(db_url)
    # Explicit transaction: all views apply atomically or not at all.
    conn.autocommit = False
    cur = conn.cursor()

    try:
        for name, sql in views:
            print(f" Creating view: {SCHEMA}.{name} ...", end=" ")
            cur.execute(sql)
            print("OK")
        # Also refresh grants so the readonly role can see new views
        cur.execute(
            f"GRANT SELECT ON ALL TABLES IN SCHEMA {SCHEMA} TO analytics_readonly;"
        )
        conn.commit()
        print(f"\n✓ {len(views)} view(s) created/updated successfully.")
    except Exception as e:
        # Any DB error aborts the whole batch.
        conn.rollback()
        print(f"\n✗ Error: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        # Always release the cursor and connection, success or failure.
        cur.close()
        conn.close()
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: parse arguments, then print or apply the view SQL."""
    arg_parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    arg_parser.add_argument(
        "--setup", action="store_true", help="Print one-time schema/role/grant SQL"
    )
    arg_parser.add_argument(
        "--dry-run", action="store_true", help="Print SQL without executing"
    )
    arg_parser.add_argument(
        "--db-url", help="Postgres connection string (overrides DATABASE_URL)"
    )
    arg_parser.add_argument(
        "--only", help="Comma-separated list of view names to update (default: all)"
    )
    opts = arg_parser.parse_args()

    # --setup short-circuits everything else: just emit the bootstrap SQL.
    if opts.setup:
        print(SETUP_SQL)
        return

    selected = None
    if opts.only:
        selected = [part.strip() for part in opts.only.split(",")]
    views = generate_all(only=selected)

    if not views:
        print("No matching views found.")
        sys.exit(0)

    if opts.dry_run:
        print(f"-- Generated by generate_views.py ({len(views)} views)\n")
        for view_name, view_sql in views:
            print(f"-- ── {view_name} ──────────────────────────────")
            print(view_sql)
        return

    # Credential resolution order: explicit flag, env var, then backend/.env.
    connection_url = (
        opts.db_url or os.environ.get("DATABASE_URL") or load_db_url_from_backend_env()
    )
    if not connection_url:
        print(
            "No database URL found.\n"
            "Tried: --db-url, DATABASE_URL env var, and backend/.env (DB_* vars).\n"
            "Use --dry-run to just print the SQL.",
            file=sys.stderr,
        )
        sys.exit(1)

    print(f"Applying {len(views)} view(s) to database...")
    apply_to_db(views, connection_url)
|
||||
|
||||
|
||||
# Script entry point when invoked directly (python generate_views.py).
if __name__ == "__main__":
    main()
|
||||
230
autogpt_platform/backend/generate_views.py
Executable file
230
autogpt_platform/backend/generate_views.py
Executable file
@@ -0,0 +1,230 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AutoGPT Analytics — View Generator
|
||||
====================================
|
||||
Reads every .sql file in analytics/queries/ and registers it as a
|
||||
CREATE OR REPLACE VIEW in the analytics schema.
|
||||
|
||||
Quick start (from autogpt_platform/backend/):
|
||||
|
||||
Step 1 — one-time setup (creates schema, role, grants):
|
||||
|
||||
poetry run analytics-setup
|
||||
|
||||
Step 2 — create / refresh all 14 analytics views:
|
||||
|
||||
poetry run analytics-views
|
||||
|
||||
Both commands auto-detect credentials from .env (DB_* vars).
|
||||
Use --db-url to override.
|
||||
|
||||
Step 3 (optional) — set a password for the read-only role so
|
||||
external tools (Supabase MCP, PostHog Data Warehouse) can connect.
|
||||
Run in Supabase SQL Editor:
|
||||
|
||||
ALTER ROLE analytics_readonly WITH PASSWORD 'your-password';
|
||||
|
||||
Usage
|
||||
-----
|
||||
poetry run analytics-setup # apply setup to DB
|
||||
poetry run analytics-setup --dry-run # print setup SQL only
|
||||
poetry run analytics-views # apply all views to DB
|
||||
poetry run analytics-views --dry-run # print all view SQL only
|
||||
poetry run analytics-views --only graph_execution,retention_login_weekly
|
||||
|
||||
Environment variables
|
||||
---------------------
|
||||
DATABASE_URL Postgres connection string (checked before .env)
|
||||
|
||||
Notes
|
||||
-----
|
||||
- .env DB_* vars are read automatically as a fallback.
|
||||
- Safe to re-run: uses CREATE OR REPLACE VIEW.
|
||||
- Looker, PostHog Data Warehouse, and Supabase MCP all read from the
|
||||
same analytics.* views — no raw tables exposed.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Query files live in the sibling analytics/ directory; one .sql file per view.
QUERIES_DIR = Path(__file__).parent.parent / "analytics" / "queries"
# Backend .env used as a credentials fallback (DB_* vars).
ENV_FILE = Path(__file__).parent / ".env"
# Target Postgres schema for all generated views.
SCHEMA = "analytics"

# One-time bootstrap SQL: creates the analytics schema, a read-only login
# role, and the grants. Applied by `poetry run analytics-setup`; needs a
# superuser connection.
SETUP_SQL = """\
-- =============================================================
-- AutoGPT Analytics Schema Setup
-- Run ONCE as the postgres superuser (e.g. via Supabase SQL Editor).
-- After this, run: poetry run analytics-views
-- =============================================================

-- 1. Create the analytics schema
CREATE SCHEMA IF NOT EXISTS analytics;

-- 2. Create the read-only role (skip if already exists)
DO $$
BEGIN
  IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'analytics_readonly') THEN
    CREATE ROLE analytics_readonly WITH LOGIN PASSWORD 'CHANGE_ME';
  END IF;
END
$$;

-- 3. Analytics schema grants only.
--    Views use security_invoker = false so they execute as their
--    owner (postgres). analytics_readonly never needs direct access
--    to the platform or auth schemas.
GRANT USAGE ON SCHEMA analytics TO analytics_readonly;
GRANT SELECT ON ALL TABLES IN SCHEMA analytics TO analytics_readonly;
ALTER DEFAULT PRIVILEGES IN SCHEMA analytics
  GRANT SELECT ON TABLES TO analytics_readonly;
"""
|
||||
|
||||
|
||||
def load_db_url_from_env() -> str | None:
    """Build a Postgres connection URL from DB_* vars in .env.

    Returns None when .env is missing or DB_PASS is unset, so the caller
    can fall through to other credential sources.
    """
    from urllib.parse import quote  # stdlib; deferred to keep module imports minimal

    if not ENV_FILE.exists():
        return None
    env: dict[str, str] = {}
    for line in ENV_FILE.read_text().splitlines():
        line = line.strip()
        # Skip blanks, comments, and lines without KEY=VALUE shape.
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        # strip optional surrounding quotes
        env[key.strip()] = value.strip().strip('"').strip("'")
    host = env.get("DB_HOST", "localhost")
    port = env.get("DB_PORT", "5432")
    user = env.get("DB_USER", "postgres")
    password = env.get("DB_PASS", "")
    dbname = env.get("DB_NAME", "postgres")
    if not password:
        return None
    # Percent-encode credentials: a password containing @ : / or # would
    # otherwise corrupt the connection URL.
    return (
        f"postgresql://{quote(user, safe='')}:{quote(password, safe='')}"
        f"@{host}:{port}/{dbname}"
    )
|
||||
|
||||
|
||||
def get_db_url(args: argparse.Namespace) -> str | None:
|
||||
return args.db_url or os.environ.get("DATABASE_URL") or load_db_url_from_env()
|
||||
|
||||
|
||||
def connect(db_url: str):
    """Return an open psycopg2 connection to *db_url*.

    Exits with an install hint if the psycopg2 driver is not importable.
    """
    try:
        import psycopg2
    except ImportError:
        print("psycopg2 not found. Run: poetry install", file=sys.stderr)
        sys.exit(1)
    return psycopg2.connect(db_url)
|
||||
|
||||
|
||||
def run_sql(db_url: str, statements: list[tuple[str, str]]) -> None:
    """Execute a list of (label, sql) pairs in a single transaction.

    Commits only if every statement succeeds; on any failure rolls back,
    reports the error on stderr, and exits non-zero.
    """
    conn = connect(db_url)
    # Explicit transaction: either every statement applies or none do.
    conn.autocommit = False
    cur = conn.cursor()
    try:
        for label, sql in statements:
            print(f" {label} ...", end=" ")
            cur.execute(sql)
            print("OK")
        conn.commit()
        print(f"\n✓ {len(statements)} statement(s) applied.")
    except Exception as e:
        # Any DB error aborts the whole batch.
        conn.rollback()
        print(f"\n✗ Error: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        # Always release the cursor and connection, success or failure.
        cur.close()
        conn.close()
|
||||
|
||||
|
||||
def build_view_sql(name: str, query_body: str) -> str:
    """Wrap a SELECT body in CREATE OR REPLACE VIEW DDL for the analytics schema."""
    # Drop surrounding whitespace and trailing semicolons before wrapping.
    select_body = query_body.strip().rstrip(";")
    # security_invoker = false → the view runs with its owner's (postgres)
    # privileges, so analytics_readonly only needs analytics schema access.
    header = f"CREATE OR REPLACE VIEW {SCHEMA}.{name} WITH (security_invoker = false) AS"
    return header + "\n" + select_body + ";\n"
|
||||
|
||||
|
||||
def load_views(only: list[str] | None = None) -> list[tuple[str, str]]:
    """Collect [(label, sql)] for every query file, in alphabetical order.

    When *only* is given, restrict output to those view names. Exits with an
    error if the queries directory contains no .sql files at all.
    """
    sql_files = sorted(QUERIES_DIR.glob("*.sql"))
    if not sql_files:
        print(f"No .sql files found in {QUERIES_DIR}", file=sys.stderr)
        sys.exit(1)

    wanted = set(only) if only else None
    statements: list[tuple[str, str]] = []
    for path in sql_files:
        view = path.stem
        if wanted is not None and view not in wanted:
            continue
        statements.append(
            (f"view analytics.{view}", build_view_sql(view, path.read_text()))
        )
    return statements
|
||||
|
||||
|
||||
def no_db_url_error() -> None:
    """Report every credential source that was tried, then exit non-zero."""
    message = (
        "No database URL found.\n"
        "Tried: --db-url, DATABASE_URL env var, and .env (DB_* vars).\n"
        "Use --dry-run to just print the SQL."
    )
    print(message, file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
|
||||
def cmd_setup(args: argparse.Namespace) -> None:
    """Handle `analytics-setup`: print (--dry-run) or apply the bootstrap SQL."""
    if args.dry_run:
        print(SETUP_SQL)
        return
    url = get_db_url(args)
    if not url:
        no_db_url_error()
    # no_db_url_error() exits; the assert narrows the type for checkers.
    assert url
    print("Applying analytics setup...")
    run_sql(url, [("schema / role / grants", SETUP_SQL)])
|
||||
|
||||
|
||||
def cmd_views(args: argparse.Namespace) -> None:
    """Handle `analytics-views`: print (--dry-run) or apply every view's DDL."""
    selected = None
    if args.only:
        selected = [part.strip() for part in args.only.split(",")]
    views = load_views(only=selected)
    if not views:
        print("No matching views found.")
        sys.exit(0)

    if args.dry_run:
        print(f"-- {len(views)} views\n")
        for label, sql in views:
            print(f"-- {label}")
            print(sql)
        return

    url = get_db_url(args)
    if not url:
        no_db_url_error()
    # no_db_url_error() exits; the assert narrows the type for checkers.
    assert url
    print(f"Applying {len(views)} view(s)...")
    # Append grant refresh so the readonly role sees any new views
    grant_stmt = f"GRANT SELECT ON ALL TABLES IN SCHEMA {SCHEMA} TO analytics_readonly;"
    run_sql(url, views + [("grant analytics_readonly", grant_stmt)])
|
||||
|
||||
|
||||
def main_setup() -> None:
    """Poetry script entry point for `analytics-setup`."""
    parser = argparse.ArgumentParser(description="Apply analytics schema setup to DB")
    parser.add_argument(
        "--dry-run", action="store_true", help="Print SQL, don't execute"
    )
    parser.add_argument("--db-url", help="Postgres connection string")
    args = parser.parse_args()
    cmd_setup(args)
|
||||
|
||||
|
||||
def main_views() -> None:
    """Poetry script entry point for `analytics-views`."""
    parser = argparse.ArgumentParser(description="Apply analytics views to DB")
    parser.add_argument(
        "--dry-run", action="store_true", help="Print SQL, don't execute"
    )
    parser.add_argument("--db-url", help="Postgres connection string")
    parser.add_argument("--only", help="Comma-separated view names to update")
    args = parser.parse_args()
    cmd_views(args)
|
||||
|
||||
|
||||
# Direct invocation (python generate_views.py) mirrors the analytics-views
# poetry script.
if __name__ == "__main__":
    # Default: apply views (backwards-compatible with direct python invocation)
    main_views()
|
||||
@@ -120,6 +120,8 @@ ws = "backend.ws:main"
|
||||
scheduler = "backend.scheduler:main"
|
||||
notification = "backend.notification:main"
|
||||
executor = "backend.exec:main"
|
||||
analytics-setup = "generate_views:main_setup"
|
||||
analytics-views = "generate_views:main_views"
|
||||
copilot-executor = "backend.copilot.executor.__main__:main"
|
||||
cli = "backend.cli:main"
|
||||
format = "linter:format"
|
||||
|
||||
Reference in New Issue
Block a user