Files
AutoGPT/autogpt_platform/backend/test/test_data_creator.py
2025-10-17 11:12:00 +02:00

579 lines
21 KiB
Python

"""
Test Data Creator for AutoGPT Platform
This script creates test data for the AutoGPT platform database.
Image/Video URL Domains Used:
- Images: picsum.photos (for all image URLs - avatars, store listing images, etc.)
- Videos: youtube.com (for store listing video URLs)
Add these domains to your Next.js config:
```javascript
// next.config.js
images: {
domains: ['picsum.photos'],
}
```
"""
import asyncio
import random
from datetime import datetime
import prisma.enums
import pytest
from autogpt_libs.api_key.keysmith import APIKeySmith
from faker import Faker
from prisma import Json, Prisma
from prisma.types import (
AgentBlockCreateInput,
AgentGraphCreateInput,
AgentNodeCreateInput,
AgentNodeLinkCreateInput,
AnalyticsDetailsCreateInput,
AnalyticsMetricsCreateInput,
CreditTransactionCreateInput,
IntegrationWebhookCreateInput,
ProfileCreateInput,
StoreListingReviewCreateInput,
UserCreateInput,
)
faker = Faker()
# Constants for data generation limits
# Base entities
NUM_USERS = 100 # Creates 100 user records
NUM_AGENT_BLOCKS = 100 # Creates 100 agent block templates
# Per-user entities
MIN_GRAPHS_PER_USER = 1 # Each user will have between 1-5 graphs
MAX_GRAPHS_PER_USER = 5 # Total graphs: 500-2500 (NUM_USERS * MIN/MAX_GRAPHS)
# Per-graph entities
MIN_NODES_PER_GRAPH = 2 # Each graph will have between 2-5 nodes
MAX_NODES_PER_GRAPH = (
5 # Total nodes: 1000-2500 (GRAPHS_PER_USER * NUM_USERS * MIN/MAX_NODES)
)
# Additional per-user entities
MIN_PRESETS_PER_USER = 1 # Each user will have between 1-2 presets
MAX_PRESETS_PER_USER = 5 # Total presets: 500-2500 (NUM_USERS * MIN/MAX_PRESETS)
MIN_AGENTS_PER_USER = 1 # Each user will have between 1-2 agents
MAX_AGENTS_PER_USER = 10 # Total agents: 500-5000 (NUM_USERS * MIN/MAX_AGENTS)
# Execution and review records
MIN_EXECUTIONS_PER_GRAPH = 1 # Each graph will have between 1-5 execution records
MAX_EXECUTIONS_PER_GRAPH = (
20 # Total executions: 1000-5000 (TOTAL_GRAPHS * MIN/MAX_EXECUTIONS)
)
MIN_REVIEWS_PER_VERSION = 1 # Each version will have between 1-3 reviews
MAX_REVIEWS_PER_VERSION = 5 # Total reviews depends on number of versions created
def get_image():
"""Generate a consistent image URL using picsum.photos service."""
width = random.choice([200, 300, 400, 500, 600, 800])
height = random.choice([200, 300, 400, 500, 600, 800])
# Use a random seed to get different images
seed = random.randint(1, 1000)
return f"https://picsum.photos/seed/{seed}/{width}/{height}"
def get_video_url():
"""Generate a consistent video URL using a placeholder service."""
# Using YouTube as a consistent source for video URLs
video_ids = [
"dQw4w9WgXcQ", # Example video IDs
"9bZkp7q19f0",
"kJQP7kiw5Fk",
"RgKAFK5djSk",
"L_jWHffIx5E",
]
video_id = random.choice(video_ids)
return f"https://www.youtube.com/watch?v={video_id}"
async def main():
db = Prisma()
await db.connect()
# Insert Users
print(f"Inserting {NUM_USERS} users")
users = []
for _ in range(NUM_USERS):
user = await db.user.create(
data=UserCreateInput(
id=str(faker.uuid4()),
email=faker.unique.email(),
name=faker.name(),
metadata=prisma.Json({}),
integrations="",
)
)
users.append(user)
# Insert AgentBlocks
agent_blocks = []
print(f"Inserting {NUM_AGENT_BLOCKS} agent blocks")
for _ in range(NUM_AGENT_BLOCKS):
block = await db.agentblock.create(
data=AgentBlockCreateInput(
name=f"{faker.word()}_{str(faker.uuid4())[:8]}",
inputSchema="{}",
outputSchema="{}",
)
)
agent_blocks.append(block)
# Insert AgentGraphs
agent_graphs = []
print(f"Inserting {NUM_USERS * MAX_GRAPHS_PER_USER} agent graphs")
for user in users:
for _ in range(
random.randint(MIN_GRAPHS_PER_USER, MAX_GRAPHS_PER_USER)
): # Adjust the range to create more graphs per user if desired
graph = await db.agentgraph.create(
data=AgentGraphCreateInput(
name=faker.sentence(nb_words=3),
description=faker.text(max_nb_chars=200),
userId=user.id,
isActive=True,
)
)
agent_graphs.append(graph)
# Insert AgentNodes
agent_nodes = []
print(
f"Inserting {NUM_USERS * MAX_GRAPHS_PER_USER * MAX_NODES_PER_GRAPH} agent nodes"
)
for graph in agent_graphs:
num_nodes = random.randint(MIN_NODES_PER_GRAPH, MAX_NODES_PER_GRAPH)
for _ in range(num_nodes): # Create 5 AgentNodes per graph
block = random.choice(agent_blocks)
node = await db.agentnode.create(
data=AgentNodeCreateInput(
agentBlockId=block.id,
agentGraphId=graph.id,
agentGraphVersion=graph.version,
constantInput=Json({}),
metadata=Json({}),
)
)
agent_nodes.append(node)
# Insert AgentPresets
agent_presets = []
print(f"Inserting {NUM_USERS * MAX_PRESETS_PER_USER} agent presets")
for user in users:
num_presets = random.randint(MIN_PRESETS_PER_USER, MAX_PRESETS_PER_USER)
for _ in range(num_presets): # Create 1 AgentPreset per user
graph = random.choice(agent_graphs)
preset = await db.agentpreset.create(
data={
"name": faker.sentence(nb_words=3),
"description": faker.text(max_nb_chars=200),
"userId": user.id,
"agentGraphId": graph.id,
"agentGraphVersion": graph.version,
"isActive": True,
}
)
agent_presets.append(preset)
# Insert Profiles first (before LibraryAgents)
profiles = []
print(f"Inserting {NUM_USERS} profiles")
for user in users:
profile = await db.profile.create(
data=ProfileCreateInput(
userId=user.id,
name=user.name or faker.name(),
username=faker.unique.user_name(),
description=faker.text(),
links=[faker.url() for _ in range(3)],
avatarUrl=get_image(),
)
)
profiles.append(profile)
# Insert LibraryAgents
library_agents = []
print("Inserting library agents")
for user in users:
num_agents = random.randint(MIN_AGENTS_PER_USER, MAX_AGENTS_PER_USER)
# Get a shuffled list of graphs to ensure uniqueness per user
available_graphs = agent_graphs.copy()
random.shuffle(available_graphs)
# Limit to available unique graphs
num_agents = min(num_agents, len(available_graphs))
for i in range(num_agents):
graph = available_graphs[i] # Use unique graph for each library agent
# Get creator profile for this graph's owner
creator_profile = next(
(p for p in profiles if p.userId == graph.userId), None
)
library_agent = await db.libraryagent.create(
data={
"userId": user.id,
"agentGraphId": graph.id,
"agentGraphVersion": graph.version,
"creatorId": creator_profile.id if creator_profile else None,
"imageUrl": get_image() if random.random() < 0.5 else None,
"useGraphIsActiveVersion": random.choice([True, False]),
"isFavorite": random.choice([True, False]),
"isCreatedByUser": random.choice([True, False]),
"isArchived": random.choice([True, False]),
"isDeleted": random.choice([True, False]),
}
)
library_agents.append(library_agent)
# Insert AgentGraphExecutions
agent_graph_executions = []
print(
f"Inserting {NUM_USERS * MAX_GRAPHS_PER_USER * MAX_EXECUTIONS_PER_GRAPH} agent graph executions"
)
graph_execution_data = []
for graph in agent_graphs:
user = random.choice(users)
num_executions = random.randint(
MIN_EXECUTIONS_PER_GRAPH, MAX_EXECUTIONS_PER_GRAPH
)
for _ in range(num_executions):
matching_presets = [p for p in agent_presets if p.agentGraphId == graph.id]
preset = (
random.choice(matching_presets)
if matching_presets and random.random() < 0.5
else None
)
graph_execution_data.append(
{
"agentGraphId": graph.id,
"agentGraphVersion": graph.version,
"userId": user.id,
"executionStatus": prisma.enums.AgentExecutionStatus.COMPLETED,
"startedAt": faker.date_time_this_year(),
"agentPresetId": preset.id if preset else None,
}
)
agent_graph_executions = await db.agentgraphexecution.create_many(
data=graph_execution_data
)
# Need to fetch the created records since create_many doesn't return them
agent_graph_executions = await db.agentgraphexecution.find_many()
# Insert AgentNodeExecutions
print(
f"Inserting {NUM_USERS * MAX_GRAPHS_PER_USER * MAX_EXECUTIONS_PER_GRAPH} agent node executions"
)
node_execution_data = []
for execution in agent_graph_executions:
nodes = [
node for node in agent_nodes if node.agentGraphId == execution.agentGraphId
]
for node in nodes:
node_execution_data.append(
{
"agentGraphExecutionId": execution.id,
"agentNodeId": node.id,
"executionStatus": prisma.enums.AgentExecutionStatus.COMPLETED,
"addedTime": datetime.now(),
}
)
agent_node_executions = await db.agentnodeexecution.create_many(
data=node_execution_data
)
# Need to fetch the created records since create_many doesn't return them
agent_node_executions = await db.agentnodeexecution.find_many()
# Insert AgentNodeExecutionInputOutput
print(
f"Inserting {NUM_USERS * MAX_GRAPHS_PER_USER * MAX_EXECUTIONS_PER_GRAPH} agent node execution input/outputs"
)
input_output_data = []
for node_execution in agent_node_executions:
# Input data
input_output_data.append(
{
"name": "input1",
"data": "{}",
"time": datetime.now(),
"referencedByInputExecId": node_execution.id,
}
)
# Output data
input_output_data.append(
{
"name": "output1",
"data": "{}",
"time": datetime.now(),
"referencedByOutputExecId": node_execution.id,
}
)
await db.agentnodeexecutioninputoutput.create_many(data=input_output_data)
# Insert AgentNodeLinks
print(f"Inserting {NUM_USERS * MAX_GRAPHS_PER_USER} agent node links")
for graph in agent_graphs:
nodes = [node for node in agent_nodes if node.agentGraphId == graph.id]
if len(nodes) >= 2:
source_node = nodes[0]
sink_node = nodes[1]
await db.agentnodelink.create(
data=AgentNodeLinkCreateInput(
agentNodeSourceId=source_node.id,
sourceName="output1",
agentNodeSinkId=sink_node.id,
sinkName="input1",
isStatic=False,
)
)
# Insert AnalyticsDetails
print(f"Inserting {NUM_USERS} analytics details")
for user in users:
for _ in range(1):
await db.analyticsdetails.create(
data=AnalyticsDetailsCreateInput(
userId=user.id,
type=faker.word(),
data=prisma.Json({}),
dataIndex=faker.word(),
)
)
# Insert AnalyticsMetrics
print(f"Inserting {NUM_USERS} analytics metrics")
for user in users:
for _ in range(1):
await db.analyticsmetrics.create(
data=AnalyticsMetricsCreateInput(
userId=user.id,
analyticMetric=faker.word(),
value=random.uniform(0, 100),
dataString=faker.word(),
)
)
# Insert CreditTransaction (formerly UserBlockCredit)
print(f"Inserting {NUM_USERS} credit transactions")
for user in users:
for _ in range(1):
block = random.choice(agent_blocks)
await db.credittransaction.create(
data=CreditTransactionCreateInput(
transactionKey=str(faker.uuid4()),
userId=user.id,
amount=random.randint(1, 100),
type=(
prisma.enums.CreditTransactionType.TOP_UP
if random.random() < 0.5
else prisma.enums.CreditTransactionType.USAGE
),
metadata=prisma.Json({}),
)
)
# Insert StoreListings
store_listings = []
print("Inserting store listings")
for graph in agent_graphs:
user = random.choice(users)
slug = faker.slug()
listing = await db.storelisting.create(
data={
"agentGraphId": graph.id,
"agentGraphVersion": graph.version,
"owningUserId": user.id,
"hasApprovedVersion": random.choice([True, False]),
"slug": slug,
}
)
store_listings.append(listing)
# Insert StoreListingVersions
store_listing_versions = []
print("Inserting store listing versions")
for listing in store_listings:
graph = [g for g in agent_graphs if g.id == listing.agentGraphId][0]
version = await db.storelistingversion.create(
data={
"agentGraphId": graph.id,
"agentGraphVersion": graph.version,
"name": graph.name or faker.sentence(nb_words=3),
"subHeading": faker.sentence(),
"videoUrl": get_video_url() if random.random() < 0.3 else None,
"imageUrls": [get_image() for _ in range(3)],
"description": faker.text(),
"categories": [faker.word() for _ in range(3)],
"isFeatured": random.choice([True, False]),
"isAvailable": True,
"storeListingId": listing.id,
"submissionStatus": random.choice(
[
prisma.enums.SubmissionStatus.PENDING,
prisma.enums.SubmissionStatus.APPROVED,
prisma.enums.SubmissionStatus.REJECTED,
]
),
}
)
store_listing_versions.append(version)
# Insert StoreListingReviews
print("Inserting store listing reviews")
for version in store_listing_versions:
# Create a copy of users list and shuffle it to avoid duplicates
available_reviewers = users.copy()
random.shuffle(available_reviewers)
# Limit number of reviews to available unique reviewers
num_reviews = min(
random.randint(MIN_REVIEWS_PER_VERSION, MAX_REVIEWS_PER_VERSION),
len(available_reviewers),
)
# Take only the first num_reviews reviewers
for reviewer in available_reviewers[:num_reviews]:
await db.storelistingreview.create(
data=StoreListingReviewCreateInput(
storeListingVersionId=version.id,
reviewByUserId=reviewer.id,
score=random.randint(1, 5),
comments=faker.text(),
)
)
# Insert UserOnboarding for some users
print("Inserting user onboarding data")
for user in random.sample(
users, k=int(NUM_USERS * 0.7)
): # 70% of users have onboarding data
completed_steps = []
possible_steps = list(prisma.enums.OnboardingStep)
# Randomly complete some steps
if random.random() < 0.8:
num_steps = random.randint(1, len(possible_steps))
completed_steps = random.sample(possible_steps, k=num_steps)
try:
await db.useronboarding.create(
data={
"userId": user.id,
"completedSteps": completed_steps,
"walletShown": random.choice([True, False]),
"notified": (
random.sample(completed_steps, k=min(3, len(completed_steps)))
if completed_steps
else []
),
"rewardedFor": (
random.sample(completed_steps, k=min(2, len(completed_steps)))
if completed_steps
else []
),
"usageReason": (
random.choice(["personal", "business", "research", "learning"])
if random.random() < 0.7
else None
),
"integrations": random.sample(
["github", "google", "discord", "slack"], k=random.randint(0, 2)
),
"otherIntegrations": (
faker.word() if random.random() < 0.2 else None
),
"selectedStoreListingVersionId": (
random.choice(store_listing_versions).id
if store_listing_versions and random.random() < 0.5
else None
),
"onboardingAgentExecutionId": (
random.choice(agent_graph_executions).id
if agent_graph_executions and random.random() < 0.3
else None
),
"agentRuns": random.randint(0, 10),
}
)
except Exception as e:
print(f"Error creating onboarding for user {user.id}: {e}")
# Try simpler version
await db.useronboarding.create(
data={
"userId": user.id,
}
)
# Insert IntegrationWebhooks for some users
print("Inserting integration webhooks")
for user in random.sample(
users, k=int(NUM_USERS * 0.3)
): # 30% of users have webhooks
for _ in range(random.randint(1, 3)):
await db.integrationwebhook.create(
data=IntegrationWebhookCreateInput(
userId=user.id,
provider=random.choice(["github", "slack", "discord"]),
credentialsId=str(faker.uuid4()),
webhookType=random.choice(["repo", "channel", "server"]),
resource=faker.slug(),
events=[
random.choice(["created", "updated", "deleted"])
for _ in range(random.randint(1, 3))
],
config=prisma.Json({"url": faker.url()}),
secret=str(faker.sha256()),
providerWebhookId=str(faker.uuid4()),
)
)
# Insert APIKeys
print(f"Inserting {NUM_USERS} api keys")
for user in users:
api_key = APIKeySmith().generate_key()
await db.apikey.create(
data={
"name": faker.word(),
"head": api_key.head,
"tail": api_key.tail,
"hash": api_key.hash,
"salt": api_key.salt,
"status": prisma.enums.APIKeyStatus.ACTIVE,
"permissions": [
prisma.enums.APIKeyPermission.EXECUTE_GRAPH,
prisma.enums.APIKeyPermission.READ_GRAPH,
],
"description": faker.text(),
"userId": user.id,
}
)
# Refresh materialized views
print("Refreshing materialized views...")
await db.execute_raw("SELECT refresh_store_materialized_views();")
await db.disconnect()
print("Test data creation completed successfully!")
@pytest.mark.asyncio
@pytest.mark.integration
async def test_main_function_runs_without_errors():
await main()
if __name__ == "__main__":
asyncio.run(main())