mirror of
https://github.com/supabase/agent-skills.git
synced 2026-03-27 10:09:26 +08:00
simple edge function creation example
This commit is contained in:
@@ -1,29 +1,19 @@
|
||||
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
||||
import { existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { expect, test } from "vitest";
|
||||
|
||||
const supabaseDir = join(process.cwd(), "supabase");
|
||||
const migrationsDir = join(supabaseDir, "migrations");
|
||||
|
||||
/** Find the first .sql migration file (agent may name it differently). */
|
||||
function findMigrationFile(): string | null {
|
||||
if (!existsSync(migrationsDir)) return null;
|
||||
const files = readdirSync(migrationsDir).filter((f) => f.endsWith(".sql"));
|
||||
return files.length > 0 ? join(migrationsDir, files[0]) : null;
|
||||
}
|
||||
|
||||
function getMigrationSQL(): string {
|
||||
const file = findMigrationFile();
|
||||
if (!file) throw new Error("No migration file found in supabase/migrations/");
|
||||
return readFileSync(file, "utf-8");
|
||||
}
|
||||
import {
|
||||
findMigrationFiles,
|
||||
getMigrationSQL,
|
||||
supabaseDir,
|
||||
} from "../eval-utils.ts";
|
||||
|
||||
test("supabase project initialized (config.toml exists)", () => {
|
||||
expect(existsSync(join(supabaseDir, "config.toml"))).toBe(true);
|
||||
});
|
||||
|
||||
test("migration file exists in supabase/migrations/", () => {
|
||||
expect(findMigrationFile()).not.toBeNull();
|
||||
expect(findMigrationFiles().length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test("creates tasks table", () => {
|
||||
|
||||
148
packages/evals/evals/edge-function-hello-world/EVAL.ts
Normal file
148
packages/evals/evals/edge-function-hello-world/EVAL.ts
Normal file
@@ -0,0 +1,148 @@
|
||||
import { existsSync, readdirSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { expect, test } from "vitest";
|
||||
|
||||
import {
|
||||
findFunctionFile,
|
||||
findSharedCorsFile,
|
||||
functionsDir,
|
||||
getFunctionCode,
|
||||
getSharedCode,
|
||||
supabaseDir,
|
||||
} from "../eval-utils.ts";
|
||||
|
||||
const FUNCTION_NAME = "hello-world";
|
||||
const helloWorldDir = join(functionsDir, FUNCTION_NAME);
|
||||
|
||||
/** Read function code + all shared modules combined. */
|
||||
function getAllCode(): string {
|
||||
const code = getFunctionCode(FUNCTION_NAME);
|
||||
return `${code}\n${getSharedCode()}`;
|
||||
}
|
||||
|
||||
/** Extract the code after the first `catch` keyword to the end of the function. */
|
||||
function getCatchBlockCode(): string {
|
||||
const code = getFunctionCode(FUNCTION_NAME);
|
||||
const catchIndex = code.search(/\bcatch\b/);
|
||||
if (catchIndex === -1) return "";
|
||||
return code.slice(catchIndex);
|
||||
}
|
||||
|
||||
test("supabase project initialized", () => {
|
||||
expect(existsSync(join(supabaseDir, "config.toml"))).toBe(true);
|
||||
});
|
||||
|
||||
test("function directory exists", () => {
|
||||
expect(existsSync(helloWorldDir)).toBe(true);
|
||||
});
|
||||
|
||||
test("function index file exists", () => {
|
||||
expect(findFunctionFile(FUNCTION_NAME)).not.toBeNull();
|
||||
});
|
||||
|
||||
test("uses Deno.serve", () => {
|
||||
const code = getFunctionCode(FUNCTION_NAME);
|
||||
expect(code).toMatch(/Deno\.serve/);
|
||||
});
|
||||
|
||||
test("returns JSON response", () => {
|
||||
// Check both the function file and shared modules for JSON response patterns
|
||||
const allCode = getAllCode();
|
||||
const hasContentTypeHeader =
|
||||
/content-type['"]\s*:\s*['"]application\/json/i.test(allCode);
|
||||
const hasResponseJson = /Response\.json/i.test(allCode);
|
||||
const hasJsonStringify = /JSON\.stringify/i.test(allCode);
|
||||
expect(hasContentTypeHeader || hasResponseJson || hasJsonStringify).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("handles OPTIONS preflight", () => {
|
||||
// OPTIONS handling may be in the function itself or in a shared CORS helper
|
||||
const allCode = getAllCode();
|
||||
expect(allCode).toMatch(/['"]OPTIONS['"]/);
|
||||
expect(allCode).toMatch(/\.method/);
|
||||
});
|
||||
|
||||
test("defines CORS headers", () => {
|
||||
const allCode = getAllCode();
|
||||
expect(allCode).toMatch(/Access-Control-Allow-Origin/);
|
||||
});
|
||||
|
||||
test("CORS allows required headers", () => {
|
||||
const allCode = getAllCode().toLowerCase();
|
||||
// Must include authorization and apikey in allowed headers
|
||||
expect(allCode).toMatch(/access-control-allow-headers/);
|
||||
expect(allCode).toMatch(/authorization/);
|
||||
expect(allCode).toMatch(/apikey/);
|
||||
});
|
||||
|
||||
test("error response has CORS headers", () => {
|
||||
const catchCode = getCatchBlockCode();
|
||||
expect(catchCode.length).toBeGreaterThan(0);
|
||||
// The catch block should either directly reference CORS headers, or call
|
||||
// a shared helper that includes them (e.g. errorResponse, corsHeaders).
|
||||
const sharedCode = getSharedCode();
|
||||
// Direct CORS reference in catch block
|
||||
const directCors =
|
||||
/corsHeaders|cors_headers|Access-Control-Allow-Origin/i.test(catchCode);
|
||||
// Calls a shared helper that itself includes CORS headers
|
||||
const callsSharedHelper =
|
||||
/errorResponse|jsonResponse|json_response|error_response/i.test(
|
||||
catchCode,
|
||||
) && /Access-Control-Allow-Origin/i.test(sharedCode);
|
||||
expect(directCors || callsSharedHelper).toBe(true);
|
||||
});
|
||||
|
||||
test("has try-catch for error handling", () => {
|
||||
const code = getFunctionCode(FUNCTION_NAME);
|
||||
expect(code).toMatch(/\btry\s*\{/);
|
||||
expect(code).toMatch(/\bcatch\b/);
|
||||
});
|
||||
|
||||
test("returns proper error status code", () => {
|
||||
const catchCode = getCatchBlockCode();
|
||||
expect(catchCode.length).toBeGreaterThan(0);
|
||||
// Error response should use status 400 or 500 (not default 200).
|
||||
// Match object-style { status: 500 } or function-call-style fn('msg', 500)
|
||||
const hasObjectStatus = /status:\s*(400|500|4\d{2}|5\d{2})/.test(catchCode);
|
||||
const hasFnArgStatus = /[,(]\s*(400|500|4\d{2}|5\d{2})\s*[),]/.test(
|
||||
catchCode,
|
||||
);
|
||||
expect(hasObjectStatus || hasFnArgStatus).toBe(true);
|
||||
});
|
||||
|
||||
test("shared CORS module exists", () => {
|
||||
expect(findSharedCorsFile()).not.toBeNull();
|
||||
});
|
||||
|
||||
test("function imports from shared", () => {
|
||||
const code = getFunctionCode(FUNCTION_NAME);
|
||||
// Should import from ../_shared/ relative path
|
||||
expect(code).toMatch(/from\s+['"]\.\.\/(_shared|_utils)/);
|
||||
});
|
||||
|
||||
test("function uses hyphenated name", () => {
|
||||
// The function directory should use hyphens, not underscores
|
||||
const dirs = existsSync(functionsDir) ? readdirSync(functionsDir) : [];
|
||||
const helloDir = dirs.find((d) => d.includes("hello") && d.includes("world"));
|
||||
expect(helloDir).toBeDefined();
|
||||
expect(helloDir).toMatch(/^hello-world$/);
|
||||
});
|
||||
|
||||
test("overall quality: demonstrates Edge Function best practices", () => {
|
||||
const allCode = getAllCode().toLowerCase();
|
||||
// A high-quality Edge Function should contain most of these patterns
|
||||
const signals = [
|
||||
/deno\.serve/, // Modern Deno.serve API
|
||||
/['"]options['"]/, // OPTIONS preflight handling
|
||||
/access-control-allow-origin/, // CORS headers defined
|
||||
/\btry\s*\{/, // Error handling with try-catch
|
||||
/status:\s*(400|500|4\d{2}|5\d{2})|[,(]\s*(400|500|4\d{2}|5\d{2})\s*[),]/, // Proper error status codes
|
||||
/from\s+['"]\.\.\/(_shared|_utils)/, // Imports from shared directory
|
||||
/authorization/, // Allows authorization header in CORS
|
||||
/apikey/, // Allows apikey header in CORS
|
||||
];
|
||||
const matches = signals.filter((r) => r.test(allCode));
|
||||
expect(matches.length).toBeGreaterThanOrEqual(6);
|
||||
});
|
||||
8
packages/evals/evals/edge-function-hello-world/PROMPT.md
Normal file
8
packages/evals/evals/edge-function-hello-world/PROMPT.md
Normal file
@@ -0,0 +1,8 @@
|
||||
I want to create my first Supabase Edge Function. Set up the project and create a "hello-world" function that:
|
||||
|
||||
1. Accepts a POST request with a JSON body containing a `name` field
|
||||
2. Returns a JSON response like `{ "message": "Hello {name}!" }`
|
||||
3. Works when called from a browser (frontend app)
|
||||
4. Handles bad input gracefully
|
||||
|
||||
I also want the project organized so I can add more functions later and share common code between them.
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"name": "edge-function-hello-world",
|
||||
"private": true,
|
||||
"type": "module"
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
# For detailed configuration reference documentation, visit:
|
||||
# https://supabase.com/docs/guides/local-development/cli/config
|
||||
# A string used to distinguish different Supabase projects on the same host. Defaults to the
|
||||
# working directory name when running `supabase init`.
|
||||
project_id = "edge-function-hello-world"
|
||||
|
||||
[api]
|
||||
enabled = true
|
||||
# Port to use for the API URL.
|
||||
port = 54321
|
||||
# Schemas to expose in your API. Tables, views and stored procedures in this schema will get API
|
||||
# endpoints. `public` and `graphql_public` schemas are included by default.
|
||||
schemas = ["public", "graphql_public"]
|
||||
# Extra schemas to add to the search_path of every request.
|
||||
extra_search_path = ["public", "extensions"]
|
||||
# The maximum number of rows returns from a view, table, or stored procedure. Limits payload size
|
||||
# for accidental or malicious requests.
|
||||
max_rows = 1000
|
||||
|
||||
[db]
|
||||
# Port to use for the local database URL.
|
||||
port = 54322
|
||||
# Port used by db diff command to initialize the shadow database.
|
||||
shadow_port = 54320
|
||||
# The database major version to use. This has to be the same as your remote database's. Run `SHOW
|
||||
# server_version;` on the remote database to check.
|
||||
major_version = 17
|
||||
|
||||
[db.pooler]
|
||||
enabled = false
|
||||
# Port to use for the local connection pooler.
|
||||
port = 54329
|
||||
# Specifies when a server connection can be reused by other clients.
|
||||
# Configure one of the supported pooler modes: `transaction`, `session`.
|
||||
pool_mode = "transaction"
|
||||
# How many server connections to allow per user/database pair.
|
||||
default_pool_size = 20
|
||||
# Maximum number of client connections allowed.
|
||||
max_client_conn = 100
|
||||
|
||||
[storage]
|
||||
enabled = true
|
||||
# The maximum file size allowed (e.g. "5MB", "500KB").
|
||||
file_size_limit = "50MiB"
|
||||
|
||||
[auth]
|
||||
enabled = true
|
||||
# The base URL of your website. Used as an allow-list for redirects and for constructing URLs used
|
||||
# in emails.
|
||||
site_url = "http://127.0.0.1:3000"
|
||||
# A list of *exact* URLs that auth providers are permitted to redirect to post authentication.
|
||||
additional_redirect_urls = ["https://127.0.0.1:3000"]
|
||||
# How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 (1 week).
|
||||
jwt_expiry = 3600
|
||||
# Allow/disallow new user signups to your project.
|
||||
enable_signup = true
|
||||
# Allow/disallow anonymous sign-ins to your project.
|
||||
enable_anonymous_sign_ins = false
|
||||
|
||||
[auth.email]
|
||||
# Allow/disallow new user signups via email to your project.
|
||||
enable_signup = true
|
||||
# If enabled, users need to confirm their email address before signing in.
|
||||
enable_confirmations = false
|
||||
93
packages/evals/evals/eval-utils.ts
Normal file
93
packages/evals/evals/eval-utils.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Common paths
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const supabaseDir = join(process.cwd(), "supabase");
|
||||
export const migrationsDir = join(supabaseDir, "migrations");
|
||||
export const functionsDir = join(supabaseDir, "functions");
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Migration helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Find all .sql migration files (agent may create one or more). */
|
||||
export function findMigrationFiles(): string[] {
|
||||
if (!existsSync(migrationsDir)) return [];
|
||||
return readdirSync(migrationsDir)
|
||||
.filter((f) => f.endsWith(".sql"))
|
||||
.map((f) => join(migrationsDir, f));
|
||||
}
|
||||
|
||||
/** Read and concatenate all migration SQL files. */
|
||||
export function getMigrationSQL(): string {
|
||||
const files = findMigrationFiles();
|
||||
if (files.length === 0)
|
||||
throw new Error("No migration file found in supabase/migrations/");
|
||||
return files.map((f) => readFileSync(f, "utf-8")).join("\n");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Edge Function helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Find the index.ts/tsx entry file for a named Edge Function.
|
||||
*
|
||||
* @param functionName - directory name under supabase/functions/ (e.g. "hello-world")
|
||||
*/
|
||||
export function findFunctionFile(functionName: string): string | null {
|
||||
const fnDir = join(functionsDir, functionName);
|
||||
if (!existsSync(fnDir)) return null;
|
||||
const files = readdirSync(fnDir).filter(
|
||||
(f) => f.startsWith("index.") && (f.endsWith(".ts") || f.endsWith(".tsx")),
|
||||
);
|
||||
return files.length > 0 ? join(fnDir, files[0]) : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the source code of a named Edge Function.
|
||||
*
|
||||
* @param functionName - directory name under supabase/functions/ (e.g. "stripe-webhook")
|
||||
*/
|
||||
export function getFunctionCode(functionName: string): string {
|
||||
const file = findFunctionFile(functionName);
|
||||
if (!file)
|
||||
throw new Error(`No index.ts found in supabase/functions/${functionName}/`);
|
||||
return readFileSync(file, "utf-8");
|
||||
}
|
||||
|
||||
/** Find a shared CORS module under supabase/functions/_shared/ (or similar _-prefixed dir). */
|
||||
export function findSharedCorsFile(): string | null {
|
||||
if (!existsSync(functionsDir)) return null;
|
||||
const sharedDirs = readdirSync(functionsDir).filter(
|
||||
(d) => d.startsWith("_") && statSync(join(functionsDir, d)).isDirectory(),
|
||||
);
|
||||
for (const dir of sharedDirs) {
|
||||
const dirPath = join(functionsDir, dir);
|
||||
const files = readdirSync(dirPath).filter((f) => f.includes("cors"));
|
||||
if (files.length > 0) return join(dirPath, files[0]);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Read and concatenate all .ts/.tsx files from _-prefixed shared directories. */
|
||||
export function getSharedCode(): string {
|
||||
if (!existsSync(functionsDir)) return "";
|
||||
const sharedDirs = readdirSync(functionsDir).filter(
|
||||
(d) => d.startsWith("_") && statSync(join(functionsDir, d)).isDirectory(),
|
||||
);
|
||||
const parts: string[] = [];
|
||||
for (const dir of sharedDirs) {
|
||||
const dirPath = join(functionsDir, dir);
|
||||
const files = readdirSync(dirPath).filter(
|
||||
(f) => f.endsWith(".ts") || f.endsWith(".tsx"),
|
||||
);
|
||||
for (const f of files) {
|
||||
parts.push(readFileSync(join(dirPath, f), "utf-8"));
|
||||
}
|
||||
}
|
||||
return parts.join("\n");
|
||||
}
|
||||
@@ -1,25 +1,6 @@
|
||||
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { expect, test } from "vitest";
|
||||
|
||||
const supabaseDir = join(process.cwd(), "supabase");
|
||||
const migrationsDir = join(supabaseDir, "migrations");
|
||||
|
||||
/** Find all .sql migration files (agent may create one or more). */
|
||||
function findMigrationFiles(): string[] {
|
||||
if (!existsSync(migrationsDir)) return [];
|
||||
return readdirSync(migrationsDir)
|
||||
.filter((f) => f.endsWith(".sql"))
|
||||
.map((f) => join(migrationsDir, f));
|
||||
}
|
||||
|
||||
/** Read and concatenate all migration SQL files. */
|
||||
function getMigrationSQL(): string {
|
||||
const files = findMigrationFiles();
|
||||
if (files.length === 0)
|
||||
throw new Error("No migration file found in supabase/migrations/");
|
||||
return files.map((f) => readFileSync(f, "utf-8")).join("\n");
|
||||
}
|
||||
import { findMigrationFiles, getMigrationSQL } from "../eval-utils.ts";
|
||||
|
||||
test("migration file exists", () => {
|
||||
expect(findMigrationFiles().length).toBeGreaterThan(0);
|
||||
|
||||
@@ -1,25 +1,6 @@
|
||||
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { expect, test } from "vitest";
|
||||
|
||||
const supabaseDir = join(process.cwd(), "supabase");
|
||||
const migrationsDir = join(supabaseDir, "migrations");
|
||||
|
||||
/** Find all .sql migration files (agent may create one or multiple). */
|
||||
function findMigrationFiles(): string[] {
|
||||
if (!existsSync(migrationsDir)) return [];
|
||||
return readdirSync(migrationsDir)
|
||||
.filter((f) => f.endsWith(".sql"))
|
||||
.map((f) => join(migrationsDir, f));
|
||||
}
|
||||
|
||||
/** Concatenate all migration SQL into a single string for assertions. */
|
||||
function getMigrationSQL(): string {
|
||||
const files = findMigrationFiles();
|
||||
if (files.length === 0)
|
||||
throw new Error("No migration file found in supabase/migrations/");
|
||||
return files.map((f) => readFileSync(f, "utf-8")).join("\n");
|
||||
}
|
||||
import { findMigrationFiles, getMigrationSQL } from "../eval-utils.ts";
|
||||
|
||||
test("migration file exists", () => {
|
||||
expect(findMigrationFiles().length).toBeGreaterThan(0);
|
||||
|
||||
@@ -1,169 +1,9 @@
|
||||
# Supabase Skills Eval Scenarios
|
||||
|
||||
## Scenario 1: auth-rls-new-project
|
||||
|
||||
**Description:** Set up a new Supabase project from scratch and add
|
||||
authentication with RLS. The agent must initialize the project with the CLI,
|
||||
start the local Supabase stack, then create a tasks table with proper security
|
||||
(RLS policies, auth FK, indexes) in a single idempotent migration.
|
||||
|
||||
**Setup:** The workspace starts empty (no `supabase/` directory). The agent is
|
||||
expected to run `npx supabase init` and `npx supabase start` before creating
|
||||
the migration.
|
||||
|
||||
**Expected skill files read:**
|
||||
|
||||
- `SKILL.md` (skill body with reference file index)
|
||||
- `references/dev-getting-started.md`
|
||||
- `references/db-rls-mandatory.md`
|
||||
- `references/db-rls-policy-types.md`
|
||||
- `references/db-rls-common-mistakes.md`
|
||||
- `references/db-schema-auth-fk.md`
|
||||
- `references/db-schema-timestamps.md`
|
||||
- `references/db-migrations-idempotent.md`
|
||||
|
||||
**Expected result:**
|
||||
|
||||
The agent initializes a Supabase project and creates a migration file that:
|
||||
|
||||
- Creates tasks table with `timestamptz` columns
|
||||
- Has `user_id` FK to `auth.users(id)` with `ON DELETE CASCADE`
|
||||
- Enables RLS (`ALTER TABLE tasks ENABLE ROW LEVEL SECURITY`)
|
||||
- Creates per-operation policies using `(select auth.uid())` with `TO authenticated`
|
||||
- Creates index on `user_id`
|
||||
- Uses `IF NOT EXISTS` for idempotency
|
||||
|
||||
**Scorer:** Binary pass/fail (12 vitest assertions)
|
||||
|
||||
| Test | What it checks |
|
||||
| --- | --- |
|
||||
| supabase project initialized | `supabase/config.toml` exists after agent runs |
|
||||
| migration file exists | Agent created a `.sql` file in `supabase/migrations/` |
|
||||
| creates tasks table | SQL contains `CREATE TABLE ... tasks` |
|
||||
| enables RLS | `ALTER TABLE tasks ENABLE ROW LEVEL SECURITY` |
|
||||
| FK to auth.users | `REFERENCES auth.users` |
|
||||
| ON DELETE CASCADE | Cascade delete on auth FK |
|
||||
| (select auth.uid()) | Subselect form in policies (performance) |
|
||||
| TO authenticated | Policies scoped to authenticated role |
|
||||
| timestamptz | No plain `timestamp` for time columns |
|
||||
| index on user_id | `CREATE INDEX` on the FK column |
|
||||
| IF NOT EXISTS | Idempotent migration |
|
||||
| overall quality | At least 4/5 best-practice signals present |
|
||||
|
||||
## Scenario 2: team-rls-security-definer
|
||||
|
||||
**Description:** Create a SQL migration for a team-based project management app
|
||||
where users belong to organizations via a membership table. The migration must
|
||||
define tables for organizations, memberships, and projects, then secure them
|
||||
with RLS policies that use a `security definer` helper function in a private
|
||||
schema to efficiently resolve team membership without per-row joins.
|
||||
|
||||
**Setup:** The workspace starts with a pre-initialized Supabase project
|
||||
(`supabase/config.toml` exists, empty `supabase/migrations/` directory). The
|
||||
agent creates migration files within this structure.
|
||||
|
||||
**Expected skill files read:**
|
||||
|
||||
- `SKILL.md` (skill body with reference file index)
|
||||
- `references/db-rls-mandatory.md`
|
||||
- `references/db-rls-policy-types.md`
|
||||
- `references/db-rls-common-mistakes.md`
|
||||
- `references/db-rls-performance.md`
|
||||
- `references/db-security-functions.md`
|
||||
- `references/db-schema-auth-fk.md`
|
||||
- `references/db-schema-timestamps.md`
|
||||
- `references/db-perf-indexes.md`
|
||||
- `references/db-migrations-idempotent.md`
|
||||
|
||||
**Expected result:**
|
||||
|
||||
The agent creates a migration file that:
|
||||
|
||||
- Creates organizations, memberships, and projects tables with `timestamptz` columns
|
||||
- Has `user_id` FK to `auth.users(id)` with `ON DELETE CASCADE` on memberships
|
||||
- Has `org_id` FK on projects referencing organizations
|
||||
- Enables RLS on all three tables
|
||||
- Creates a private schema with a `security definer` helper function (`SET search_path = ''`)
|
||||
- Creates RLS policies using `(select auth.uid())` with `TO authenticated`
|
||||
- Creates indexes on membership lookup columns (user_id, org_id)
|
||||
- Has a delete policy on projects restricted to owner role
|
||||
- Uses `IF NOT EXISTS` for idempotency
|
||||
|
||||
**Scorer:** Binary pass/fail (16 vitest assertions)
|
||||
|
||||
| Test | What it checks |
|
||||
| --- | --- |
|
||||
| migration file exists | A `.sql` file exists in `supabase/migrations/` |
|
||||
| creates organizations table | SQL contains `CREATE TABLE` for organizations |
|
||||
| creates memberships table | SQL contains `CREATE TABLE` for memberships |
|
||||
| creates projects table | SQL contains `CREATE TABLE` for projects |
|
||||
| enables RLS on all tables | `ALTER TABLE ... ENABLE ROW LEVEL SECURITY` for all three tables |
|
||||
| FK to auth.users with ON DELETE CASCADE | memberships references `auth.users` with cascade |
|
||||
| org_id FK on projects | projects references organizations |
|
||||
| private schema created | `CREATE SCHEMA ... private` present |
|
||||
| security_definer helper function | Function in private schema with `SECURITY DEFINER` and `SET search_path = ''` |
|
||||
| policies use (select auth.uid()) | Subselect form in all policies referencing auth.uid() |
|
||||
| policies use TO authenticated | All policies scoped to authenticated role |
|
||||
| index on membership lookup columns | `CREATE INDEX` on user_id and/or org_id in memberships |
|
||||
| uses timestamptz | No plain `timestamp` for time columns |
|
||||
| idempotent DDL | Uses `IF NOT EXISTS` or `DROP ... IF EXISTS` patterns |
|
||||
| delete policy restricted to owner role | A delete policy on projects checks for owner/admin role |
|
||||
| overall quality score | At least 10/14 best-practice signals present |
|
||||
|
||||
## Scenario 3: storage-rls-user-folders
|
||||
|
||||
**Description:** Create a SQL migration that sets up Supabase Storage buckets
|
||||
with RLS policies for user-content. An avatars bucket (public reads,
|
||||
authenticated uploads restricted to user folders) and a documents bucket (fully
|
||||
private, user-isolated), with file type restrictions, storage helper functions
|
||||
in policies, and a file_metadata tracking table secured with RLS.
|
||||
|
||||
**Setup:** Pre-initialized Supabase project (`supabase/config.toml` exists)
|
||||
with an empty `supabase/migrations/` directory. The agent creates migration
|
||||
files within this structure.
|
||||
|
||||
**Expected skill files read:**
|
||||
|
||||
- `SKILL.md` (skill body with reference file index)
|
||||
- `references/storage-access-control.md`
|
||||
- `references/db-rls-mandatory.md`
|
||||
- `references/db-rls-common-mistakes.md`
|
||||
- `references/db-rls-performance.md`
|
||||
- `references/db-schema-auth-fk.md`
|
||||
- `references/db-schema-timestamps.md`
|
||||
- `references/db-perf-indexes.md`
|
||||
- `references/db-migrations-idempotent.md`
|
||||
|
||||
**Expected result:**
|
||||
|
||||
The agent creates a migration file that:
|
||||
|
||||
- Inserts avatars bucket into `storage.buckets` with `public = true`, MIME type restrictions, and file size limit
|
||||
- Inserts documents bucket with `public = false`
|
||||
- Creates RLS policies on `storage.objects` using `storage.foldername(name)` with `auth.uid()::text`
|
||||
- Scopes upload policies `TO authenticated` and avatars SELECT policy `TO public`
|
||||
- Creates `file_metadata` table with FK to `auth.users` with `ON DELETE CASCADE`
|
||||
- Enables RLS on `file_metadata` with policies using `(select auth.uid())`
|
||||
- Uses `timestamptz` for time columns, indexes `user_id`, and `IF NOT EXISTS` for idempotency
|
||||
|
||||
**Scorer:** Binary pass/fail (17 vitest assertions)
|
||||
|
||||
| Test | What it checks |
|
||||
| --- | --- |
|
||||
| migration file exists | A `.sql` file exists in `supabase/migrations/` |
|
||||
| creates avatars bucket | SQL inserts into `storage.buckets` with id 'avatars' and `public = true` |
|
||||
| creates documents bucket | SQL inserts into `storage.buckets` with id 'documents' and `public = false` |
|
||||
| avatars bucket has mime type restriction | `allowed_mime_types` includes image types (jpeg, png, webp) |
|
||||
| avatars bucket has file size limit | `file_size_limit` set (around 2MB / 2097152 bytes) |
|
||||
| storage policy uses foldername or path for user isolation | Policy references `storage.foldername(name)` with `auth.uid()::text` |
|
||||
| storage policy uses TO authenticated | Storage upload/delete policies scoped to `TO authenticated` |
|
||||
| public read policy for avatars | A SELECT policy on storage.objects for avatars allows public/anon access |
|
||||
| documents bucket is fully private | Policies for documents restrict all operations to authenticated owner |
|
||||
| creates file_metadata table | SQL contains `CREATE TABLE` for file_metadata |
|
||||
| file_metadata has FK to auth.users with CASCADE | `REFERENCES auth.users` with `ON DELETE CASCADE` |
|
||||
| RLS enabled on file_metadata | `ALTER TABLE file_metadata ENABLE ROW LEVEL SECURITY` |
|
||||
| file_metadata policies use (select auth.uid()) | Subselect form in policies |
|
||||
| uses timestamptz for time columns | No plain `timestamp` in file_metadata |
|
||||
| index on file_metadata user_id | `CREATE INDEX` on user_id column |
|
||||
| idempotent DDL | Uses `IF NOT EXISTS` patterns |
|
||||
| overall quality score | At least 11/15 best-practice signals present |
|
||||
| # | Scenario | Description |
|
||||
|---|----------|-------------|
|
||||
| 1 | [auth-rls-new-project](auth-rls-new-project.md) | Initialize a Supabase project and create a tasks table with RLS |
|
||||
| 2 | [team-rls-security-definer](team-rls-security-definer.md) | Team-based RLS with security definer helper in a private schema |
|
||||
| 3 | [storage-rls-user-folders](storage-rls-user-folders.md) | Storage buckets with RLS policies for user-isolated folders |
|
||||
| 4 | [edge-function-hello-world](edge-function-hello-world.md) | Hello-world Edge Function with CORS and shared utilities |
|
||||
| 5 | edge-function-stripe-webhook | Stripe webhook Edge Function with signature verification and orders migration |
|
||||
124
packages/evals/scenarios/auth-rls-new-project.md
Normal file
124
packages/evals/scenarios/auth-rls-new-project.md
Normal file
@@ -0,0 +1,124 @@
|
||||
# Scenario: auth-rls-new-project
|
||||
|
||||
## Summary
|
||||
|
||||
The agent must set up a new Supabase project from scratch and add
|
||||
authentication with RLS. It must initialize the project with the CLI, start
|
||||
the local Supabase stack, then create a tasks table with proper security (RLS
|
||||
policies, auth FK, indexes) in a single idempotent migration.
|
||||
|
||||
## Real-World Justification
|
||||
|
||||
Why this is a common and important workflow:
|
||||
|
||||
1. **Project initialization + first migration is the canonical onboarding
|
||||
workflow** -- The Supabase getting started guide walks developers through
|
||||
`supabase init`, `supabase start`, and creating their first migration. This
|
||||
is the first thing every new Supabase developer does.
|
||||
- Source: https://supabase.com/docs/guides/local-development/cli/getting-started
|
||||
|
||||
2. **RLS is the most common security question for new Supabase users** --
|
||||
Developers frequently forget to enable RLS, use incorrect policy syntax, or
|
||||
omit the `TO authenticated` clause. The Supabase docs and community
|
||||
discussions repeatedly emphasize that RLS must be enabled on every public
|
||||
table.
|
||||
- Source: https://supabase.com/docs/guides/database/postgres/row-level-security
|
||||
- Source: https://github.com/orgs/supabase/discussions/811
|
||||
|
||||
3. **Auth FK and cascade deletes are a frequent source of bugs** -- Developers
|
||||
often reference `auth.users` incorrectly or forget `ON DELETE CASCADE`,
|
||||
leading to orphaned rows when users are deleted from auth.
|
||||
- Source: https://supabase.com/docs/guides/auth/managing-user-data
|
||||
|
||||
## Skill References Exercised
|
||||
|
||||
Which reference files the agent should consult and what each teaches:
|
||||
|
||||
| Reference File | What It Teaches | What the Agent Should Apply |
|---|---|---|
| `references/dev-getting-started.md` | `npx supabase init`, `npx supabase start`, project structure | Initialize the project and start the local stack |
| `references/db-rls-mandatory.md` | RLS must be enabled on all public tables | Enable RLS on the tasks table |
| `references/db-rls-policy-types.md` | PERMISSIVE vs RESTRICTIVE, per-operation policies | Create separate SELECT, INSERT, UPDATE, DELETE policies |
| `references/db-rls-common-mistakes.md` | Missing TO clause, user_metadata pitfalls | Always use `TO authenticated` on all policies |
| `references/db-schema-auth-fk.md` | FK to auth.users with ON DELETE CASCADE | Reference auth.users with cascade on user_id |
| `references/db-schema-timestamps.md` | Use timestamptz not timestamp | All time columns use timestamptz |
| `references/db-migrations-idempotent.md` | IF NOT EXISTS for safe reruns | Idempotent DDL throughout the migration |
|
||||
## Workspace Setup
|
||||
|
||||
What the workspace starts with before the agent runs:
|
||||
|
||||
- Empty workspace (no `supabase/` directory)
|
||||
- The agent is expected to run `npx supabase init` and `npx supabase start`
|
||||
before creating the migration
|
||||
|
||||
## Agent Task (PROMPT.md draft)
|
||||
|
||||
The prompt to give the agent. Written as a developer would ask it:
|
||||
|
||||
> I'm starting a new Supabase project. Initialize the project, start the local
> dev stack, and create a migration for a `tasks` table.
>
> The tasks table should have:
> - A title (text)
> - A status column (e.g., pending, in_progress, done)
> - Timestamps for created and updated
> - A reference to the authenticated user who owns the task
>
> Set up Row Level Security so users can only see and manage their own tasks.
> The migration should be safe to run multiple times.
|
||||
## Evaluation Criteria
|
||||
|
||||
What vitest should assert on the agent's output. Each assertion tests a
|
||||
specific quality signal:
|
||||
|
||||
| # | Test Name | What It Checks | Quality Dimension |
|---|-----------|----------------|-------------------|
| 1 | supabase project initialized | `supabase/config.toml` exists after agent runs | structure |
| 2 | migration file exists | Agent created a `.sql` file in `supabase/migrations/` | structure |
| 3 | creates tasks table | SQL contains `CREATE TABLE ... tasks` | correctness |
| 4 | enables RLS | `ALTER TABLE tasks ENABLE ROW LEVEL SECURITY` | security |
| 5 | FK to auth.users | `REFERENCES auth.users` | correctness |
| 6 | ON DELETE CASCADE | Cascade delete on auth FK | correctness |
| 7 | (select auth.uid()) | Subselect form in policies (performance) | performance |
| 8 | TO authenticated | Policies scoped to authenticated role | security |
| 9 | timestamptz | No plain `timestamp` for time columns | correctness |
| 10 | index on user_id | `CREATE INDEX` on the FK column | performance |
| 11 | IF NOT EXISTS | Idempotent migration | idempotency |
| 12 | overall quality | At least 4/5 best-practice signals present | overall |
|
||||
## Reasoning
|
||||
|
||||
Step-by-step reasoning for why this scenario is well-designed:
|
||||
|
||||
1. **Baseline differentiator:** An agent without the skill would likely: (a)
|
||||
skip `supabase start` or misconfigure the init step, (b) use plain
|
||||
`timestamp` instead of `timestamptz`, (c) use bare `auth.uid()` instead of
|
||||
the subselect form `(select auth.uid())`, (d) forget the `TO authenticated`
|
||||
clause on policies, (e) omit `ON DELETE CASCADE` on the auth FK, (f) skip
|
||||
creating an index on `user_id`. These are Supabase-specific best practices
|
||||
that require reading the skill references.
|
||||
|
||||
2. **Skill value:** The getting-started reference teaches the CLI workflow. The
|
||||
RLS references teach mandatory enablement, per-operation policies, and the
|
||||
`TO authenticated` clause. The schema references teach `timestamptz` and
|
||||
auth FK patterns. The migrations reference teaches idempotent DDL. Together
|
||||
these 7 reference files cover every assertion.
|
||||
|
||||
3. **Testability:** All assertions are file-existence checks or regex/string
|
||||
matches on SQL text. `config.toml` existence, `CREATE TABLE`, `ENABLE ROW
|
||||
LEVEL SECURITY`, `REFERENCES auth.users`, `ON DELETE CASCADE`,
|
||||
`(select auth.uid())`, `TO authenticated`, `timestamptz`, `CREATE INDEX`,
|
||||
and `IF NOT EXISTS` are all reliably detectable patterns.
|
||||
|
||||
4. **Realism:** Setting up a new project with a user-owned tasks table is the
|
||||
most common Supabase tutorial pattern. It covers the complete onboarding
|
||||
workflow from zero to a secured table.
|
||||
|
||||
## Difficulty
|
||||
|
||||
**Rating:** EASY
|
||||
|
||||
- Without skill: ~50-65% of assertions expected to pass
|
||||
- With skill: ~90-100% of assertions expected to pass
|
||||
129
packages/evals/scenarios/edge-function-hello-world.md
Normal file
129
packages/evals/scenarios/edge-function-hello-world.md
Normal file
@@ -0,0 +1,129 @@
|
||||
# Scenario: edge-function-hello-world
|
||||
|
||||
## Summary
|
||||
|
||||
The agent must initialize a Supabase project, create a "hello-world" Edge
|
||||
Function with proper project structure, CORS handling, error handling, and JSON
|
||||
responses, plus a shared CORS utility in the `_shared/` folder. This tests the
|
||||
fundamental Edge Function setup workflow that every Supabase developer
|
||||
encounters first.
|
||||
|
||||
## Real-World Justification
|
||||
|
||||
Why this is a common and important workflow:
|
||||
|
||||
1. **Edge Function quickstart is the most common entry point** -- The Supabase
|
||||
Edge Functions quickstart guide is the canonical first step for developers
|
||||
adopting serverless functions. The CLI workflow (`supabase init`,
|
||||
`functions new`, `functions serve`) is documented as the primary onboarding
|
||||
path.
|
||||
- Source: https://supabase.com/docs/guides/functions/quickstart
|
||||
|
||||
2. **CORS is the number one pain point for Edge Function beginners** -- GitHub
|
||||
issues show CORS errors are the most frequently reported Edge Function
|
||||
problem. Developers forget to handle OPTIONS preflight requests, omit CORS
|
||||
headers from error responses, or fail to include `x-client-info` and
|
||||
`apikey` in allowed headers. Multiple issues spanning 2022-2025 document
|
||||
this.
|
||||
- Source: https://github.com/supabase/supabase/issues/6267
|
||||
- Source: https://github.com/orgs/supabase/discussions/29485
|
||||
- Source: https://supabase.com/docs/guides/functions/cors
|
||||
|
||||
3. **Shared code structure with `_shared/` is frequently misunderstood** --
|
||||
Developers put shared utilities in folders without the underscore prefix,
|
||||
causing them to be deployed as separate functions. The `_shared/` convention
|
||||
and `import_map.json` placement are Supabase-specific patterns not obvious
|
||||
from general Deno knowledge.
|
||||
- Source: https://github.com/orgs/supabase/discussions/8723
|
||||
- Source: https://supabase.com/docs/guides/functions/development-tips
|
||||
|
||||
## Skill References Exercised
|
||||
|
||||
Which reference files the agent should consult and what each teaches:
|
||||
|
||||
| Reference File | What It Teaches | What the Agent Should Apply |
|---|---|---|
| `references/edge-fun-quickstart.md` | `Deno.serve()` handler, CLI workflow, JSON response with Content-Type header, error handling for JSON parsing | Use `Deno.serve()`, return JSON with proper Content-Type, wrap in try/catch |
| `references/edge-fun-project-structure.md` | `_shared/` folder convention, hyphenated function names, `import_map.json` placement | Create `_shared/cors.ts`, use hyphens in function name |
| `references/edge-pat-cors.md` | CORS headers object, OPTIONS preflight handling, CORS on error responses | Handle OPTIONS, include CORS headers in all responses including errors |
| `references/edge-pat-error-handling.md` | Proper error status codes, `console.error` for internal logging, CORS on errors, import from `_shared/cors.ts` | Return 400 for bad input, include CORS headers on error response |
| `references/dev-getting-started.md` | `npx supabase init`, project directory structure | Initialize supabase project correctly |
|
||||
## Workspace Setup
|
||||
|
||||
What the workspace starts with before the agent runs:
|
||||
|
||||
- Empty workspace (no `supabase/` directory)
|
||||
- The agent is expected to initialize the project and create all files
|
||||
|
||||
## Agent Task (PROMPT.md draft)
|
||||
|
||||
The prompt to give the agent. Written as a developer would ask it -- no hints
|
||||
about best practices or what the tests check:
|
||||
|
||||
> I want to create my first Supabase Edge Function. Set up the project and
> create a "hello-world" function that:
>
> 1. Accepts a POST request with a JSON body containing a `name` field
> 2. Returns a JSON response like `{ "message": "Hello {name}!" }`
> 3. Works when called from a browser (frontend app)
> 4. Handles bad input gracefully
>
> I also want the project organized so I can add more functions later and share
> common code between them.
|
||||
## Evaluation Criteria
|
||||
|
||||
What vitest should assert on the agent's output. Each assertion tests a
|
||||
specific quality signal:
|
||||
|
||||
| # | Test Name | What It Checks | Quality Dimension |
|---|-----------|----------------|-------------------|
| 1 | supabase project initialized | `supabase/config.toml` exists | structure |
| 2 | function directory exists | `supabase/functions/hello-world/` directory exists | structure |
| 3 | function index file exists | `supabase/functions/hello-world/index.ts` exists | structure |
| 4 | uses Deno.serve | Function code contains `Deno.serve` (not legacy `serve` import from std) | correctness |
| 5 | returns JSON response | Response includes `Content-Type: application/json` header or uses `Response.json` | correctness |
| 6 | handles OPTIONS preflight | Code checks for `req.method === "OPTIONS"` or equivalent | correctness |
| 7 | defines CORS headers | Code defines Access-Control-Allow-Origin header | security |
| 8 | CORS allows required headers | CORS config includes `authorization` and `apikey` in allowed headers | security |
| 9 | error response has CORS headers | Error/catch response also includes CORS headers | security |
| 10 | has try-catch for error handling | Function body wrapped in try/catch | correctness |
| 11 | returns proper error status code | Error response uses status 400 or 500 (not default 200) | correctness |
| 12 | shared CORS module exists | A `_shared/cors.ts` (or similar) file exists under `supabase/functions/` | structure |
| 13 | function imports from shared | Function code imports from `../_shared/` relative path | structure |
| 14 | function uses hyphenated name | Function directory uses hyphens not underscores | structure |
|
||||
## Reasoning
|
||||
|
||||
Step-by-step reasoning for why this scenario is well-designed:
|
||||
|
||||
1. **Baseline differentiator:** An agent without the skill would likely: (a)
|
||||
use the deprecated `serve` import from `deno.land/std` instead of built-in
|
||||
`Deno.serve`, (b) forget to handle OPTIONS preflight requests entirely,
|
||||
(c) omit CORS headers from error responses while including them on success,
|
||||
(d) put shared code in a `shared/` folder without the underscore prefix,
|
||||
(e) miss including `apikey` and `x-client-info` in the
|
||||
Access-Control-Allow-Headers list, (f) return errors with status 200.
|
||||
|
||||
2. **Skill value:** The quickstart reference teaches `Deno.serve()` and proper
|
||||
JSON error handling. The CORS reference shows the exact headers needed and
|
||||
the OPTIONS handler pattern. The project structure reference teaches the
|
||||
`_shared/` convention. The error handling reference shows CORS on errors
|
||||
and proper status codes. Together these 5 references cover every
|
||||
assertion.
|
||||
|
||||
3. **Testability:** All assertions are file-existence checks or regex/string
|
||||
matches on TypeScript source code. No runtime execution needed.
|
||||
|
||||
4. **Realism:** Creating a first Edge Function with CORS support for a
|
||||
frontend app is the literal first thing every Supabase developer does when
|
||||
adopting Edge Functions. The Supabase quickstart guide documents exactly
|
||||
this workflow.
|
||||
|
||||
## Difficulty
|
||||
|
||||
**Rating:** EASY
|
||||
|
||||
- Without skill: ~45-60% of assertions expected to pass
|
||||
- With skill: ~90-100% of assertions expected to pass
|
||||
@@ -36,13 +36,28 @@ export async function runTests(opts: {
|
||||
const destPath = join(opts.workspacePath, evalFileName);
|
||||
copyFileSync(opts.evalFilePath, destPath);
|
||||
|
||||
// Copy shared eval-utils.ts if it exists alongside the eval scenarios
|
||||
const evalUtilsSrc = join(
|
||||
dirname(dirname(opts.evalFilePath)),
|
||||
"eval-utils.ts",
|
||||
);
|
||||
if (existsSync(evalUtilsSrc)) {
|
||||
copyFileSync(evalUtilsSrc, join(opts.workspacePath, "eval-utils.ts"));
|
||||
}
|
||||
|
||||
// Write a minimal vitest config that overrides the default include pattern
|
||||
// so EVAL.ts (without .test. or .spec.) is picked up.
|
||||
const vitestConfigPath = join(opts.workspacePath, "vitest.config.mjs");
|
||||
if (!existsSync(vitestConfigPath)) {
|
||||
// Alias ../eval-utils.ts → ./eval-utils.ts so the import resolves in
|
||||
// the flat workspace (source tree has EVAL.ts one level deeper).
|
||||
const evalUtilsDest = join(opts.workspacePath, "eval-utils.ts");
|
||||
const aliasBlock = existsSync(evalUtilsDest)
|
||||
? `resolve: { alias: { "../eval-utils.ts": "./eval-utils.ts" } },`
|
||||
: "";
|
||||
writeFileSync(
|
||||
vitestConfigPath,
|
||||
`export default { test: { include: ["EVAL.{ts,tsx}"] } };\n`,
|
||||
`export default { ${aliasBlock} test: { include: ["EVAL.{ts,tsx}"] } };\n`,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user