simple edge function creation example

This commit is contained in:
Pedro Rodrigues
2026-02-20 16:50:59 +00:00
parent 386b9fbb05
commit ce7eb8b28b
14 changed files with 603 additions and 225 deletions

View File

@@ -1,29 +1,19 @@
import { existsSync, readdirSync, readFileSync } from "node:fs"; import { existsSync } from "node:fs";
import { join } from "node:path"; import { join } from "node:path";
import { expect, test } from "vitest"; import { expect, test } from "vitest";
const supabaseDir = join(process.cwd(), "supabase"); import {
const migrationsDir = join(supabaseDir, "migrations"); findMigrationFiles,
getMigrationSQL,
/** Find the first .sql migration file (agent may name it differently). */ supabaseDir,
function findMigrationFile(): string | null { } from "../eval-utils.ts";
if (!existsSync(migrationsDir)) return null;
const files = readdirSync(migrationsDir).filter((f) => f.endsWith(".sql"));
return files.length > 0 ? join(migrationsDir, files[0]) : null;
}
function getMigrationSQL(): string {
const file = findMigrationFile();
if (!file) throw new Error("No migration file found in supabase/migrations/");
return readFileSync(file, "utf-8");
}
test("supabase project initialized (config.toml exists)", () => { test("supabase project initialized (config.toml exists)", () => {
expect(existsSync(join(supabaseDir, "config.toml"))).toBe(true); expect(existsSync(join(supabaseDir, "config.toml"))).toBe(true);
}); });
test("migration file exists in supabase/migrations/", () => { test("migration file exists in supabase/migrations/", () => {
expect(findMigrationFile()).not.toBeNull(); expect(findMigrationFiles().length).toBeGreaterThan(0);
}); });
test("creates tasks table", () => { test("creates tasks table", () => {

View File

@@ -0,0 +1,148 @@
import { existsSync, readdirSync } from "node:fs";
import { join } from "node:path";
import { expect, test } from "vitest";
import {
findFunctionFile,
findSharedCorsFile,
functionsDir,
getFunctionCode,
getSharedCode,
supabaseDir,
} from "../eval-utils.ts";
// Name of the Edge Function under test; its directory lives under supabase/functions/.
const FUNCTION_NAME = "hello-world";
// Absolute path to the hello-world function's directory.
const helloWorldDir = join(functionsDir, FUNCTION_NAME);
/** Combine the function's entry-file source with every shared module's source. */
function getAllCode(): string {
  const functionSource = getFunctionCode(FUNCTION_NAME);
  const sharedSource = getSharedCode();
  return [functionSource, sharedSource].join("\n");
}
/**
 * Return everything in the function source from the first `catch` keyword
 * onward, or an empty string when the function has no catch clause.
 */
function getCatchBlockCode(): string {
  const source = getFunctionCode(FUNCTION_NAME);
  const match = /\bcatch\b/.exec(source);
  return match === null ? "" : source.slice(match.index);
}
test("supabase project initialized", () => {
  // A config.toml at the project root is the marker left by `supabase init`.
  const configPath = join(supabaseDir, "config.toml");
  expect(existsSync(configPath)).toBe(true);
});
test("function directory exists", () => {
  const directoryPresent = existsSync(helloWorldDir);
  expect(directoryPresent).toBe(true);
});
test("function index file exists", () => {
  const entryFile = findFunctionFile(FUNCTION_NAME);
  expect(entryFile).not.toBeNull();
});
test("uses Deno.serve", () => {
  // Modern Edge Functions use the built-in Deno.serve rather than std/http serve.
  const source = getFunctionCode(FUNCTION_NAME);
  expect(source).toMatch(/Deno\.serve/);
});
test("returns JSON response", () => {
  // Check both the function file and shared modules for JSON response patterns
  const combined = getAllCode();
  const jsonSignals = [
    /content-type['"]\s*:\s*['"]application\/json/i, // explicit header
    /Response\.json/i, // Response.json helper
    /JSON\.stringify/i, // manual serialization
  ];
  const producesJson = jsonSignals.some((pattern) => pattern.test(combined));
  expect(producesJson).toBe(true);
});
test("handles OPTIONS preflight", () => {
  // OPTIONS handling may be in the function itself or in a shared CORS helper
  const combined = getAllCode();
  expect(combined).toMatch(/['"]OPTIONS['"]/);
  expect(combined).toMatch(/\.method/);
});
test("defines CORS headers", () => {
  const combined = getAllCode();
  expect(combined).toMatch(/Access-Control-Allow-Origin/);
});
test("CORS allows required headers", () => {
  const lowered = getAllCode().toLowerCase();
  // Must include authorization and apikey in allowed headers
  expect(lowered).toMatch(/access-control-allow-headers/);
  expect(lowered).toMatch(/authorization/);
  expect(lowered).toMatch(/apikey/);
});
test("error response has CORS headers", () => {
const catchCode = getCatchBlockCode();
expect(catchCode.length).toBeGreaterThan(0);
// The catch block should either directly reference CORS headers, or call
// a shared helper that includes them (e.g. errorResponse, corsHeaders).
const sharedCode = getSharedCode();
// Direct CORS reference in catch block
const directCors =
/corsHeaders|cors_headers|Access-Control-Allow-Origin/i.test(catchCode);
// Calls a shared helper that itself includes CORS headers
const callsSharedHelper =
/errorResponse|jsonResponse|json_response|error_response/i.test(
catchCode,
) && /Access-Control-Allow-Origin/i.test(sharedCode);
expect(directCors || callsSharedHelper).toBe(true);
});
test("has try-catch for error handling", () => {
const code = getFunctionCode(FUNCTION_NAME);
expect(code).toMatch(/\btry\s*\{/);
expect(code).toMatch(/\bcatch\b/);
});
test("returns proper error status code", () => {
const catchCode = getCatchBlockCode();
expect(catchCode.length).toBeGreaterThan(0);
// Error response should use status 400 or 500 (not default 200).
// Match object-style { status: 500 } or function-call-style fn('msg', 500)
const hasObjectStatus = /status:\s*(400|500|4\d{2}|5\d{2})/.test(catchCode);
const hasFnArgStatus = /[,(]\s*(400|500|4\d{2}|5\d{2})\s*[),]/.test(
catchCode,
);
expect(hasObjectStatus || hasFnArgStatus).toBe(true);
});
test("shared CORS module exists", () => {
  const corsFile = findSharedCorsFile();
  expect(corsFile).not.toBeNull();
});
test("function imports from shared", () => {
  // Should import from ../_shared/ relative path
  const source = getFunctionCode(FUNCTION_NAME);
  expect(source).toMatch(/from\s+['"]\.\.\/(_shared|_utils)/);
});
test("function uses hyphenated name", () => {
  // The function directory should use hyphens, not underscores
  const entries = existsSync(functionsDir) ? readdirSync(functionsDir) : [];
  const candidate = entries.find(
    (entry) => entry.includes("hello") && entry.includes("world"),
  );
  expect(candidate).toBeDefined();
  expect(candidate).toMatch(/^hello-world$/);
});
test("overall quality: demonstrates Edge Function best practices", () => {
  const lowered = getAllCode().toLowerCase();
  // A high-quality Edge Function should contain most of these patterns
  const signals = [
    /deno\.serve/, // Modern Deno.serve API
    /['"]options['"]/, // OPTIONS preflight handling
    /access-control-allow-origin/, // CORS headers defined
    /\btry\s*\{/, // Error handling with try-catch
    /status:\s*(400|500|4\d{2}|5\d{2})|[,(]\s*(400|500|4\d{2}|5\d{2})\s*[),]/, // Proper error status codes
    /from\s+['"]\.\.\/(_shared|_utils)/, // Imports from shared directory
    /authorization/, // Allows authorization header in CORS
    /apikey/, // Allows apikey header in CORS
  ];
  const matchCount = signals.filter((pattern) => pattern.test(lowered)).length;
  expect(matchCount).toBeGreaterThanOrEqual(6);
});

View File

@@ -0,0 +1,8 @@
I want to create my first Supabase Edge Function. Set up the project and create a "hello-world" function that:
1. Accepts a POST request with a JSON body containing a `name` field
2. Returns a JSON response like `{ "message": "Hello {name}!" }`
3. Works when called from a browser (frontend app)
4. Handles bad input gracefully
I also want the project organized so I can add more functions later and share common code between them.

View File

@@ -0,0 +1,5 @@
{
"name": "edge-function-hello-world",
"private": true,
"type": "module"
}

View File

@@ -0,0 +1,64 @@
# For detailed configuration reference documentation, visit:
# https://supabase.com/docs/guides/local-development/cli/config
# A string used to distinguish different Supabase projects on the same host. Defaults to the
# working directory name when running `supabase init`.
project_id = "edge-function-hello-world"
[api]
enabled = true
# Port to use for the API URL.
port = 54321
# Schemas to expose in your API. Tables, views and stored procedures in this schema will get API
# endpoints. `public` and `graphql_public` schemas are included by default.
schemas = ["public", "graphql_public"]
# Extra schemas to add to the search_path of every request.
extra_search_path = ["public", "extensions"]
# The maximum number of rows returned from a view, table, or stored procedure. Limits payload size
# for accidental or malicious requests.
max_rows = 1000
[db]
# Port to use for the local database URL.
port = 54322
# Port used by db diff command to initialize the shadow database.
shadow_port = 54320
# The database major version to use. This has to be the same as your remote database's. Run `SHOW
# server_version;` on the remote database to check.
major_version = 17
[db.pooler]
enabled = false
# Port to use for the local connection pooler.
port = 54329
# Specifies when a server connection can be reused by other clients.
# Configure one of the supported pooler modes: `transaction`, `session`.
pool_mode = "transaction"
# How many server connections to allow per user/database pair.
default_pool_size = 20
# Maximum number of client connections allowed.
max_client_conn = 100
[storage]
enabled = true
# The maximum file size allowed (e.g. "5MB", "500KB").
file_size_limit = "50MiB"
[auth]
enabled = true
# The base URL of your website. Used as an allow-list for redirects and for constructing URLs used
# in emails.
site_url = "http://127.0.0.1:3000"
# A list of *exact* URLs that auth providers are permitted to redirect to post authentication.
additional_redirect_urls = ["https://127.0.0.1:3000"]
# How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 (1 week).
jwt_expiry = 3600
# Allow/disallow new user signups to your project.
enable_signup = true
# Allow/disallow anonymous sign-ins to your project.
enable_anonymous_sign_ins = false
[auth.email]
# Allow/disallow new user signups via email to your project.
enable_signup = true
# If enabled, users need to confirm their email address before signing in.
enable_confirmations = false

View File

@@ -0,0 +1,93 @@
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
import { join } from "node:path";
// ---------------------------------------------------------------------------
// Common paths
// ---------------------------------------------------------------------------
// Root of the Supabase project created by the agent, relative to the eval cwd.
export const supabaseDir = join(process.cwd(), "supabase");
// Directory holding the agent's .sql migration files.
export const migrationsDir = join(supabaseDir, "migrations");
// Directory holding Edge Function subdirectories (one per function).
export const functionsDir = join(supabaseDir, "functions");
// ---------------------------------------------------------------------------
// Migration helpers
// ---------------------------------------------------------------------------
/** Collect the absolute paths of every .sql file under supabase/migrations/. */
export function findMigrationFiles(): string[] {
  if (!existsSync(migrationsDir)) return [];
  const sqlFiles: string[] = [];
  for (const entry of readdirSync(migrationsDir)) {
    if (entry.endsWith(".sql")) sqlFiles.push(join(migrationsDir, entry));
  }
  return sqlFiles;
}
/** Read and concatenate all migration SQL files. */
export function getMigrationSQL(): string {
const files = findMigrationFiles();
if (files.length === 0)
throw new Error("No migration file found in supabase/migrations/");
return files.map((f) => readFileSync(f, "utf-8")).join("\n");
}
// ---------------------------------------------------------------------------
// Edge Function helpers
// ---------------------------------------------------------------------------
/**
* Find the index.ts/tsx entry file for a named Edge Function.
*
* @param functionName - directory name under supabase/functions/ (e.g. "hello-world")
*/
export function findFunctionFile(functionName: string): string | null {
const fnDir = join(functionsDir, functionName);
if (!existsSync(fnDir)) return null;
const files = readdirSync(fnDir).filter(
(f) => f.startsWith("index.") && (f.endsWith(".ts") || f.endsWith(".tsx")),
);
return files.length > 0 ? join(fnDir, files[0]) : null;
}
/**
 * Read the source code of a named Edge Function's entry file.
 *
 * @param functionName - directory name under supabase/functions/ (e.g. "stripe-webhook")
 * @returns the full text of the function's index.ts/index.tsx entry file
 * @throws Error when the function directory or its index entry file is missing
 */
export function getFunctionCode(functionName: string): string {
  const file = findFunctionFile(functionName);
  if (!file)
    // findFunctionFile accepts both index.ts and index.tsx, so the message
    // must mention both (it previously claimed only index.ts was searched).
    throw new Error(
      `No index.ts/tsx found in supabase/functions/${functionName}/`,
    );
  return readFileSync(file, "utf-8");
}
/** Find a shared CORS module under supabase/functions/_shared/ (or similar _-prefixed dir). */
export function findSharedCorsFile(): string | null {
  if (!existsSync(functionsDir)) return null;
  // Underscore-prefixed directories are Supabase's convention for shared code.
  const underscoreDirs = readdirSync(functionsDir).filter((entry) => {
    if (!entry.startsWith("_")) return false;
    return statSync(join(functionsDir, entry)).isDirectory();
  });
  for (const dirName of underscoreDirs) {
    const dirPath = join(functionsDir, dirName);
    const corsFile = readdirSync(dirPath).find((f) => f.includes("cors"));
    if (corsFile !== undefined) return join(dirPath, corsFile);
  }
  return null;
}
/** Concatenate the contents of every .ts/.tsx file inside _-prefixed shared directories. */
export function getSharedCode(): string {
  if (!existsSync(functionsDir)) return "";
  const sources: string[] = [];
  for (const entry of readdirSync(functionsDir)) {
    // Only underscore-prefixed entries that are directories count as shared code.
    if (!entry.startsWith("_")) continue;
    const dirPath = join(functionsDir, entry);
    if (!statSync(dirPath).isDirectory()) continue;
    for (const fileName of readdirSync(dirPath)) {
      if (!fileName.endsWith(".ts") && !fileName.endsWith(".tsx")) continue;
      sources.push(readFileSync(join(dirPath, fileName), "utf-8"));
    }
  }
  return sources.join("\n");
}

View File

@@ -1,25 +1,6 @@
import { existsSync, readdirSync, readFileSync } from "node:fs";
import { join } from "node:path";
import { expect, test } from "vitest"; import { expect, test } from "vitest";
const supabaseDir = join(process.cwd(), "supabase"); import { findMigrationFiles, getMigrationSQL } from "../eval-utils.ts";
const migrationsDir = join(supabaseDir, "migrations");
/** Find all .sql migration files (agent may create one or more). */
function findMigrationFiles(): string[] {
if (!existsSync(migrationsDir)) return [];
return readdirSync(migrationsDir)
.filter((f) => f.endsWith(".sql"))
.map((f) => join(migrationsDir, f));
}
/** Read and concatenate all migration SQL files. */
function getMigrationSQL(): string {
const files = findMigrationFiles();
if (files.length === 0)
throw new Error("No migration file found in supabase/migrations/");
return files.map((f) => readFileSync(f, "utf-8")).join("\n");
}
test("migration file exists", () => { test("migration file exists", () => {
expect(findMigrationFiles().length).toBeGreaterThan(0); expect(findMigrationFiles().length).toBeGreaterThan(0);

View File

@@ -1,25 +1,6 @@
import { existsSync, readdirSync, readFileSync } from "node:fs";
import { join } from "node:path";
import { expect, test } from "vitest"; import { expect, test } from "vitest";
const supabaseDir = join(process.cwd(), "supabase"); import { findMigrationFiles, getMigrationSQL } from "../eval-utils.ts";
const migrationsDir = join(supabaseDir, "migrations");
/** Find all .sql migration files (agent may create one or multiple). */
function findMigrationFiles(): string[] {
if (!existsSync(migrationsDir)) return [];
return readdirSync(migrationsDir)
.filter((f) => f.endsWith(".sql"))
.map((f) => join(migrationsDir, f));
}
/** Concatenate all migration SQL into a single string for assertions. */
function getMigrationSQL(): string {
const files = findMigrationFiles();
if (files.length === 0)
throw new Error("No migration file found in supabase/migrations/");
return files.map((f) => readFileSync(f, "utf-8")).join("\n");
}
test("migration file exists", () => { test("migration file exists", () => {
expect(findMigrationFiles().length).toBeGreaterThan(0); expect(findMigrationFiles().length).toBeGreaterThan(0);

View File

@@ -1,169 +1,9 @@
# Supabase Skills Eval Scenarios # Supabase Skills Eval Scenarios
## Scenario 1: auth-rls-new-project | # | Scenario | Description |
|---|----------|-------------|
**Description:** Set up a new Supabase project from scratch and add | 1 | [auth-rls-new-project](auth-rls-new-project.md) | Initialize a Supabase project and create a tasks table with RLS |
authentication with RLS. The agent must initialize the project with the CLI, | 2 | [team-rls-security-definer](team-rls-security-definer.md) | Team-based RLS with security definer helper in a private schema |
start the local Supabase stack, then create a tasks table with proper security | 3 | [storage-rls-user-folders](storage-rls-user-folders.md) | Storage buckets with RLS policies for user-isolated folders |
(RLS policies, auth FK, indexes) in a single idempotent migration. | 4 | [edge-function-hello-world](edge-function-hello-world.md) | Hello-world Edge Function with CORS and shared utilities |
| 5 | edge-function-stripe-webhook | Stripe webhook Edge Function with signature verification and orders migration |
**Setup:** The workspace starts empty (no `supabase/` directory). The agent is
expected to run `npx supabase init` and `npx supabase start` before creating
the migration.
**Expected skill files read:**
- `SKILL.md` (skill body with reference file index)
- `references/dev-getting-started.md`
- `references/db-rls-mandatory.md`
- `references/db-rls-policy-types.md`
- `references/db-rls-common-mistakes.md`
- `references/db-schema-auth-fk.md`
- `references/db-schema-timestamps.md`
- `references/db-migrations-idempotent.md`
**Expected result:**
The agent initializes a Supabase project and creates a migration file that:
- Creates tasks table with `timestamptz` columns
- Has `user_id` FK to `auth.users(id)` with `ON DELETE CASCADE`
- Enables RLS (`ALTER TABLE tasks ENABLE ROW LEVEL SECURITY`)
- Creates per-operation policies using `(select auth.uid())` with `TO authenticated`
- Creates index on `user_id`
- Uses `IF NOT EXISTS` for idempotency
**Scorer:** Binary pass/fail (12 vitest assertions)
| Test | What it checks |
| --- | --- |
| supabase project initialized | `supabase/config.toml` exists after agent runs |
| migration file exists | Agent created a `.sql` file in `supabase/migrations/` |
| creates tasks table | SQL contains `CREATE TABLE ... tasks` |
| enables RLS | `ALTER TABLE tasks ENABLE ROW LEVEL SECURITY` |
| FK to auth.users | `REFERENCES auth.users` |
| ON DELETE CASCADE | Cascade delete on auth FK |
| (select auth.uid()) | Subselect form in policies (performance) |
| TO authenticated | Policies scoped to authenticated role |
| timestamptz | No plain `timestamp` for time columns |
| index on user_id | `CREATE INDEX` on the FK column |
| IF NOT EXISTS | Idempotent migration |
| overall quality | At least 4/5 best-practice signals present |
## Scenario 2: team-rls-security-definer
**Description:** Create a SQL migration for a team-based project management app
where users belong to organizations via a membership table. The migration must
define tables for organizations, memberships, and projects, then secure them
with RLS policies that use a `security definer` helper function in a private
schema to efficiently resolve team membership without per-row joins.
**Setup:** The workspace starts with a pre-initialized Supabase project
(`supabase/config.toml` exists, empty `supabase/migrations/` directory). The
agent creates migration files within this structure.
**Expected skill files read:**
- `SKILL.md` (skill body with reference file index)
- `references/db-rls-mandatory.md`
- `references/db-rls-policy-types.md`
- `references/db-rls-common-mistakes.md`
- `references/db-rls-performance.md`
- `references/db-security-functions.md`
- `references/db-schema-auth-fk.md`
- `references/db-schema-timestamps.md`
- `references/db-perf-indexes.md`
- `references/db-migrations-idempotent.md`
**Expected result:**
The agent creates a migration file that:
- Creates organizations, memberships, and projects tables with `timestamptz` columns
- Has `user_id` FK to `auth.users(id)` with `ON DELETE CASCADE` on memberships
- Has `org_id` FK on projects referencing organizations
- Enables RLS on all three tables
- Creates a private schema with a `security definer` helper function (`SET search_path = ''`)
- Creates RLS policies using `(select auth.uid())` with `TO authenticated`
- Creates indexes on membership lookup columns (user_id, org_id)
- Has a delete policy on projects restricted to owner role
- Uses `IF NOT EXISTS` for idempotency
**Scorer:** Binary pass/fail (16 vitest assertions)
| Test | What it checks |
| --- | --- |
| migration file exists | A `.sql` file exists in `supabase/migrations/` |
| creates organizations table | SQL contains `CREATE TABLE` for organizations |
| creates memberships table | SQL contains `CREATE TABLE` for memberships |
| creates projects table | SQL contains `CREATE TABLE` for projects |
| enables RLS on all tables | `ALTER TABLE ... ENABLE ROW LEVEL SECURITY` for all three tables |
| FK to auth.users with ON DELETE CASCADE | memberships references `auth.users` with cascade |
| org_id FK on projects | projects references organizations |
| private schema created | `CREATE SCHEMA ... private` present |
| security_definer helper function | Function in private schema with `SECURITY DEFINER` and `SET search_path = ''` |
| policies use (select auth.uid()) | Subselect form in all policies referencing auth.uid() |
| policies use TO authenticated | All policies scoped to authenticated role |
| index on membership lookup columns | `CREATE INDEX` on user_id and/or org_id in memberships |
| uses timestamptz | No plain `timestamp` for time columns |
| idempotent DDL | Uses `IF NOT EXISTS` or `DROP ... IF EXISTS` patterns |
| delete policy restricted to owner role | A delete policy on projects checks for owner/admin role |
| overall quality score | At least 10/14 best-practice signals present |
## Scenario 3: storage-rls-user-folders
**Description:** Create a SQL migration that sets up Supabase Storage buckets
with RLS policies for user content. An avatars bucket (public reads,
authenticated uploads restricted to user folders) and a documents bucket (fully
private, user-isolated), with file type restrictions, storage helper functions
in policies, and a file_metadata tracking table secured with RLS.
**Setup:** Pre-initialized Supabase project (`supabase/config.toml` exists)
with an empty `supabase/migrations/` directory. The agent creates migration
files within this structure.
**Expected skill files read:**
- `SKILL.md` (skill body with reference file index)
- `references/storage-access-control.md`
- `references/db-rls-mandatory.md`
- `references/db-rls-common-mistakes.md`
- `references/db-rls-performance.md`
- `references/db-schema-auth-fk.md`
- `references/db-schema-timestamps.md`
- `references/db-perf-indexes.md`
- `references/db-migrations-idempotent.md`
**Expected result:**
The agent creates a migration file that:
- Inserts avatars bucket into `storage.buckets` with `public = true`, MIME type restrictions, and file size limit
- Inserts documents bucket with `public = false`
- Creates RLS policies on `storage.objects` using `storage.foldername(name)` with `auth.uid()::text`
- Scopes upload policies `TO authenticated` and avatars SELECT policy `TO public`
- Creates `file_metadata` table with FK to `auth.users` with `ON DELETE CASCADE`
- Enables RLS on `file_metadata` with policies using `(select auth.uid())`
- Uses `timestamptz` for time columns, indexes `user_id`, and `IF NOT EXISTS` for idempotency
**Scorer:** Binary pass/fail (17 vitest assertions)
| Test | What it checks |
| --- | --- |
| migration file exists | A `.sql` file exists in `supabase/migrations/` |
| creates avatars bucket | SQL inserts into `storage.buckets` with id 'avatars' and `public = true` |
| creates documents bucket | SQL inserts into `storage.buckets` with id 'documents' and `public = false` |
| avatars bucket has mime type restriction | `allowed_mime_types` includes image types (jpeg, png, webp) |
| avatars bucket has file size limit | `file_size_limit` set (around 2MB / 2097152 bytes) |
| storage policy uses foldername or path for user isolation | Policy references `storage.foldername(name)` with `auth.uid()::text` |
| storage policy uses TO authenticated | Storage upload/delete policies scoped to `TO authenticated` |
| public read policy for avatars | A SELECT policy on storage.objects for avatars allows public/anon access |
| documents bucket is fully private | Policies for documents restrict all operations to authenticated owner |
| creates file_metadata table | SQL contains `CREATE TABLE` for file_metadata |
| file_metadata has FK to auth.users with CASCADE | `REFERENCES auth.users` with `ON DELETE CASCADE` |
| RLS enabled on file_metadata | `ALTER TABLE file_metadata ENABLE ROW LEVEL SECURITY` |
| file_metadata policies use (select auth.uid()) | Subselect form in policies |
| uses timestamptz for time columns | No plain `timestamp` in file_metadata |
| index on file_metadata user_id | `CREATE INDEX` on user_id column |
| idempotent DDL | Uses `IF NOT EXISTS` patterns |
| overall quality score | At least 11/15 best-practice signals present |

View File

@@ -0,0 +1,124 @@
# Scenario: auth-rls-new-project
## Summary
The agent must set up a new Supabase project from scratch and add
authentication with RLS. It must initialize the project with the CLI, start
the local Supabase stack, then create a tasks table with proper security (RLS
policies, auth FK, indexes) in a single idempotent migration.
## Real-World Justification
Why this is a common and important workflow:
1. **Project initialization + first migration is the canonical onboarding
workflow** -- The Supabase getting started guide walks developers through
`supabase init`, `supabase start`, and creating their first migration. This
is the first thing every new Supabase developer does.
- Source: https://supabase.com/docs/guides/local-development/cli/getting-started
2. **RLS is the most common security question for new Supabase users** --
Developers frequently forget to enable RLS, use incorrect policy syntax, or
omit the `TO authenticated` clause. The Supabase docs and community
discussions repeatedly emphasize that RLS must be enabled on every public
table.
- Source: https://supabase.com/docs/guides/database/postgres/row-level-security
- Source: https://github.com/orgs/supabase/discussions/811
3. **Auth FK and cascade deletes are a frequent source of bugs** -- Developers
often reference `auth.users` incorrectly or forget `ON DELETE CASCADE`,
leading to orphaned rows when users are deleted from auth.
- Source: https://supabase.com/docs/guides/auth/managing-user-data
## Skill References Exercised
Which reference files the agent should consult and what each teaches:
| Reference File | What It Teaches | What the Agent Should Apply |
|---|---|---|
| `references/dev-getting-started.md` | `npx supabase init`, `npx supabase start`, project structure | Initialize the project and start the local stack |
| `references/db-rls-mandatory.md` | RLS must be enabled on all public tables | Enable RLS on the tasks table |
| `references/db-rls-policy-types.md` | PERMISSIVE vs RESTRICTIVE, per-operation policies | Create separate SELECT, INSERT, UPDATE, DELETE policies |
| `references/db-rls-common-mistakes.md` | Missing TO clause, user_metadata pitfalls | Always use `TO authenticated` on all policies |
| `references/db-schema-auth-fk.md` | FK to auth.users with ON DELETE CASCADE | Reference auth.users with cascade on user_id |
| `references/db-schema-timestamps.md` | Use timestamptz not timestamp | All time columns use timestamptz |
| `references/db-migrations-idempotent.md` | IF NOT EXISTS for safe reruns | Idempotent DDL throughout the migration |
## Workspace Setup
What the workspace starts with before the agent runs:
- Empty workspace (no `supabase/` directory)
- The agent is expected to run `npx supabase init` and `npx supabase start`
before creating the migration
## Agent Task (PROMPT.md draft)
The prompt to give the agent. Written as a developer would ask it:
> I'm starting a new Supabase project. Initialize the project, start the local
> dev stack, and create a migration for a `tasks` table.
>
> The tasks table should have:
> - A title (text)
> - A status column (e.g., pending, in_progress, done)
> - Timestamps for created and updated
> - A reference to the authenticated user who owns the task
>
> Set up Row Level Security so users can only see and manage their own tasks.
> The migration should be safe to run multiple times.
## Evaluation Criteria
What vitest should assert on the agent's output. Each assertion tests a
specific quality signal:
| # | Test Name | What It Checks | Quality Dimension |
|---|-----------|----------------|-------------------|
| 1 | supabase project initialized | `supabase/config.toml` exists after agent runs | structure |
| 2 | migration file exists | Agent created a `.sql` file in `supabase/migrations/` | structure |
| 3 | creates tasks table | SQL contains `CREATE TABLE ... tasks` | correctness |
| 4 | enables RLS | `ALTER TABLE tasks ENABLE ROW LEVEL SECURITY` | security |
| 5 | FK to auth.users | `REFERENCES auth.users` | correctness |
| 6 | ON DELETE CASCADE | Cascade delete on auth FK | correctness |
| 7 | (select auth.uid()) | Subselect form in policies (performance) | performance |
| 8 | TO authenticated | Policies scoped to authenticated role | security |
| 9 | timestamptz | No plain `timestamp` for time columns | correctness |
| 10 | index on user_id | `CREATE INDEX` on the FK column | performance |
| 11 | IF NOT EXISTS | Idempotent migration | idempotency |
| 12 | overall quality | At least 4/5 best-practice signals present | overall |
## Reasoning
Step-by-step reasoning for why this scenario is well-designed:
1. **Baseline differentiator:** An agent without the skill would likely: (a)
skip `supabase start` or misconfigure the init step, (b) use plain
`timestamp` instead of `timestamptz`, (c) use bare `auth.uid()` instead of
the subselect form `(select auth.uid())`, (d) forget the `TO authenticated`
clause on policies, (e) omit `ON DELETE CASCADE` on the auth FK, (f) skip
creating an index on `user_id`. These are Supabase-specific best practices
that require reading the skill references.
2. **Skill value:** The getting-started reference teaches the CLI workflow. The
RLS references teach mandatory enablement, per-operation policies, and the
`TO authenticated` clause. The schema references teach `timestamptz` and
auth FK patterns. The migrations reference teaches idempotent DDL. Together
these 7 reference files cover every assertion.
3. **Testability:** All assertions are file-existence checks or regex/string
matches on SQL text. `config.toml` existence, `CREATE TABLE`, `ENABLE ROW
LEVEL SECURITY`, `REFERENCES auth.users`, `ON DELETE CASCADE`,
`(select auth.uid())`, `TO authenticated`, `timestamptz`, `CREATE INDEX`,
and `IF NOT EXISTS` are all reliably detectable patterns.
4. **Realism:** Setting up a new project with a user-owned tasks table is the
most common Supabase tutorial pattern. It covers the complete onboarding
workflow from zero to a secured table.
## Difficulty
**Rating:** EASY
- Without skill: ~50-65% of assertions expected to pass
- With skill: ~90-100% of assertions expected to pass

View File

@@ -0,0 +1,129 @@
# Scenario: edge-function-hello-world
## Summary
The agent must initialize a Supabase project, create a "hello-world" Edge
Function with proper project structure, CORS handling, error handling, and JSON
responses, plus a shared CORS utility in the `_shared/` folder. This tests the
fundamental Edge Function setup workflow that every Supabase developer
encounters first.
## Real-World Justification
Why this is a common and important workflow:
1. **Edge Function quickstart is the most common entry point** -- The Supabase
Edge Functions quickstart guide is the canonical first step for developers
adopting serverless functions. The CLI workflow (`supabase init`,
`functions new`, `functions serve`) is documented as the primary onboarding
path.
- Source: https://supabase.com/docs/guides/functions/quickstart
2. **CORS is the number one pain point for Edge Function beginners** -- GitHub
issues show CORS errors are the most frequently reported Edge Function
problem. Developers forget to handle OPTIONS preflight requests, omit CORS
headers from error responses, or fail to include `x-client-info` and
`apikey` in allowed headers. Multiple issues spanning 2022-2025 document
this.
- Source: https://github.com/supabase/supabase/issues/6267
- Source: https://github.com/orgs/supabase/discussions/29485
- Source: https://supabase.com/docs/guides/functions/cors
3. **Shared code structure with `_shared/` is frequently misunderstood** --
Developers put shared utilities in folders without the underscore prefix,
causing them to be deployed as separate functions. The `_shared/` convention
and `import_map.json` placement are Supabase-specific patterns not obvious
from general Deno knowledge.
- Source: https://github.com/orgs/supabase/discussions/8723
- Source: https://supabase.com/docs/guides/functions/development-tips
## Skill References Exercised
Which reference files the agent should consult and what each teaches:
| Reference File | What It Teaches | What the Agent Should Apply |
|---|---|---|
| `references/edge-fun-quickstart.md` | `Deno.serve()` handler, CLI workflow, JSON response with Content-Type header, error handling for JSON parsing | Use `Deno.serve()`, return JSON with proper Content-Type, wrap in try/catch |
| `references/edge-fun-project-structure.md` | `_shared/` folder convention, hyphenated function names, `import_map.json` placement | Create `_shared/cors.ts`, use hyphens in function name |
| `references/edge-pat-cors.md` | CORS headers object, OPTIONS preflight handling, CORS on error responses | Handle OPTIONS, include CORS headers in all responses including errors |
| `references/edge-pat-error-handling.md` | Proper error status codes, `console.error` for internal logging, CORS on errors, import from `_shared/cors.ts` | Return 400 for bad input, include CORS headers on error response |
| `references/dev-getting-started.md` | `npx supabase init`, project directory structure | Initialize supabase project correctly |
## Workspace Setup
What the workspace starts with before the agent runs:
- Empty workspace (no `supabase/` directory)
- The agent is expected to initialize the project and create all files
## Agent Task (PROMPT.md draft)
The prompt to give the agent. Written as a developer would ask it -- no hints
about best practices or what the tests check:
> I want to create my first Supabase Edge Function. Set up the project and
> create a "hello-world" function that:
>
> 1. Accepts a POST request with a JSON body containing a `name` field
> 2. Returns a JSON response like `{ "message": "Hello {name}!" }`
> 3. Works when called from a browser (frontend app)
> 4. Handles bad input gracefully
>
> I also want the project organized so I can add more functions later and share
> common code between them.
## Evaluation Criteria
What vitest should assert on the agent's output. Each assertion tests a
specific quality signal:
| # | Test Name | What It Checks | Quality Dimension |
|---|-----------|----------------|-------------------|
| 1 | supabase project initialized | `supabase/config.toml` exists | structure |
| 2 | function directory exists | `supabase/functions/hello-world/` directory exists | structure |
| 3 | function index file exists | `supabase/functions/hello-world/index.ts` exists | structure |
| 4 | uses Deno.serve | Function code contains `Deno.serve` (not legacy `serve` import from std) | correctness |
| 5 | returns JSON response | Response includes `Content-Type: application/json` header or uses `Response.json` | correctness |
| 6 | handles OPTIONS preflight | Code checks for `req.method === "OPTIONS"` or equivalent | correctness |
| 7 | defines CORS headers | Code defines Access-Control-Allow-Origin header | security |
| 8 | CORS allows required headers | CORS config includes `authorization` and `apikey` in allowed headers | security |
| 9 | error response has CORS headers | Error/catch response also includes CORS headers | security |
| 10 | has try-catch for error handling | Function body wrapped in try/catch | correctness |
| 11 | returns proper error status code | Error response uses status 400 or 500 (not default 200) | correctness |
| 12 | shared CORS module exists | A `_shared/cors.ts` (or similar) file exists under `supabase/functions/` | structure |
| 13 | function imports from shared | Function code imports from `../_shared/` relative path | structure |
| 14 | function uses hyphenated name | Function directory uses hyphens not underscores | structure |
## Reasoning
Step-by-step reasoning for why this scenario is well-designed:
1. **Baseline differentiator:** An agent without the skill would likely: (a)
use the deprecated `serve` import from `deno.land/std` instead of built-in
`Deno.serve`, (b) forget to handle OPTIONS preflight requests entirely,
(c) omit CORS headers from error responses while including them on success,
(d) put shared code in a `shared/` folder without the underscore prefix,
   (e) miss including `authorization` and `apikey` in the
   Access-Control-Allow-Headers list (the headers assertion #8 checks for),
   (f) return errors with status 200.
2. **Skill value:** The quickstart reference teaches `Deno.serve()` and proper
JSON error handling. The CORS reference shows the exact headers needed and
the OPTIONS handler pattern. The project structure reference teaches the
`_shared/` convention. The error handling reference shows CORS on errors
and proper status codes. Together these 5 references cover every
assertion.
3. **Testability:** All assertions are file-existence checks or regex/string
matches on TypeScript source code. No runtime execution needed.
4. **Realism:** Creating a first Edge Function with CORS support for a
   frontend app is typically the very first task a Supabase developer
   performs when adopting Edge Functions. The Supabase quickstart guide
   documents exactly this workflow.
## Difficulty
**Rating:** EASY
- Without skill: ~45-60% of assertions expected to pass
- With skill: ~90-100% of assertions expected to pass

View File

@@ -36,13 +36,28 @@ export async function runTests(opts: {
const destPath = join(opts.workspacePath, evalFileName); const destPath = join(opts.workspacePath, evalFileName);
copyFileSync(opts.evalFilePath, destPath); copyFileSync(opts.evalFilePath, destPath);
// Copy shared eval-utils.ts if it exists alongside the eval scenarios
const evalUtilsSrc = join(
dirname(dirname(opts.evalFilePath)),
"eval-utils.ts",
);
if (existsSync(evalUtilsSrc)) {
copyFileSync(evalUtilsSrc, join(opts.workspacePath, "eval-utils.ts"));
}
// Write a minimal vitest config that overrides the default include pattern // Write a minimal vitest config that overrides the default include pattern
// so EVAL.ts (without .test. or .spec.) is picked up. // so EVAL.ts (without .test. or .spec.) is picked up.
const vitestConfigPath = join(opts.workspacePath, "vitest.config.mjs"); const vitestConfigPath = join(opts.workspacePath, "vitest.config.mjs");
if (!existsSync(vitestConfigPath)) { if (!existsSync(vitestConfigPath)) {
// Alias ../eval-utils.ts → ./eval-utils.ts so the import resolves in
// the flat workspace (source tree has EVAL.ts one level deeper).
const evalUtilsDest = join(opts.workspacePath, "eval-utils.ts");
const aliasBlock = existsSync(evalUtilsDest)
? `resolve: { alias: { "../eval-utils.ts": "./eval-utils.ts" } },`
: "";
writeFileSync( writeFileSync(
vitestConfigPath, vitestConfigPath,
`export default { test: { include: ["EVAL.{ts,tsx}"] } };\n`, `export default { ${aliasBlock} test: { include: ["EVAL.{ts,tsx}"] } };\n`,
); );
} }