From 2d4f780f49c2f1836aea9355bd11f3d31f720707 Mon Sep 17 00:00:00 2001 From: Julien Goux Date: Thu, 25 Jun 2026 11:17:54 +0200 Subject: [PATCH 1/3] feat(db): support gzipped SQL seeds --- apps/cli-go/docs/supabase/db/reset.md | 2 +- apps/cli-go/pkg/config/templates/config.toml | 1 + apps/cli-go/pkg/config/testdata/config.toml | 1 + apps/cli-go/pkg/migration/file.go | 107 ++++++++++++++++-- apps/cli-go/pkg/migration/seed_test.go | 86 ++++++++++++++ .../src/shared/init/project-init.templates.ts | 1 + apps/docs/public/cli/config.schema.json | 8 +- packages/config/src/db.ts | 5 +- packages/config/testdata/legacy-config.toml | 1 + 9 files changed, 196 insertions(+), 16 deletions(-) diff --git a/apps/cli-go/docs/supabase/db/reset.md b/apps/cli-go/docs/supabase/db/reset.md index acb9b9832b..915e8cb58e 100644 --- a/apps/cli-go/docs/supabase/db/reset.md +++ b/apps/cli-go/docs/supabase/db/reset.md @@ -4,6 +4,6 @@ Resets the local database to a clean state. Requires the local development stack to be started by running `supabase start`. -Recreates the local Postgres container and applies all local migrations found in `supabase/migrations` directory. If test data is defined in `supabase/seed.sql`, it will be seeded after the migrations are run. Any other data or schema changes made during local development will be discarded. +Recreates the local Postgres container and applies all local migrations found in `supabase/migrations` directory. If test data is defined in `supabase/seed.sql` or configured as a gzipped SQL seed file, it will be seeded after the migrations are run. Any other data or schema changes made during local development will be discarded. When running db reset with `--linked` or `--db-url` flag, a SQL script is executed to identify and drop all user created entities in the remote database. Since Postgres roles are cluster level entities, any custom roles created through the dashboard or `supabase/roles.sql` will not be deleted by remote reset. diff --git a/apps/cli-go/pkg/config/templates/config.toml b/apps/cli-go/pkg/config/templates/config.toml index 56cc27beac..874ce1464c 100644 --- a/apps/cli-go/pkg/config/templates/config.toml +++ b/apps/cli-go/pkg/config/templates/config.toml @@ -68,6 +68,7 @@ schema_paths = [] enabled = true # Specifies an ordered list of seed files to load during db reset. # Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] [db.network_restrictions] diff --git a/apps/cli-go/pkg/config/testdata/config.toml b/apps/cli-go/pkg/config/testdata/config.toml index 1c60b8af17..afe401413d 100644 --- a/apps/cli-go/pkg/config/testdata/config.toml +++ b/apps/cli-go/pkg/config/testdata/config.toml @@ -62,6 +62,7 @@ test_key = "test_value" enabled = true # Specifies an ordered list of seed files to load during db reset. # Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] [db.network_restrictions] diff --git a/apps/cli-go/pkg/migration/file.go b/apps/cli-go/pkg/migration/file.go index 540c129e33..dcdad011ff 100644 --- a/apps/cli-go/pkg/migration/file.go +++ b/apps/cli-go/pkg/migration/file.go @@ -2,8 +2,10 @@ package migration import ( "bytes" + "compress/gzip" "context" "crypto/sha256" + "encoding/binary" "encoding/hex" "fmt" "io" @@ -48,17 +50,15 @@ func NewMigrationFromFile(path string, fsys fs.FS) (*MigrationFile, error) { } func parseFile(path string, fsys fs.FS) ([]string, error) { - sql, err := fsys.Open(path) + sql, scannerBufferSize, err := openSQL(path, fsys, "migration file") if err != nil { - return nil, errors.Errorf("failed to open migration file: %w", err) + return nil, err } defer sql.Close() - // Unless explicitly specified, Use file length as max buffer size + // Unless explicitly specified, use file length as max buffer size. if !viper.IsSet("SCANNER_BUFFER_SIZE") { - if fi, err := sql.Stat(); err == nil { - if size := int(fi.Size()); size > parser.MaxScannerCapacity { - parser.MaxScannerCapacity = size - } + if scannerBufferSize > parser.MaxScannerCapacity { + parser.MaxScannerCapacity = scannerBufferSize } } return parser.SplitAndTrim(sql) @@ -182,9 +182,9 @@ type SeedFile struct { } func NewSeedFile(path string, fsys fs.FS) (*SeedFile, error) { - sql, err := fsys.Open(path) + sql, _, err := openSQL(path, fsys, "seed file") if err != nil { - return nil, errors.Errorf("failed to open seed file: %w", err) + return nil, err } defer sql.Close() hash := sha256.New() @@ -195,6 +195,95 @@ func NewSeedFile(path string, fsys fs.FS) (*SeedFile, error) { return &SeedFile{Path: path, Hash: digest}, nil } +func openSQL(path string, fsys fs.FS, kind string) (io.ReadCloser, int, error) { + sql, err := fsys.Open(path) + if err != nil { + return nil, 0, errors.Errorf("failed to open %s: %w", kind, err) + } + scannerBufferSize := maxScannerBufferSize(path, sql) + if !isCompressedSQL(path) { + return sql, scannerBufferSize, nil + } + gz, err := gzip.NewReader(sql) + if err != nil { + _ = sql.Close() + return nil, 0, errors.Errorf("failed to decompress %s: %w", kind, err) + } + return &compressedSQLReader{Reader: gz, gz: gz, file: sql}, scannerBufferSize, nil +} + +func maxScannerBufferSize(path string, sql fs.File) int { + info, err := sql.Stat() + if err != nil { + return 0 + } + size := info.Size() + if isCompressedSQL(path) { + if uncompressedSize, ok := gzipUncompressedSize(sql); ok && uncompressedSize > size { + size = uncompressedSize + } + } + return safeInt(size) +} + +func gzipUncompressedSize(sql fs.File) (int64, bool) { + seeker, ok := sql.(io.Seeker) + if !ok { + return 0, false + } + pos, err := seeker.Seek(0, io.SeekCurrent) + if err != nil { + return 0, false + } + defer func() { + _, _ = seeker.Seek(pos, io.SeekStart) + }() + end, err := seeker.Seek(0, io.SeekEnd) + if err != nil || end < 4 { + return 0, false + } + if _, err := seeker.Seek(-4, io.SeekEnd); err != nil { + return 0, false + } + var footer [4]byte + if _, err := io.ReadFull(sql, footer[:]); err != nil { + return 0, false + } + return int64(binary.LittleEndian.Uint32(footer[:])), true +} + +func safeInt(size int64) int { + if size <= 0 { + return 0 + } + maxInt := int64(^uint(0) >> 1) + if size > maxInt { + return int(maxInt) + } + return int(size) +} + +func isCompressedSQL(path string) bool { + return strings.HasSuffix(strings.ToLower(path), ".sql.gz") +} + +type compressedSQLReader struct { + io.Reader + gz *gzip.Reader + file fs.File +} + +func (r *compressedSQLReader) Close() error { + var firstErr error + if err := r.gz.Close(); err != nil { + firstErr = err + } + if err := r.file.Close(); err != nil && firstErr == nil { + firstErr = err + } + return firstErr +} + func (m *SeedFile) ExecBatchWithCache(ctx context.Context, conn *pgx.Conn, fsys fs.FS) error { // Parse each file individually to reduce memory usage lines, err := parseFile(m.Path, fsys) diff --git a/apps/cli-go/pkg/migration/seed_test.go b/apps/cli-go/pkg/migration/seed_test.go index db4337b54c..f5defec3a1 100644 --- a/apps/cli-go/pkg/migration/seed_test.go +++ b/apps/cli-go/pkg/migration/seed_test.go @@ -1,6 +1,8 @@ package migration import ( + "bytes" + "compress/gzip" "context" _ "embed" "os" @@ -36,6 +38,42 @@ func TestPendingSeeds(t *testing.T) { assert.False(t, seeds[0].Dirty) }) + t.Run("finds gzipped seeds", func(t *testing.T) { + pending := []string{"testdata/seed.sql.gz"} + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: gzipData(t, testSeed)}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(SELECT_SEED_TABLE). + Reply("SELECT 0") + // Run test + seeds, err := GetPendingSeeds(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.NoError(t, err) + require.Len(t, seeds, 1) + assert.Equal(t, seeds[0].Path, pending[0]) + assert.Equal(t, seeds[0].Hash, "61868484fc0ddca2a2022217629a9fd9a4cf1ca479432046290797d6d40ffcc3") + assert.False(t, seeds[0].Dirty) + }) + + t.Run("throws error on invalid gzipped seed", func(t *testing.T) { + pending := []string{"testdata/seed.sql.gz"} + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: []byte("not gzip data")}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(SELECT_SEED_TABLE). + Reply("SELECT 0") + // Run test + _, err := GetPendingSeeds(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.ErrorContains(t, err, "failed to decompress seed file") + }) + t.Run("finds dirty seeds", func(t *testing.T) { // Setup mock postgres conn := pgtest.NewConn() @@ -124,6 +162,28 @@ func TestSeedData(t *testing.T) { // Check error assert.ErrorContains(t, err, `ERROR: null value in column "age" of relation "employees" (SQLSTATE 23502)`) }) + + t.Run("seeds from gzipped file", func(t *testing.T) { + seed := SeedFile{ + Path: "testdata/seed.sql.gz", + Hash: "61868484fc0ddca2a2022217629a9fd9a4cf1ca479432046290797d6d40ffcc3", + } + fsys := fs.MapFS{ + seed.Path: &fs.MapFile{Data: gzipData(t, testSeed)}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + mockSeedHistory(conn). + Query(testSeed). + Reply("INSERT 0 1"). + Query(UPSERT_SEED_FILE, seed.Path, seed.Hash). + Reply("INSERT 0 1") + // Run test + err := SeedData(context.Background(), []SeedFile{seed}, conn.MockClient(t), fsys) + // Check error + assert.NoError(t, err) + }) } func mockSeedHistory(conn *pgtest.MockConn) *pgtest.MockConn { @@ -173,4 +233,30 @@ func TestSeedGlobals(t *testing.T) { // Check error assert.ErrorContains(t, err, `ERROR: database "postgres" does not exist (SQLSTATE 3D000)`) }) + + t.Run("seeds from gzipped file", func(t *testing.T) { + pending := []string{"testdata/1_globals.sql.gz"} + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: gzipData(t, testGlobals)}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(testGlobals). + Reply("CREATE ROLE") + // Run test + err := SeedGlobals(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.NoError(t, err) + }) +} + +func gzipData(t *testing.T, input string) []byte { + t.Helper() + var compressed bytes.Buffer + writer := gzip.NewWriter(&compressed) + _, err := writer.Write([]byte(input)) + require.NoError(t, err) + require.NoError(t, writer.Close()) + return compressed.Bytes() } diff --git a/apps/cli/src/shared/init/project-init.templates.ts b/apps/cli/src/shared/init/project-init.templates.ts index 6415e44a22..e393666499 100644 --- a/apps/cli/src/shared/init/project-init.templates.ts +++ b/apps/cli/src/shared/init/project-init.templates.ts @@ -68,6 +68,7 @@ schema_paths = [] enabled = true # Specifies an ordered list of seed files to load during db reset. # Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] [db.network_restrictions] diff --git a/apps/docs/public/cli/config.schema.json b/apps/docs/public/cli/config.schema.json index 28062d0891..7d59025ec7 100644 --- a/apps/docs/public/cli/config.schema.json +++ b/apps/docs/public/cli/config.schema.json @@ -2440,9 +2440,9 @@ "type": "array", "items": { "type": "string", - "description": "Path to a SQL file used to seed the database." + "description": "Path to a SQL or gzipped SQL file used to seed the database." }, - "description": "Ordered list of seed files to load during db reset.", + "description": "Ordered list of seed files to load during db reset. Supports .sql and .sql.gz files.", "default": [ "./seed.sql" ] @@ -5985,9 +5985,9 @@ "type": "array", "items": { "type": "string", - "description": "Path to a SQL file used to seed the database." + "description": "Path to a SQL or gzipped SQL file used to seed the database." }, - "description": "Ordered list of seed files to load during db reset.", + "description": "Ordered list of seed files to load during db reset. Supports .sql and .sql.gz files.", "default": [ "./seed.sql" ] diff --git a/packages/config/src/db.ts b/packages/config/src/db.ts index 1bc6c57c9f..857062a116 100644 --- a/packages/config/src/db.ts +++ b/packages/config/src/db.ts @@ -151,13 +151,14 @@ export const db = Schema.Struct({ }).pipe(Schema.withDecodingDefaultKey(Effect.succeed(defaultSeedEnabled))), sql_paths: Schema.Array( Schema.String.annotate({ - description: "Path to a SQL file used to seed the database.", + description: "Path to a SQL or gzipped SQL file used to seed the database.", tags, }), ) .annotate({ default: defaultSqlPaths, - description: "Ordered list of seed files to load during db reset.", + description: + "Ordered list of seed files to load during db reset. Supports .sql and .sql.gz files.", tags, }) .pipe(Schema.withDecodingDefaultKey(Effect.succeed([...defaultSqlPaths]))), diff --git a/packages/config/testdata/legacy-config.toml b/packages/config/testdata/legacy-config.toml index b228a9c073..25ddc89ae7 100644 --- a/packages/config/testdata/legacy-config.toml +++ b/packages/config/testdata/legacy-config.toml @@ -62,6 +62,7 @@ test_key = "test_value" enabled = true # Specifies an ordered list of seed files to load during db reset. # Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] [db.network_restrictions] From 1a88f710b56a131f3d13de3cbd19e2197319df7d Mon Sep 17 00:00:00 2001 From: Julien Goux Date: Thu, 25 Jun 2026 12:10:56 +0200 Subject: [PATCH 2/3] chore(db): address gzipped seed review --- apps/cli-go/docs/supabase/db/reset.md | 2 +- apps/cli-go/pkg/config/templates/config.toml | 2 +- apps/cli-go/pkg/config/testdata/config.toml | 2 +- apps/cli-go/pkg/migration/file.go | 131 +++++++------ apps/cli-go/pkg/migration/file_test.go | 8 + apps/cli-go/pkg/migration/seed_test.go | 174 ++++++++++++++++-- .../src/shared/init/project-init.templates.ts | 2 +- packages/config/testdata/legacy-config.toml | 2 +- 8 files changed, 241 insertions(+), 82 deletions(-) diff --git a/apps/cli-go/docs/supabase/db/reset.md b/apps/cli-go/docs/supabase/db/reset.md index 915e8cb58e..67e44f35a1 100644 --- a/apps/cli-go/docs/supabase/db/reset.md +++ b/apps/cli-go/docs/supabase/db/reset.md @@ -4,6 +4,6 @@ Resets the local database to a clean state. Requires the local development stack to be started by running `supabase start`. -Recreates the local Postgres container and applies all local migrations found in `supabase/migrations` directory. If test data is defined in `supabase/seed.sql` or configured as a gzipped SQL seed file, it will be seeded after the migrations are run. Any other data or schema changes made during local development will be discarded. +Recreates the local Postgres container and applies all local migrations found in `supabase/migrations` directory. If test data is defined in `supabase/seed.sql` or configured in `[db.seed] sql_paths`, it will be seeded after the migrations are run. Any other data or schema changes made during local development will be discarded. When running db reset with `--linked` or `--db-url` flag, a SQL script is executed to identify and drop all user created entities in the remote database. Since Postgres roles are cluster level entities, any custom roles created through the dashboard or `supabase/roles.sql` will not be deleted by remote reset. diff --git a/apps/cli-go/pkg/config/templates/config.toml b/apps/cli-go/pkg/config/templates/config.toml index 874ce1464c..0416d9ad10 100644 --- a/apps/cli-go/pkg/config/templates/config.toml +++ b/apps/cli-go/pkg/config/templates/config.toml @@ -67,7 +67,7 @@ schema_paths = [] # If enabled, seeds the database after migrations during a db reset. enabled = true # Specifies an ordered list of seed files to load during db reset. -# Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports glob patterns relative to supabase directory: ["./seeds/*.sql", "./seeds/*.sql.gz"] # Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] diff --git a/apps/cli-go/pkg/config/testdata/config.toml b/apps/cli-go/pkg/config/testdata/config.toml index afe401413d..6daa4b13c4 100644 --- a/apps/cli-go/pkg/config/testdata/config.toml +++ b/apps/cli-go/pkg/config/testdata/config.toml @@ -61,7 +61,7 @@ test_key = "test_value" # If enabled, seeds the database after migrations during a db reset. enabled = true # Specifies an ordered list of seed files to load during db reset. -# Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports glob patterns relative to supabase directory: ["./seeds/*.sql", "./seeds/*.sql.gz"] # Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] diff --git a/apps/cli-go/pkg/migration/file.go b/apps/cli-go/pkg/migration/file.go index dcdad011ff..98edcf1556 100644 --- a/apps/cli-go/pkg/migration/file.go +++ b/apps/cli-go/pkg/migration/file.go @@ -5,8 +5,8 @@ import ( "compress/gzip" "context" "crypto/sha256" - "encoding/binary" "encoding/hex" + stderrors "errors" "fmt" "io" "io/fs" @@ -33,7 +33,12 @@ var ( typeNamePattern = regexp.MustCompile(`type "([^"]+)" does not exist`) ) +const defaultSeedFileSizeLimit = 10 << 30 + func NewMigrationFromFile(path string, fsys fs.FS) (*MigrationFile, error) { + if isCompressedSQL(path) { + return nil, errors.Errorf("compressed SQL files are only supported for seed files: %s", path) + } lines, err := parseFile(path, fsys) if err != nil { return nil, err @@ -50,17 +55,22 @@ func NewMigrationFromFile(path string, fsys fs.FS) (*MigrationFile, error) { } func parseFile(path string, fsys fs.FS) ([]string, error) { - sql, scannerBufferSize, err := openSQL(path, fsys, "migration file") + sql, err := fsys.Open(path) if err != nil { - return nil, err + return nil, errors.Errorf("failed to open migration file: %w", err) } defer sql.Close() - // Unless explicitly specified, use file length as max buffer size. - if !viper.IsSet("SCANNER_BUFFER_SIZE") { - if scannerBufferSize > parser.MaxScannerCapacity { - parser.MaxScannerCapacity = scannerBufferSize - } + setScannerCapacity(fileSize(sql)) + return parser.SplitAndTrim(sql) +} + +func parseSeedFile(path string, fsys fs.FS) ([]string, error) { + sql, scannerBufferSize, err := openSeedSQL(path, fsys) + if err != nil { + return nil, err } + defer sql.Close() + setScannerCapacity(scannerBufferSize) return parser.SplitAndTrim(sql) } @@ -182,11 +192,13 @@ type SeedFile struct { } func NewSeedFile(path string, fsys fs.FS) (*SeedFile, error) { - sql, _, err := openSQL(path, fsys, "seed file") + sql, _, err := openSeedSQL(path, fsys) if err != nil { return nil, err } defer sql.Close() + // Seed history hashes the decompressed SQL so gzip metadata changes and + // equivalent recompressions do not mark the seed dirty. hash := sha256.New() if _, err := io.Copy(hash, sql); err != nil { return nil, errors.Errorf("failed to hash file: %w", err) @@ -195,61 +207,49 @@ func NewSeedFile(path string, fsys fs.FS) (*SeedFile, error) { return &SeedFile{Path: path, Hash: digest}, nil } -func openSQL(path string, fsys fs.FS, kind string) (io.ReadCloser, int, error) { +func openSeedSQL(path string, fsys fs.FS) (io.ReadCloser, int, error) { sql, err := fsys.Open(path) if err != nil { - return nil, 0, errors.Errorf("failed to open %s: %w", kind, err) + return nil, 0, errors.Errorf("failed to open seed file: %w", err) } - scannerBufferSize := maxScannerBufferSize(path, sql) if !isCompressedSQL(path) { - return sql, scannerBufferSize, nil + return sql, fileSize(sql), nil } gz, err := gzip.NewReader(sql) if err != nil { _ = sql.Close() - return nil, 0, errors.Errorf("failed to decompress %s: %w", kind, err) + return nil, 0, errors.Errorf("failed to decompress seed file: %w", err) } - return &compressedSQLReader{Reader: gz, gz: gz, file: sql}, scannerBufferSize, nil + return &compressedSQLReader{ + gz: gz, + file: sql, + maxBytes: seedFileSizeLimit(), + }, 0, nil } -func maxScannerBufferSize(path string, sql fs.File) int { - info, err := sql.Stat() - if err != nil { - return 0 - } - size := info.Size() - if isCompressedSQL(path) { - if uncompressedSize, ok := gzipUncompressedSize(sql); ok && uncompressedSize > size { - size = uncompressedSize +func setScannerCapacity(scannerBufferSize int) { + // Unless explicitly specified, use file length as max buffer size. + if !viper.IsSet("SCANNER_BUFFER_SIZE") { + if scannerBufferSize > parser.MaxScannerCapacity { + parser.MaxScannerCapacity = scannerBufferSize } } - return safeInt(size) } -func gzipUncompressedSize(sql fs.File) (int64, bool) { - seeker, ok := sql.(io.Seeker) - if !ok { - return 0, false - } - pos, err := seeker.Seek(0, io.SeekCurrent) +func fileSize(sql fs.File) int { + info, err := sql.Stat() if err != nil { - return 0, false - } - defer func() { - _, _ = seeker.Seek(pos, io.SeekStart) - }() - end, err := seeker.Seek(0, io.SeekEnd) - if err != nil || end < 4 { - return 0, false - } - if _, err := seeker.Seek(-4, io.SeekEnd); err != nil { - return 0, false + return 0 } - var footer [4]byte - if _, err := io.ReadFull(sql, footer[:]); err != nil { - return 0, false + return safeInt(info.Size()) +} + +func seedFileSizeLimit() int64 { + limit := int64(viper.GetSizeInBytes("SEED_FILE_SIZE_LIMIT")) + if limit <= 0 { + return defaultSeedFileSizeLimit } - return int64(binary.LittleEndian.Uint32(footer[:])), true + return limit } func safeInt(size int64) int { @@ -268,25 +268,42 @@ func isCompressedSQL(path string) bool { } type compressedSQLReader struct { - io.Reader - gz *gzip.Reader - file fs.File + gz *gzip.Reader + file fs.File + maxBytes int64 + bytesRead int64 } -func (r *compressedSQLReader) Close() error { - var firstErr error - if err := r.gz.Close(); err != nil { - firstErr = err +func (r *compressedSQLReader) Read(p []byte) (int, error) { + if r.maxBytes > 0 { + remaining := r.maxBytes - r.bytesRead + if remaining <= 0 { + var probe [1]byte + n, err := r.gz.Read(probe[:]) + if n > 0 { + return 0, errors.Errorf("decompressed seed file exceeds %d bytes", r.maxBytes) + } + return 0, err + } + if int64(len(p)) > remaining { + p = p[:remaining] + } } - if err := r.file.Close(); err != nil && firstErr == nil { - firstErr = err + n, err := r.gz.Read(p) + r.bytesRead += int64(n) + if n > 0 && err == io.EOF { + return n, nil } - return firstErr + return n, err +} + +func (r *compressedSQLReader) Close() error { + return stderrors.Join(r.gz.Close(), r.file.Close()) } func (m *SeedFile) ExecBatchWithCache(ctx context.Context, conn *pgx.Conn, fsys fs.FS) error { // Parse each file individually to reduce memory usage - lines, err := parseFile(m.Path, fsys) + lines, err := parseSeedFile(m.Path, fsys) if err != nil { return err } diff --git a/apps/cli-go/pkg/migration/file_test.go b/apps/cli-go/pkg/migration/file_test.go index 703f26954c..76e1a621c2 100644 --- a/apps/cli-go/pkg/migration/file_test.go +++ b/apps/cli-go/pkg/migration/file_test.go @@ -30,6 +30,14 @@ func TestMigrationFile(t *testing.T) { assert.Equal(t, "20220727064247", migration.Version) }) + t.Run("new from gzipped file returns clear error", func(t *testing.T) { + // Run test + migration, err := NewMigrationFromFile("20220727064247_create_table.sql.gz", fs.MapFS{}) + // Check error + assert.ErrorContains(t, err, "compressed SQL files are only supported for seed files") + assert.Nil(t, migration) + }) + t.Run("new from reader errors on max token", func(t *testing.T) { viper.Reset() sql := "\tBEGIN; " + strings.Repeat("a", parser.MaxScannerCapacity) diff --git a/apps/cli-go/pkg/migration/seed_test.go b/apps/cli-go/pkg/migration/seed_test.go index f5defec3a1..b07b469307 100644 --- a/apps/cli-go/pkg/migration/seed_test.go +++ b/apps/cli-go/pkg/migration/seed_test.go @@ -4,13 +4,19 @@ import ( "bytes" "compress/gzip" "context" + "crypto/sha256" _ "embed" + "encoding/hex" + "io" + stdfs "io/fs" "os" "testing" fs "testing/fstest" + "time" "github.com/jackc/pgerrcode" "github.com/jackc/pgx/v4" + "github.com/spf13/viper" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/supabase/cli/pkg/pgtest" @@ -58,6 +64,26 @@ func TestPendingSeeds(t *testing.T) { assert.False(t, seeds[0].Dirty) }) + t.Run("finds dirty gzipped seeds", func(t *testing.T) { + pending := []string{"testdata/seed.sql.gz"} + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: gzipData(t, testSeed)}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(SELECT_SEED_TABLE). + Reply("SELECT 1", SeedFile{Path: pending[0], Hash: "outdated"}) + // Run test + seeds, err := GetPendingSeeds(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.NoError(t, err) + require.Len(t, seeds, 1) + assert.Equal(t, seeds[0].Path, pending[0]) + assert.Equal(t, seeds[0].Hash, "61868484fc0ddca2a2022217629a9fd9a4cf1ca479432046290797d6d40ffcc3") + assert.True(t, seeds[0].Dirty) + }) + t.Run("throws error on invalid gzipped seed", func(t *testing.T) { pending := []string{"testdata/seed.sql.gz"} fsys := fs.MapFS{ @@ -74,6 +100,64 @@ func TestPendingSeeds(t *testing.T) { assert.ErrorContains(t, err, "failed to decompress seed file") }) + t.Run("throws error on truncated gzipped seed", func(t *testing.T) { + pending := []string{"testdata/seed.sql.gz"} + compressed := gzipData(t, testSeed) + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: compressed[:len(compressed)/2]}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(SELECT_SEED_TABLE). + Reply("SELECT 0") + // Run test + _, err := GetPendingSeeds(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.ErrorContains(t, err, "failed to hash file") + assert.ErrorIs(t, err, io.ErrUnexpectedEOF) + }) + + t.Run("throws error when gzipped seed exceeds size limit", func(t *testing.T) { + t.Cleanup(viper.Reset) + viper.Set("SEED_FILE_SIZE_LIMIT", 8) + pending := []string{"testdata/seed.sql.gz"} + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: gzipData(t, testSeed)}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(SELECT_SEED_TABLE). + Reply("SELECT 0") + // Run test + _, err := GetPendingSeeds(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.ErrorContains(t, err, "decompressed seed file exceeds 8 bytes") + }) + + t.Run("hashes gzipped seed from non-seekable fs", func(t *testing.T) { + path := "testdata/seed.sql.gz" + seed, err := NewSeedFile(path, nonSeekFS{ + path: gzipData(t, testSeed), + }) + // Check error + assert.NoError(t, err) + assert.Equal(t, "61868484fc0ddca2a2022217629a9fd9a4cf1ca479432046290797d6d40ffcc3", seed.Hash) + }) + + t.Run("hashes multistream gzipped seed", func(t *testing.T) { + path := "testdata/seed.sql.gz" + first := "insert into countries(id) values (1);\n" + second := "insert into countries(id) values (2);\n" + seed, err := NewSeedFile(path, fs.MapFS{ + path: &fs.MapFile{Data: append(gzipData(t, first), gzipData(t, second)...)}, + }) + // Check error + assert.NoError(t, err) + assert.Equal(t, hashSQL(first+second), seed.Hash) + }) + t.Run("finds dirty seeds", func(t *testing.T) { // Setup mock postgres conn := pgtest.NewConn() @@ -175,12 +259,12 @@ func TestSeedData(t *testing.T) { conn := pgtest.NewConn() defer conn.Close(t) mockSeedHistory(conn). - Query(testSeed). - Reply("INSERT 0 1"). - Query(UPSERT_SEED_FILE, seed.Path, seed.Hash). + Query(testSeed + `;INSERT INTO supabase_migrations.seed_files(path, hash) VALUES( 'testdata/seed.sql.gz' , '61868484fc0ddca2a2022217629a9fd9a4cf1ca479432046290797d6d40ffcc3' ) ON CONFLICT (path) DO UPDATE SET hash = EXCLUDED.hash`). Reply("INSERT 0 1") // Run test - err := SeedData(context.Background(), []SeedFile{seed}, conn.MockClient(t), fsys) + err := SeedData(context.Background(), []SeedFile{seed}, conn.MockClient(t, func(cc *pgx.ConnConfig) { + cc.PreferSimpleProtocol = true + }), fsys) // Check error assert.NoError(t, err) }) @@ -233,22 +317,6 @@ func TestSeedGlobals(t *testing.T) { // Check error assert.ErrorContains(t, err, `ERROR: database "postgres" does not exist (SQLSTATE 3D000)`) }) - - t.Run("seeds from gzipped file", func(t *testing.T) { - pending := []string{"testdata/1_globals.sql.gz"} - fsys := fs.MapFS{ - pending[0]: &fs.MapFile{Data: gzipData(t, testGlobals)}, - } - // Setup mock postgres - conn := pgtest.NewConn() - defer conn.Close(t) - conn.Query(testGlobals). - Reply("CREATE ROLE") - // Run test - err := SeedGlobals(context.Background(), pending, conn.MockClient(t), fsys) - // Check error - assert.NoError(t, err) - }) } func gzipData(t *testing.T, input string) []byte { @@ -260,3 +328,69 @@ func gzipData(t *testing.T, input string) []byte { require.NoError(t, writer.Close()) return compressed.Bytes() } + +func hashSQL(sql string) string { + hash := sha256.Sum256([]byte(sql)) + return hex.EncodeToString(hash[:]) +} + +type nonSeekFS map[string][]byte + +func (f nonSeekFS) Open(name string) (stdfs.File, error) { + data, ok := f[name] + if !ok { + return nil, os.ErrNotExist + } + return &nonSeekFile{ + reader: bytes.NewReader(data), + name: name, + size: int64(len(data)), + }, nil +} + +type nonSeekFile struct { + reader *bytes.Reader + name string + size int64 +} + +func (f *nonSeekFile) Read(p []byte) (int, error) { + return f.reader.Read(p) +} + +func (f *nonSeekFile) Close() error { + return nil +} + +func (f *nonSeekFile) Stat() (stdfs.FileInfo, error) { + return fileInfo{name: f.name, size: f.size}, nil +} + +type fileInfo struct { + name string + size int64 +} + +func (f fileInfo) Name() string { + return f.name +} + +func (f fileInfo) Size() int64 { + return f.size +} + +func (f fileInfo) Mode() stdfs.FileMode { + return 0 +} + +func (f fileInfo) ModTime() time.Time { + return time.Time{} +} + +func (f fileInfo) IsDir() bool { + return false +} + +func (f fileInfo) Sys() any { + return nil +} diff --git a/apps/cli/src/shared/init/project-init.templates.ts b/apps/cli/src/shared/init/project-init.templates.ts index e393666499..7a0de0b4e4 100644 --- a/apps/cli/src/shared/init/project-init.templates.ts +++ b/apps/cli/src/shared/init/project-init.templates.ts @@ -67,7 +67,7 @@ schema_paths = [] # If enabled, seeds the database after migrations during a db reset. enabled = true # Specifies an ordered list of seed files to load during db reset. -# Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports glob patterns relative to supabase directory: ["./seeds/*.sql", "./seeds/*.sql.gz"] # Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] diff --git a/packages/config/testdata/legacy-config.toml b/packages/config/testdata/legacy-config.toml index 25ddc89ae7..5a53bfc571 100644 --- a/packages/config/testdata/legacy-config.toml +++ b/packages/config/testdata/legacy-config.toml @@ -61,7 +61,7 @@ test_key = "test_value" # If enabled, seeds the database after migrations during a db reset. enabled = true # Specifies an ordered list of seed files to load during db reset. -# Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports glob patterns relative to supabase directory: ["./seeds/*.sql", "./seeds/*.sql.gz"] # Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] From 15d8a3565e674dab7ae71dee9177db024dd1e37a Mon Sep 17 00:00:00 2001 From: Julien Goux Date: Thu, 25 Jun 2026 12:33:33 +0200 Subject: [PATCH 3/3] chore(db): raise gzip seed scanner limit --- apps/cli-go/pkg/migration/file.go | 5 +++-- apps/cli-go/pkg/migration/seed_test.go | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/apps/cli-go/pkg/migration/file.go b/apps/cli-go/pkg/migration/file.go index 98edcf1556..5a6c5e3b15 100644 --- a/apps/cli-go/pkg/migration/file.go +++ b/apps/cli-go/pkg/migration/file.go @@ -220,11 +220,12 @@ func openSeedSQL(path string, fsys fs.FS) (io.ReadCloser, int, error) { _ = sql.Close() return nil, 0, errors.Errorf("failed to decompress seed file: %w", err) } + limit := seedFileSizeLimit() return &compressedSQLReader{ gz: gz, file: sql, - maxBytes: seedFileSizeLimit(), - }, 0, nil + maxBytes: limit, + }, safeInt(limit), nil } func setScannerCapacity(scannerBufferSize int) { diff --git a/apps/cli-go/pkg/migration/seed_test.go b/apps/cli-go/pkg/migration/seed_test.go index b07b469307..ec438a6921 100644 --- a/apps/cli-go/pkg/migration/seed_test.go +++ b/apps/cli-go/pkg/migration/seed_test.go @@ -10,6 +10,7 @@ import ( "io" stdfs "io/fs" "os" + "strings" "testing" fs "testing/fstest" "time" @@ -19,6 +20,7 @@ import ( "github.com/spf13/viper" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/supabase/cli/pkg/parser" "github.com/supabase/cli/pkg/pgtest" ) @@ -136,6 +138,25 @@ func TestPendingSeeds(t *testing.T) { assert.ErrorContains(t, err, "decompressed seed file exceeds 8 bytes") }) + t.Run("parses gzipped seed with large statement", func(t *testing.T) { + t.Cleanup(viper.Reset) + originalMaxScannerCapacity := parser.MaxScannerCapacity + parser.MaxScannerCapacity = 256 * 1024 + t.Cleanup(func() { + parser.MaxScannerCapacity = originalMaxScannerCapacity + }) + path := "testdata/seed.sql.gz" + sql := "select '" + strings.Repeat("a", parser.MaxScannerCapacity) + "'" + fsys := fs.MapFS{ + path: &fs.MapFile{Data: gzipData(t, sql)}, + } + // Run test + lines, err := parseSeedFile(path, fsys) + // Check error + assert.NoError(t, err) + assert.Equal(t, []string{sql}, lines) + }) + t.Run("hashes gzipped seed from non-seekable fs", func(t *testing.T) { path := "testdata/seed.sql.gz" seed, err := NewSeedFile(path, nonSeekFS{