diff --git a/apps/cli-go/docs/supabase/db/reset.md b/apps/cli-go/docs/supabase/db/reset.md index acb9b9832b..67e44f35a1 100644 --- a/apps/cli-go/docs/supabase/db/reset.md +++ b/apps/cli-go/docs/supabase/db/reset.md @@ -4,6 +4,6 @@ Resets the local database to a clean state. Requires the local development stack to be started by running `supabase start`. -Recreates the local Postgres container and applies all local migrations found in `supabase/migrations` directory. If test data is defined in `supabase/seed.sql`, it will be seeded after the migrations are run. Any other data or schema changes made during local development will be discarded. +Recreates the local Postgres container and applies all local migrations found in `supabase/migrations` directory. If test data is defined in `supabase/seed.sql` or configured in `[db.seed] sql_paths`, it will be seeded after the migrations are run. Any other data or schema changes made during local development will be discarded. When running db reset with `--linked` or `--db-url` flag, a SQL script is executed to identify and drop all user created entities in the remote database. Since Postgres roles are cluster level entities, any custom roles created through the dashboard or `supabase/roles.sql` will not be deleted by remote reset. diff --git a/apps/cli-go/pkg/config/templates/config.toml b/apps/cli-go/pkg/config/templates/config.toml index 56cc27beac..0416d9ad10 100644 --- a/apps/cli-go/pkg/config/templates/config.toml +++ b/apps/cli-go/pkg/config/templates/config.toml @@ -67,7 +67,8 @@ schema_paths = [] # If enabled, seeds the database after migrations during a db reset. enabled = true # Specifies an ordered list of seed files to load during db reset. -# Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports glob patterns relative to supabase directory: ["./seeds/*.sql", "./seeds/*.sql.gz"] +# Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] [db.network_restrictions] diff --git a/apps/cli-go/pkg/config/testdata/config.toml b/apps/cli-go/pkg/config/testdata/config.toml index 1c60b8af17..6daa4b13c4 100644 --- a/apps/cli-go/pkg/config/testdata/config.toml +++ b/apps/cli-go/pkg/config/testdata/config.toml @@ -61,7 +61,8 @@ test_key = "test_value" # If enabled, seeds the database after migrations during a db reset. enabled = true # Specifies an ordered list of seed files to load during db reset. -# Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports glob patterns relative to supabase directory: ["./seeds/*.sql", "./seeds/*.sql.gz"] +# Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] [db.network_restrictions] diff --git a/apps/cli-go/pkg/migration/file.go b/apps/cli-go/pkg/migration/file.go index 540c129e33..5a6c5e3b15 100644 --- a/apps/cli-go/pkg/migration/file.go +++ b/apps/cli-go/pkg/migration/file.go @@ -2,9 +2,11 @@ package migration import ( "bytes" + "compress/gzip" "context" "crypto/sha256" "encoding/hex" + stderrors "errors" "fmt" "io" "io/fs" @@ -31,7 +33,12 @@ var ( typeNamePattern = regexp.MustCompile(`type "([^"]+)" does not exist`) ) +const defaultSeedFileSizeLimit = 10 << 30 + func NewMigrationFromFile(path string, fsys fs.FS) (*MigrationFile, error) { + if isCompressedSQL(path) { + return nil, errors.Errorf("compressed SQL files are only supported for seed files: %s", path) + } lines, err := parseFile(path, fsys) if err != nil { return nil, err @@ -53,14 +60,17 @@ func parseFile(path string, fsys fs.FS) ([]string, error) { return nil, errors.Errorf("failed to open migration file: %w", err) } defer sql.Close() - // Unless explicitly specified, Use file length as max buffer size - if !viper.IsSet("SCANNER_BUFFER_SIZE") { - if fi, err := sql.Stat(); err == nil { - if size := int(fi.Size()); size > parser.MaxScannerCapacity { - parser.MaxScannerCapacity = size - } - } + setScannerCapacity(fileSize(sql)) + return parser.SplitAndTrim(sql) +} + +func parseSeedFile(path string, fsys fs.FS) ([]string, error) { + sql, scannerBufferSize, err := openSeedSQL(path, fsys) + if err != nil { + return nil, err } + defer sql.Close() + setScannerCapacity(scannerBufferSize) return parser.SplitAndTrim(sql) } @@ -182,11 +192,13 @@ type SeedFile struct { } func NewSeedFile(path string, fsys fs.FS) (*SeedFile, error) { - sql, err := fsys.Open(path) + sql, _, err := openSeedSQL(path, fsys) if err != nil { - return nil, errors.Errorf("failed to open seed file: %w", err) + return nil, err } defer sql.Close() + // Seed history hashes the decompressed SQL so gzip metadata changes and + // equivalent recompressions do not mark the seed dirty. hash := sha256.New() if _, err := io.Copy(hash, sql); err != nil { return nil, errors.Errorf("failed to hash file: %w", err) @@ -195,9 +207,104 @@ func NewSeedFile(path string, fsys fs.FS) (*SeedFile, error) { return &SeedFile{Path: path, Hash: digest}, nil } +func openSeedSQL(path string, fsys fs.FS) (io.ReadCloser, int, error) { + sql, err := fsys.Open(path) + if err != nil { + return nil, 0, errors.Errorf("failed to open seed file: %w", err) + } + if !isCompressedSQL(path) { + return sql, fileSize(sql), nil + } + gz, err := gzip.NewReader(sql) + if err != nil { + _ = sql.Close() + return nil, 0, errors.Errorf("failed to decompress seed file: %w", err) + } + limit := seedFileSizeLimit() + return &compressedSQLReader{ + gz: gz, + file: sql, + maxBytes: limit, + }, safeInt(limit), nil +} + +func setScannerCapacity(scannerBufferSize int) { + // Unless explicitly specified, use file length as max buffer size. + if !viper.IsSet("SCANNER_BUFFER_SIZE") { + if scannerBufferSize > parser.MaxScannerCapacity { + parser.MaxScannerCapacity = scannerBufferSize + } + } +} + +func fileSize(sql fs.File) int { + info, err := sql.Stat() + if err != nil { + return 0 + } + return safeInt(info.Size()) +} + +func seedFileSizeLimit() int64 { + limit := int64(viper.GetSizeInBytes("SEED_FILE_SIZE_LIMIT")) + if limit <= 0 { + return defaultSeedFileSizeLimit + } + return limit +} + +func safeInt(size int64) int { + if size <= 0 { + return 0 + } + maxInt := int64(^uint(0) >> 1) + if size > maxInt { + return int(maxInt) + } + return int(size) +} + +func isCompressedSQL(path string) bool { + return strings.HasSuffix(strings.ToLower(path), ".sql.gz") +} + +type compressedSQLReader struct { + gz *gzip.Reader + file fs.File + maxBytes int64 + bytesRead int64 +} + +func (r *compressedSQLReader) Read(p []byte) (int, error) { + if r.maxBytes > 0 { + remaining := r.maxBytes - r.bytesRead + if remaining <= 0 { + var probe [1]byte + n, err := r.gz.Read(probe[:]) + if n > 0 { + return 0, errors.Errorf("decompressed seed file exceeds %d bytes", r.maxBytes) + } + return 0, err + } + if int64(len(p)) > remaining { + p = p[:remaining] + } + } + n, err := r.gz.Read(p) + r.bytesRead += int64(n) + if n > 0 && err == io.EOF { + return n, nil + } + return n, err +} + +func (r *compressedSQLReader) Close() error { + return stderrors.Join(r.gz.Close(), r.file.Close()) +} + func (m *SeedFile) ExecBatchWithCache(ctx context.Context, conn *pgx.Conn, fsys fs.FS) error { // Parse each file individually to reduce memory usage - lines, err := parseFile(m.Path, fsys) + lines, err := parseSeedFile(m.Path, fsys) if err != nil { return err } diff --git a/apps/cli-go/pkg/migration/file_test.go b/apps/cli-go/pkg/migration/file_test.go index 703f26954c..76e1a621c2 100644 --- a/apps/cli-go/pkg/migration/file_test.go +++ b/apps/cli-go/pkg/migration/file_test.go @@ -30,6 +30,14 @@ func TestMigrationFile(t *testing.T) { assert.Equal(t, "20220727064247", migration.Version) }) + t.Run("new from gzipped file returns clear error", func(t *testing.T) { + // Run test + migration, err := NewMigrationFromFile("20220727064247_create_table.sql.gz", fs.MapFS{}) + // Check error + assert.ErrorContains(t, err, "compressed SQL files are only supported for seed files") + assert.Nil(t, migration) + }) + t.Run("new from reader errors on max token", func(t *testing.T) { viper.Reset() sql := "\tBEGIN; " + strings.Repeat("a", parser.MaxScannerCapacity) diff --git a/apps/cli-go/pkg/migration/seed_test.go b/apps/cli-go/pkg/migration/seed_test.go index db4337b54c..ec438a6921 100644 --- a/apps/cli-go/pkg/migration/seed_test.go +++ b/apps/cli-go/pkg/migration/seed_test.go @@ -1,16 +1,26 @@ package migration import ( + "bytes" + "compress/gzip" "context" + "crypto/sha256" _ "embed" + "encoding/hex" + "io" + stdfs "io/fs" "os" + "strings" "testing" fs "testing/fstest" + "time" "github.com/jackc/pgerrcode" "github.com/jackc/pgx/v4" + "github.com/spf13/viper" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/supabase/cli/pkg/parser" "github.com/supabase/cli/pkg/pgtest" ) @@ -36,6 +46,139 @@ func TestPendingSeeds(t *testing.T) { assert.False(t, seeds[0].Dirty) }) + t.Run("finds gzipped seeds", func(t *testing.T) { + pending := []string{"testdata/seed.sql.gz"} + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: gzipData(t, testSeed)}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(SELECT_SEED_TABLE). + Reply("SELECT 0") + // Run test + seeds, err := GetPendingSeeds(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.NoError(t, err) + require.Len(t, seeds, 1) + assert.Equal(t, seeds[0].Path, pending[0]) + assert.Equal(t, seeds[0].Hash, "61868484fc0ddca2a2022217629a9fd9a4cf1ca479432046290797d6d40ffcc3") + assert.False(t, seeds[0].Dirty) + }) + + t.Run("finds dirty gzipped seeds", func(t *testing.T) { + pending := []string{"testdata/seed.sql.gz"} + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: gzipData(t, testSeed)}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(SELECT_SEED_TABLE). + Reply("SELECT 1", SeedFile{Path: pending[0], Hash: "outdated"}) + // Run test + seeds, err := GetPendingSeeds(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.NoError(t, err) + require.Len(t, seeds, 1) + assert.Equal(t, seeds[0].Path, pending[0]) + assert.Equal(t, seeds[0].Hash, "61868484fc0ddca2a2022217629a9fd9a4cf1ca479432046290797d6d40ffcc3") + assert.True(t, seeds[0].Dirty) + }) + + t.Run("throws error on invalid gzipped seed", func(t *testing.T) { + pending := []string{"testdata/seed.sql.gz"} + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: []byte("not gzip data")}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(SELECT_SEED_TABLE). + Reply("SELECT 0") + // Run test + _, err := GetPendingSeeds(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.ErrorContains(t, err, "failed to decompress seed file") + }) + + t.Run("throws error on truncated gzipped seed", func(t *testing.T) { + pending := []string{"testdata/seed.sql.gz"} + compressed := gzipData(t, testSeed) + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: compressed[:len(compressed)/2]}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(SELECT_SEED_TABLE). + Reply("SELECT 0") + // Run test + _, err := GetPendingSeeds(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.ErrorContains(t, err, "failed to hash file") + assert.ErrorIs(t, err, io.ErrUnexpectedEOF) + }) + + t.Run("throws error when gzipped seed exceeds size limit", func(t *testing.T) { + t.Cleanup(viper.Reset) + viper.Set("SEED_FILE_SIZE_LIMIT", 8) + pending := []string{"testdata/seed.sql.gz"} + fsys := fs.MapFS{ + pending[0]: &fs.MapFile{Data: gzipData(t, testSeed)}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + conn.Query(SELECT_SEED_TABLE). + Reply("SELECT 0") + // Run test + _, err := GetPendingSeeds(context.Background(), pending, conn.MockClient(t), fsys) + // Check error + assert.ErrorContains(t, err, "decompressed seed file exceeds 8 bytes") + }) + + t.Run("parses gzipped seed with large statement", func(t *testing.T) { + t.Cleanup(viper.Reset) + originalMaxScannerCapacity := parser.MaxScannerCapacity + parser.MaxScannerCapacity = 256 * 1024 + t.Cleanup(func() { + parser.MaxScannerCapacity = originalMaxScannerCapacity + }) + path := "testdata/seed.sql.gz" + sql := "select '" + strings.Repeat("a", parser.MaxScannerCapacity) + "'" + fsys := fs.MapFS{ + path: &fs.MapFile{Data: gzipData(t, sql)}, + } + // Run test + lines, err := parseSeedFile(path, fsys) + // Check error + assert.NoError(t, err) + assert.Equal(t, []string{sql}, lines) + }) + + t.Run("hashes gzipped seed from non-seekable fs", func(t *testing.T) { + path := "testdata/seed.sql.gz" + seed, err := NewSeedFile(path, nonSeekFS{ + path: gzipData(t, testSeed), + }) + // Check error + assert.NoError(t, err) + assert.Equal(t, "61868484fc0ddca2a2022217629a9fd9a4cf1ca479432046290797d6d40ffcc3", seed.Hash) + }) + + t.Run("hashes multistream gzipped seed", func(t *testing.T) { + path := "testdata/seed.sql.gz" + first := "insert into countries(id) values (1);\n" + second := "insert into countries(id) values (2);\n" + seed, err := NewSeedFile(path, fs.MapFS{ + path: &fs.MapFile{Data: append(gzipData(t, first), gzipData(t, second)...)}, + }) + // Check error + assert.NoError(t, err) + assert.Equal(t, hashSQL(first+second), seed.Hash) + }) + t.Run("finds dirty seeds", func(t *testing.T) { // Setup mock postgres conn := pgtest.NewConn() @@ -124,6 +267,28 @@ func TestSeedData(t *testing.T) { // Check error assert.ErrorContains(t, err, `ERROR: null value in column "age" of relation "employees" (SQLSTATE 23502)`) }) + + t.Run("seeds from gzipped file", func(t *testing.T) { + seed := SeedFile{ + Path: "testdata/seed.sql.gz", + Hash: "61868484fc0ddca2a2022217629a9fd9a4cf1ca479432046290797d6d40ffcc3", + } + fsys := fs.MapFS{ + seed.Path: &fs.MapFile{Data: gzipData(t, testSeed)}, + } + // Setup mock postgres + conn := pgtest.NewConn() + defer conn.Close(t) + mockSeedHistory(conn). + Query(testSeed + `;INSERT INTO supabase_migrations.seed_files(path, hash) VALUES( 'testdata/seed.sql.gz' , '61868484fc0ddca2a2022217629a9fd9a4cf1ca479432046290797d6d40ffcc3' ) ON CONFLICT (path) DO UPDATE SET hash = EXCLUDED.hash`). + Reply("INSERT 0 1") + // Run test + err := SeedData(context.Background(), []SeedFile{seed}, conn.MockClient(t, func(cc *pgx.ConnConfig) { + cc.PreferSimpleProtocol = true + }), fsys) + // Check error + assert.NoError(t, err) + }) } func mockSeedHistory(conn *pgtest.MockConn) *pgtest.MockConn { @@ -174,3 +339,79 @@ func TestSeedGlobals(t *testing.T) { assert.ErrorContains(t, err, `ERROR: database "postgres" does not exist (SQLSTATE 3D000)`) }) } + +func gzipData(t *testing.T, input string) []byte { + t.Helper() + var compressed bytes.Buffer + writer := gzip.NewWriter(&compressed) + _, err := writer.Write([]byte(input)) + require.NoError(t, err) + require.NoError(t, writer.Close()) + return compressed.Bytes() +} + +func hashSQL(sql string) string { + hash := sha256.Sum256([]byte(sql)) + return hex.EncodeToString(hash[:]) +} + +type nonSeekFS map[string][]byte + +func (f nonSeekFS) Open(name string) (stdfs.File, error) { + data, ok := f[name] + if !ok { + return nil, os.ErrNotExist + } + return &nonSeekFile{ + reader: bytes.NewReader(data), + name: name, + size: int64(len(data)), + }, nil +} + +type nonSeekFile struct { + reader *bytes.Reader + name string + size int64 +} + +func (f *nonSeekFile) Read(p []byte) (int, error) { + return f.reader.Read(p) +} + +func (f *nonSeekFile) Close() error { + return nil +} + +func (f *nonSeekFile) Stat() (stdfs.FileInfo, error) { + return fileInfo{name: f.name, size: f.size}, nil +} + +type fileInfo struct { + name string + size int64 +} + +func (f fileInfo) Name() string { + return f.name +} + +func (f fileInfo) Size() int64 { + return f.size +} + +func (f fileInfo) Mode() stdfs.FileMode { + return 0 +} + +func (f fileInfo) ModTime() time.Time { + return time.Time{} +} + +func (f fileInfo) IsDir() bool { + return false +} + +func (f fileInfo) Sys() any { + return nil +} diff --git a/apps/cli/src/shared/init/project-init.templates.ts b/apps/cli/src/shared/init/project-init.templates.ts index 6415e44a22..7a0de0b4e4 100644 --- a/apps/cli/src/shared/init/project-init.templates.ts +++ b/apps/cli/src/shared/init/project-init.templates.ts @@ -67,7 +67,8 @@ schema_paths = [] # If enabled, seeds the database after migrations during a db reset. enabled = true # Specifies an ordered list of seed files to load during db reset. -# Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports glob patterns relative to supabase directory: ["./seeds/*.sql", "./seeds/*.sql.gz"] +# Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] [db.network_restrictions] diff --git a/apps/docs/public/cli/config.schema.json b/apps/docs/public/cli/config.schema.json index 28062d0891..7d59025ec7 100644 --- a/apps/docs/public/cli/config.schema.json +++ b/apps/docs/public/cli/config.schema.json @@ -2440,9 +2440,9 @@ "type": "array", "items": { "type": "string", - "description": "Path to a SQL file used to seed the database." + "description": "Path to a SQL or gzipped SQL file used to seed the database." }, - "description": "Ordered list of seed files to load during db reset.", + "description": "Ordered list of seed files to load during db reset. Supports .sql and .sql.gz files.", "default": [ "./seed.sql" ] @@ -5985,9 +5985,9 @@ "type": "array", "items": { "type": "string", - "description": "Path to a SQL file used to seed the database." + "description": "Path to a SQL or gzipped SQL file used to seed the database." }, - "description": "Ordered list of seed files to load during db reset.", + "description": "Ordered list of seed files to load during db reset. Supports .sql and .sql.gz files.", "default": [ "./seed.sql" ] diff --git a/packages/config/src/db.ts b/packages/config/src/db.ts index 1bc6c57c9f..857062a116 100644 --- a/packages/config/src/db.ts +++ b/packages/config/src/db.ts @@ -151,13 +151,14 @@ export const db = Schema.Struct({ }).pipe(Schema.withDecodingDefaultKey(Effect.succeed(defaultSeedEnabled))), sql_paths: Schema.Array( Schema.String.annotate({ - description: "Path to a SQL file used to seed the database.", + description: "Path to a SQL or gzipped SQL file used to seed the database.", tags, }), ) .annotate({ default: defaultSqlPaths, - description: "Ordered list of seed files to load during db reset.", + description: + "Ordered list of seed files to load during db reset. Supports .sql and .sql.gz files.", tags, }) .pipe(Schema.withDecodingDefaultKey(Effect.succeed([...defaultSqlPaths]))), diff --git a/packages/config/testdata/legacy-config.toml b/packages/config/testdata/legacy-config.toml index b228a9c073..5a53bfc571 100644 --- a/packages/config/testdata/legacy-config.toml +++ b/packages/config/testdata/legacy-config.toml @@ -61,7 +61,8 @@ test_key = "test_value" # If enabled, seeds the database after migrations during a db reset. enabled = true # Specifies an ordered list of seed files to load during db reset. -# Supports glob patterns relative to supabase directory: "./seeds/*.sql" +# Supports glob patterns relative to supabase directory: ["./seeds/*.sql", "./seeds/*.sql.gz"] +# Supports gzipped SQL files with ".sql.gz" extension. sql_paths = ["./seed.sql"] [db.network_restrictions]