diff --git a/README.md b/README.md index d0b4776..c90a2da 100644 --- a/README.md +++ b/README.md @@ -1,325 +1,177 @@ -# Segment Event Transformation dbt Package +# Segment Product Analytics -This dbt package unifies your Segment event data into a consistent schema, handling both standard Segment tables (tracks, pages, identifies) and custom event tables. +This dbt project helps analyze Segment event data by unifying all event tables (tracks, pages, identifies) into a single consistent schema, making it easier to analyze user behavior and product usage. -## 🎯 What This Package Does +## Prerequisites -1. Discovers all your Segment event tables automatically -2. Unifies them into a single consistent `allevents` schema -3. Properly handles context properties and user properties -4. Works with both BigQuery and Snowflake -5. Supports custom event tables automatically +- Python 3.8 or higher +- Access to either BigQuery or Snowflake data warehouse +- Segment data exported to your data warehouse -## 📋 Prerequisites - -1. dbt installed (version 1.0.0 or higher) -2. Access to your Segment data in either: - - BigQuery - - Snowflake -3. The following Segment tables should exist in your warehouse: - - tracks - - pages (optional) - - identifies (optional) - - Any custom event tables - -## 🔐 Setting Up Warehouse Credentials - -### BigQuery Setup - -1. **Authentication Methods** - - BigQuery supports two authentication methods: - - a) **OAuth (Recommended for Local Development)** - ```bash - # Install Google Cloud SDK - brew install google-cloud-sdk # macOS - # OR visit https://cloud.google.com/sdk/docs/install for other OS - - # Login to Google Cloud - gcloud auth application-default login - - # Set your project - gcloud config set project your-project-id - ``` - - b) **Service Account (Recommended for Production)** - - Go to Google Cloud Console > IAM & Admin > Service Accounts - - Create a new service account or select existing - - Grant required roles: `BigQuery Data Viewer`, `BigQuery User` - - Create and download JSON key - - Save the JSON key file securely - - Update your profile: - ```yaml - # ~/.dbt/profiles.yml - your_project: - target: dev - outputs: - dev: - type: bigquery - method: service-account - project: your-project-id - dataset: your_segment_dataset - location: US - threads: 4 - keyfile: /path/to/your/keyfile.json # Full path to your keyfile - ``` - -### Snowflake Setup - -1. **Create a Snowflake User and Role** - ```sql - -- Run in Snowflake as ACCOUNTADMIN - -- Create role - CREATE ROLE IF NOT EXISTS TRANSFORMER_ROLE; - GRANT USAGE ON WAREHOUSE your_warehouse TO ROLE TRANSFORMER_ROLE; - GRANT USAGE ON DATABASE your_database TO ROLE TRANSFORMER_ROLE; - GRANT USAGE ON SCHEMA your_database.your_schema TO ROLE TRANSFORMER_ROLE; - GRANT SELECT ON ALL TABLES IN SCHEMA your_database.your_schema TO ROLE TRANSFORMER_ROLE; - - -- Create user - CREATE USER IF NOT EXISTS dbt_user - PASSWORD = 'your-secure-password' - DEFAULT_ROLE = TRANSFORMER_ROLE - DEFAULT_WAREHOUSE = your_warehouse; - - -- Grant role to user - GRANT ROLE TRANSFORMER_ROLE TO USER dbt_user; - ``` - -2. 
**Configure Credentials** - - Choose one of these methods: - - a) **Direct in profiles.yml (Development Only)** - ```yaml - # ~/.dbt/profiles.yml - your_project(name it `segment_product_analytics`): - target: dev - outputs: - dev: - type: snowflake - account: your-account - user: dbt_user - password: your-secure-password - role: TRANSFORMER_ROLE - database: your_database - warehouse: your_warehouse - schema: your_schema - threads: 4 - ``` - - b) **Using Environment Variables (Recommended)** - ```yaml - # ~/.dbt/profiles.yml - your_project(name it `segment_product_analytics`): - target: dev - outputs: - dev: - type: snowflake - account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}" - user: "{{ env_var('SNOWFLAKE_USER') }}" - password: "{{ env_var('SNOWFLAKE_PASSWORD') }}" - role: "{{ env_var('SNOWFLAKE_ROLE') }}" - database: "{{ env_var('SNOWFLAKE_DATABASE') }}" - warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}" - schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}" - threads: 4 - ``` - - Then set in your shell: - ```bash - export SNOWFLAKE_ACCOUNT=your-account - export SNOWFLAKE_USER=dbt_user - export SNOWFLAKE_PASSWORD=your-secure-password - export SNOWFLAKE_ROLE=TRANSFORMER_ROLE - export SNOWFLAKE_DATABASE=your_database - export SNOWFLAKE_WAREHOUSE=your_warehouse - export SNOWFLAKE_SCHEMA=your_schema - ``` - -### Verifying Your Setup - -After setting up credentials, verify your connection: +## Installation +1. Clone this repository: ```bash -# Test your connection -dbt debug - -# If successful, you should see something like: -# Connection test: OK +git clone [repository-url] +cd segment-product-analytics ``` -## Quick Start - -1. **Setup Environment** +2. Create and activate a virtual environment: +```bash +python -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +``` - ```bash - # Create and activate virtual environment - python -m venv .venv - source .venv/bin/activate # On Windows, use `.venv\Scripts\activate` - - # Install dbt - pip install dbt-core - # Install warehouse-specific adapter - pip install dbt-bigquery # for BigQuery - # OR - pip install dbt-snowflake # for Snowflake - ``` +3. Install dbt and the required adapter: +```bash +# Install dbt +pip install dbt -2. **Install the Package** +# For BigQuery +pip install dbt-bigquery - Add to your `packages.yml`: - ```yaml - packages: - - git: "https://github.com/your-org/segment-product-analytics.git" - revision: main - ``` +# For Snowflake +pip install dbt-snowflake +``` -3. **Configure Your dbt Profile** - - In `~/.dbt/profiles.yml`: - - For BigQuery: - ```yaml - your_project: - target: dev - outputs: - dev: - type: bigquery - method: oauth # or service_account - project: your-project-id - dataset: your_segment_dataset - location: US # adjust as needed - threads: 4 - ``` +## Configuration - For Snowflake: - ```yaml - your_project: - target: dev - outputs: - dev: - type: snowflake - account: your-account - user: your-user - password: your-password - role: your-role - database: your-database - warehouse: your-warehouse - schema: your-schema - threads: 4 - ``` +### BigQuery Setup -4. **Configure the Package** +1. Set up Google Cloud credentials: + - Create a service account in Google Cloud Console + - Download the JSON key file + - Save it securely on your machine - In your `dbt_project.yml`: - ```yaml - vars: - segment_product_analytics: - segment_schema: "your_segment_schema" # Schema where your Segment tables live - ``` +2. 
Configure your `~/.dbt/profiles.yml`:
+```yaml
+segment_product_analytics:
+  target: dev
+  outputs:
+    dev:
+      type: bigquery
+      method: service-account
+      project: [your-project-id]
+      dataset: [your-dataset]
+      location: US  # or your preferred location
+      threads: 4
+      timeout_seconds: 300
+      priority: interactive
+      keyfile: /path/to/your/service-account.json  # Path to your downloaded key file
+```
 
-5. **Run the Transformation**
+### Snowflake Setup
 
+1. Set up Snowflake key pair authentication:
    ```bash
-   # Install dbt dependencies
-   dbt deps
-
-   # Run the transformation
-   dbt run --select segment_product_analytics.allevents
+   # Generate private key
+   openssl genrsa 2048 | openssl pkcs8 -topk8 -inform PEM -out rsa_key.p8 -nocrypt
+
+   # Generate public key
+   openssl rsa -in rsa_key.p8 -pubout -out rsa_key.pub
    ```
 
-## 📊 Output Schema
-
-The package creates a table called `allevents` with the following schema:
+2. In Snowflake, create a user and assign the public key:
+   ```sql
+   CREATE USER dbt_user;
+   ALTER USER dbt_user SET RSA_PUBLIC_KEY='<contents of rsa_key.pub, without the BEGIN/END PUBLIC KEY lines>';
+   ```
 
-```sql
-device_id STRING,       -- Anonymous ID from Segment
-user_id STRING,         -- User ID if identified
-event_name STRING,      -- Name of the event
-event_id STRING,        -- Unique event identifier
-server_ts TIMESTAMP,    -- Server-side timestamp
-device_ts TIMESTAMP,    -- Client-side timestamp
-properties JSON,        -- All context properties
-user_properties JSON    -- User properties (from identifies)
+3. Configure your `~/.dbt/profiles.yml`:
+```yaml
+segment_product_analytics:
+  target: dev
+  outputs:
+    dev:
+      type: snowflake
+      account: [your-account-id]  # e.g., xy12345.us-east-1
+      user: dbt_user
+      private_key_path: [path/to/rsa_key.p8]
+      role: [your-role]
+      database: [your-database]
+      warehouse: [your-warehouse]
+      schema: [your-schema]
+      threads: 4
+      client_session_keep_alive: True
 ```
 
-## 🔍 Handling Custom Events
-
-The package automatically discovers and processes any custom event tables in your Segment schema. For example, if you have a custom event table called `button_clicked`, it will:
+## Project Configuration
 
-1. Automatically discover the table
-2. Extract all CONTEXT_* fields into the properties JSON
-3. Set the event_name to either:
-   - The EVENT column value if it exists
-   - The table name if no EVENT column
-
-## 🛠️ Customization
-
-### Custom Event Name Mapping
-
-If you need to customize event names, you can override the default naming in your `dbt_project.yml`:
+1. Update the source configuration in `models/staging/segment_sources.yml`:
+```yaml
+sources:
+  - name: segment_schema  # This source name is what the models reference, e.g. source('segment_schema', 'tracks')
+    database: [your-database]  # e.g., LD_PRODUCT_ANALYTICS
+    schema: [your-schema]  # e.g., SEGMENT_EXPORT_SAMPLE
+    tables:
+      - name: tracks
+        # The identifier handles case sensitivity differences between BigQuery and Snowflake
+        # BigQuery uses lowercase, Snowflake uses uppercase by default
+        identifier: "{% if target.type == 'bigquery' %}tracks{% else %}TRACKS{% endif %}"
+      - name: pages
+        identifier: "{% if target.type == 'bigquery' %}pages{% else %}PAGES{% endif %}"
+      - name: identifies
+        identifier: "{% if target.type == 'bigquery' %}identifies{% else %}IDENTIFIES{% endif %}"
+```
+2. Update variables in `dbt_project.yml`:
 ```yaml
 vars:
   segment_product_analytics:
-    event_name_overrides:
-      button_clicked: "User Button Click"
-      page_loaded: "Page View"
+    # This variable is used by the union_event_tables macro to determine which schema
+    # contains your Segment data. It should match the schema in your source definition above.
+    segment_schema: [your-schema]  # e.g., SEGMENT_EXPORT_SAMPLE
 ```
 
-### Adding Tests
-
-Create a file `models/staging/segment_product_analytics.yml`:
+## Running the Project
 
-```yaml
-version: 2
-
-models:
-  - name: allevents
-    columns:
-      - name: event_id
-        tests:
-          - unique
-          - not_null
-      - name: server_ts
-        tests:
-          - not_null
+1. Test the connection:
+```bash
+dbt debug
 ```
 
-## 🔍 Troubleshooting
+2. Run the models. The `--target` names below assume your `profiles.yml` defines separate `bigquery` and `snowflake` targets (see the profile sketch at the end of this diff); with the single `dev` target shown above, plain `dbt run` is enough:
+```bash
+# For BigQuery
+dbt run --target bigquery
+
+# For Snowflake
+dbt run --target snowflake
+```
 
-1. **Missing Tables**
-   - Ensure your Segment schema is correctly specified
-   - Check table permissions
-   - Verify table names match expected casing
+## Project Structure
 
-2. **JSON Errors**
-   - BigQuery: Ensure proper JSON formatting
-   - Snowflake: Check OBJECT_CONSTRUCT usage
+- `macros/`: Contains reusable SQL macros
+  - `union_event_tables.sql`: Unifies all event tables into a single schema
+  - `get_event_tables.sql`: Discovers custom event tables
+- `models/`: Contains dbt models
+  - `allevents.sql`: Main model that combines all events
+  - `staging/`: Contains source definitions and staging models
 
-3. **Performance Issues**
-   - Consider partitioning by server_ts
-   - Add appropriate clustering keys
-   - Optimize JSON queries
+## Notes for Snowflake Users
 
-## 📝 Development
+1. The project uses `VARIANT` type for JSON columns in Snowflake
+2. Table and column names are automatically converted to uppercase
+3. Make sure your Snowflake role has access to:
+   - The database and schema containing Segment data
+   - The ability to create tables in the target schema
+   - The ability to read from the source tables
 
-To contribute:
+## Troubleshooting
 
-1. Clone the repository
-2. Install dependencies: `dbt deps`
-3. Make changes
-4. Add tests
-5. Submit a PR
+1. If you get a "Field 'path' of type Path in SnowflakeRelation has invalid value" error:
+   - Check that your schema and database names are correct
+   - Ensure case sensitivity matches your Snowflake configuration
 
-## 🤝 Need Help?
+2. If you get a "data type mismatch for column 'USER_PROPERTIES'" error:
+   - Make sure you're using the latest version of the macros
+   - Verify that `object_construct()` results are cast to `VARIANT`
 
-- Open an issue
-- Check existing documentation
-- Contact maintainers
+3. For connection issues:
+   - Verify your credentials in `~/.dbt/profiles.yml`
+   - Check that your IP is whitelisted in Snowflake
+   - Ensure your service account has the correct permissions in BigQuery
 
-## 📜 License
+## Contributing
 
-MIT
+Feel free to submit issues and enhancement requests!
diff --git a/dbt_project.yml b/dbt_project.yml
index ff5b926..50b56ec 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -32,4 +32,4 @@ models:
 
 vars:
   segment_product_analytics:
-    segment_schema: "test_segment_export"  # This is where your Segment tables are located
+    segment_schema: "SEGMENT_EXPORT_SAMPLE"  # This is where your Segment tables are located
diff --git a/macros/build_point_in_time_user_properties.sql b/macros/build_point_in_time_user_properties.sql
deleted file mode 100644
index 0fbd339..0000000
--- a/macros/build_point_in_time_user_properties.sql
+++ /dev/null
@@ -1,99 +0,0 @@
-{#
-    This macro builds point-in-time user properties by maintaining cumulative state.
-    For each user, it tracks all their properties over time, keeping the last non-null value for each property.
- - Args: - schema: The schema containing Segment tables - database: The database to search in (defaults to target.database) - - Returns: - A SQL query that generates user properties as they existed at each identify call -#} -{% macro build_point_in_time_user_properties(schema, database=target.database) %} - {% set identifies_relation = source(schema, 'identifies') %} - {% set cols = adapter.get_columns_in_relation(identifies_relation) %} - {% set user_property_columns = [] %} - - {# Get all user property columns from identifies table #} - {% for col in cols %} - {% if not col.column.startswith('CONTEXT_') and col.column not in ['ANONYMOUS_ID', 'USER_ID', 'ID', 'TIMESTAMP', 'SENT_AT', 'RECEIVED_AT', 'ORIGINAL_TIMESTAMP', 'UUID_TS', 'EVENT', 'EVENT_TEXT'] %} - {% do user_property_columns.append(col.column) %} - {% endif %} - {% endfor %} - - {% if target.type == 'bigquery' %} - WITH user_property_changes AS ( - SELECT - USER_ID as user_id, - TIMESTAMP as event_ts, - ROW_NUMBER() OVER (PARTITION BY USER_ID, TIMESTAMP ORDER BY RECEIVED_AT DESC) as rn_same_ts, - {% for column in user_property_columns %} - LAST_VALUE({{ column }}) OVER ( - PARTITION BY USER_ID - ORDER BY TIMESTAMP, RECEIVED_AT - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW - ) as {{ column | lower }} - {%- if not loop.last %},{% endif -%} - {% endfor %} - FROM {{ identifies_relation }} - WHERE USER_ID IS NOT NULL - ), - deduplicated_changes AS ( - SELECT - user_id, - event_ts, - TO_JSON_STRING( - STRUCT( - {% for column in user_property_columns %} - SAFE_CAST({{ column | lower }} AS STRING) as {{ column | lower }} - {%- if not loop.last %},{% endif -%} - {% endfor %} - ) - ) as user_properties - FROM user_property_changes - WHERE rn_same_ts = 1 - ) - SELECT - user_id, - event_ts, - user_properties - FROM deduplicated_changes - {% else %} - WITH user_property_changes AS ( - SELECT - USER_ID as user_id, - TIMESTAMP as event_ts, - ROW_NUMBER() OVER (PARTITION BY USER_ID, TIMESTAMP ORDER BY RECEIVED_AT DESC) as rn_same_ts, - {% for column in user_property_columns %} - LAST_VALUE({{ column }}) OVER ( - PARTITION BY USER_ID - ORDER BY TIMESTAMP, RECEIVED_AT - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW - ) as {{ column | lower }} - {%- if not loop.last %},{% endif -%} - {% endfor %} - FROM {{ identifies_relation }} - WHERE USER_ID IS NOT NULL - ), - deduplicated_changes AS ( - SELECT - user_id, - event_ts, - TO_VARCHAR( - object_construct( - {% for column in user_property_columns %} - '{{ column | lower }}', TO_VARCHAR({{ column | lower }}) - {%- if not loop.last %}, {% endif -%} - {% endfor %} - ) - ) as user_properties - FROM user_property_changes - WHERE rn_same_ts = 1 - ) - SELECT - user_id, - event_ts, - user_properties - FROM deduplicated_changes - {% endif %} -{% endmacro %} \ No newline at end of file diff --git a/macros/get_event_tables.sql b/macros/get_event_tables.sql index ac3e737..eaa8c24 100644 --- a/macros/get_event_tables.sql +++ b/macros/get_event_tables.sql @@ -33,6 +33,7 @@ {% set relations = [] %} {% if execute %} + {{ log("Found tables: " ~ results.rows, info=True) }} {% for row in results %} {% set relation = api.Relation.create( database=database, diff --git a/macros/union_event_tables.sql b/macros/union_event_tables.sql index 5acfe77..9bab2c8 100644 --- a/macros/union_event_tables.sql +++ b/macros/union_event_tables.sql @@ -11,20 +11,42 @@ database: The database to search in (defaults to target.database) Returns: - A SQL query that unions all event tables with consistent column names and types + A 
dict with: + - sql: SQL query that unions all event tables with consistent column names and types + - user_columns: list of user property columns from identifies table #} {% macro union_event_tables(schema, database=target.database) %} - {%- set event_relations = get_event_tables(schema=schema) -%} - {# Prevent querying of db in parsing mode. This works because this macro does not create any new refs. -#} - {%- if not execute -%} - {{ return('') }} + {%- set custom_event_relations = get_event_tables(schema=schema) -%} + {%- if not execute -%} + {{ return({'sql': '', 'user_columns': []}) }} {%- endif -%} + {{ log("Custom event relations: " ~ custom_event_relations, info=True) }} + + {%- set user_columns = [] -%} + {%- set union_sqls = [] -%} + + {# Always include standard tables #} + {%- set standard_table_names = ['tracks', 'pages', 'identifies'] -%} + {%- set standard_event_relations = [] -%} + {%- for table_name in standard_table_names -%} + {%- set relation = api.Relation.create( + database=database, + schema=schema, + identifier=table_name if target.type == 'bigquery' else table_name.upper() + ) -%} + {%- do standard_event_relations.append(relation) -%} + {%- endfor -%} + + {{ log("Standard event relations: " ~ standard_event_relations, info=True) }} + + {# Combine standard and custom event tables #} + {%- set event_relations = standard_event_relations + custom_event_relations -%} + {%- for relation in event_relations -%} {%- set cols = adapter.get_columns_in_relation(relation) -%} {%- set context_columns = [] -%} {%- set custom_columns = [] -%} - {%- set user_columns = [] -%} {%- set has_event_text = false -%} {%- set excluded_columns = ['ANONYMOUS_ID', 'USER_ID', 'ID', 'TIMESTAMP', 'SENT_AT', 'RECEIVED_AT', 'ORIGINAL_TIMESTAMP', 'UUID_TS', 'EVENT', 'EVENT_TEXT'] -%} {%- for col in cols -%} @@ -34,7 +56,7 @@ {%- if col.column.startswith('CONTEXT_') -%} {%- do context_columns.append(col.column) -%} {%- elif not col.column.startswith('CONTEXT_') and col.column not in excluded_columns -%} - {%- if relation.identifier == 'identifies' -%} + {%- if relation.identifier | lower == 'identifies' -%} {%- do user_columns.append(col.column) -%} {%- else -%} {%- do custom_columns.append(col.column) -%} @@ -42,27 +64,29 @@ {%- endif -%} {%- endfor %} - SELECT + {{ log("Processing table: " ~ relation.identifier ~ " with event_text: " ~ has_event_text, info=True) }} + + {%- set sql -%} + (SELECT ANONYMOUS_ID as device_id, USER_ID as user_id, - {% if relation.identifier == 'tracks' %} + {% if relation.identifier | lower == 'tracks' %} {% if has_event_text %} EVENT_TEXT {% else %} EVENT {% endif %} - {% elif relation.identifier == 'pages' %} + {% elif relation.identifier | lower == 'pages' %} 'Page Viewed' - {% elif relation.identifier == 'identifies' %} + {% elif relation.identifier | lower == 'identifies' %} 'User Identified' {% else %} COALESCE(EVENT, '{{ relation.identifier }}') {% endif %} as event_name, - ID as event_id, + MESSAGE_ID as event_id, TIMESTAMP as server_ts, SENT_AT as device_ts, {% if target.type == 'bigquery' %} - -- Merge context fields and custom properties into properties JSON TO_JSON( STRUCT( {% for column in context_columns %} @@ -75,8 +99,7 @@ {% endfor %} ) ) as properties, - -- For identifies table, collect user traits into user_properties - {% if relation.identifier == 'identifies' %} + {% if relation.identifier | lower == 'identifies' %} TO_JSON( STRUCT( {% for column in user_columns %} @@ -89,32 +112,33 @@ JSON '{}' as user_properties {% endif %} {% else %} - -- Snowflake 
version object_construct( {%- for column in context_columns -%} '{{ column[8:] | lower }}', {{ column }} - {%- if not loop.last -%}, {% endif -%} + {%- if not loop.last or custom_columns -%}, {% endif -%} {%- endfor -%} - {%- if context_columns and custom_columns -%}, {% endif -%} {%- for column in custom_columns -%} '{{ column | lower }}', {{ column }} {%- if not loop.last -%}, {% endif -%} {%- endfor -%} - ) as properties, - {% if relation.identifier == 'identifies' %} + )::VARIANT as properties, + {% if relation.identifier | lower == 'identifies' %} object_construct( {%- for column in user_columns -%} '{{ column | lower }}', {{ column }} {%- if not loop.last -%}, {% endif -%} {%- endfor -%} - ) as user_properties + )::VARIANT as user_properties {% else %} - object_construct() as user_properties + object_construct()::VARIANT as user_properties {% endif %} {% endif %} FROM {{ relation }} - {% if not loop.last -%} - UNION ALL - {%- endif -%} + LIMIT 1000) + {%- endset %} + {%- do union_sqls.append(sql) -%} {%- endfor -%} + + {%- set union_sql = union_sqls | join('\nUNION ALL\n') -%} + {{ return({'sql': union_sql, 'user_columns': user_columns}) }} {%- endmacro -%} diff --git a/models/allevents.sql b/models/allevents.sql index e2e74c4..fc96c07 100644 --- a/models/allevents.sql +++ b/models/allevents.sql @@ -1,12 +1,16 @@ {{ config(materialized='table') }} +{% set union_result = union_event_tables(schema=var('segment_schema')) %} +{% set user_columns = union_result['user_columns'] %} + with union_event_stream as ( - {{ union_event_tables(schema=var('segment_schema'))}} + {{ union_result['sql'] }} ), user_properties_timeline as ( SELECT USER_ID as user_id, + ANONYMOUS_ID as anonymous_id, TIMESTAMP as event_ts, {% if target.type == 'bigquery' %} TO_JSON( @@ -25,8 +29,8 @@ user_properties_timeline as ( {%- endfor -%} ) as user_properties {% endif %} - FROM {{ source(var('segment_schema'), 'identifies') }} - WHERE USER_ID IS NOT NULL + FROM {{ source('segment_schema', 'identifies') }} + WHERE USER_ID IS NOT NULL OR ANONYMOUS_ID IS NOT NULL ) SELECT @@ -50,5 +54,5 @@ SELECT END as user_properties FROM union_event_stream e LEFT JOIN user_properties_timeline up - ON up.user_id = e.user_id + ON (up.user_id = e.user_id OR up.anonymous_id = e.device_id) AND up.event_ts <= e.server_ts diff --git a/models/schema.yml b/models/schema.yml deleted file mode 100644 index 791a4b2..0000000 --- a/models/schema.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: 2 - -sources: - - name: "{{ var('segment_schema') }}" - tables: - - name: tracks - identifier: "{% if target.type == 'bigquery' %}tracks{% else %}TRACKS{% endif %}" - - name: pages - identifier: "{% if target.type == 'bigquery' %}pages{% else %}PAGES{% endif %}" - - name: identifies - identifier: "{% if target.type == 'bigquery' %}identifies{% else %}IDENTIFIES{% endif %}" - - name: ADD_NEW_BLOCK_CLICKED diff --git a/models/staging/segment_sources.yml b/models/staging/segment_sources.yml index 74e09f1..766bacc 100644 --- a/models/staging/segment_sources.yml +++ b/models/staging/segment_sources.yml @@ -1,9 +1,9 @@ version: 2 sources: - - name: segment_export - database: ld-product-analytics-dev - schema: test_segment_export + - name: segment_schema + database: LD_PRODUCT_ANALYTICS + schema: SEGMENT_EXPORT_SAMPLE tables: - name: tracks identifier: "{% if target.type == 'bigquery' %}tracks{% else %}TRACKS{% endif %}" diff --git a/~/.dbt/profiles.yml b/~/.dbt/profiles.yml deleted file mode 100644 index 1e6f97e..0000000 --- a/~/.dbt/profiles.yml +++ /dev/null @@ 
-1,12 +0,0 @@
-segment_product_analytics:
-  target: dev
-  outputs:
-    dev:
-      type: bigquery
-      method: oauth
-      project: ld-product-analytics-dev
-      dataset: ld_product_analytics_segment__environment
-      threads: 4
-      timeout_seconds: 300
-      location: US
-      priority: interactive
\ No newline at end of file
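
Note: the updated README's `dbt run --target bigquery` and `dbt run --target snowflake` commands assume that `bigquery` and `snowflake` targets exist in `~/.dbt/profiles.yml`, while the profile examples in the README define only a single `dev` target. A minimal sketch of a profile that defines both targets (every project, account, path, and schema value below is a placeholder, not taken from this diff):

```yaml
segment_product_analytics:
  target: bigquery            # default target; override with `dbt run --target snowflake`
  outputs:
    bigquery:
      type: bigquery
      method: service-account
      project: your-project-id                  # placeholder
      dataset: your_segment_dataset             # placeholder
      keyfile: /path/to/service-account.json    # placeholder
      location: US
      threads: 4
    snowflake:
      type: snowflake
      account: your-account-id                  # placeholder, e.g. xy12345.us-east-1
      user: dbt_user
      private_key_path: /path/to/rsa_key.p8     # placeholder
      role: your-role                           # placeholder
      database: your-database                   # placeholder
      warehouse: your-warehouse                 # placeholder
      schema: your-schema                       # placeholder
      threads: 4
```

With a profile shaped like this, `dbt debug --target bigquery` and `dbt debug --target snowflake` can verify each connection separately before running the models.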