name: dbt-coder description: dbt (data build tool) patterns for model organization, incremental strategies, and testing. allowed-tools: Read Write Edit Grep Glob Bash

dbt-Coder

Name: dbt Coder - Patterns et Organisation
Author: Skills Guide Bot

Patterns for dbt (data build tool) transform layer development.

Project Structure

my_dbt_project/
├── dbt_project.yml
├── profiles.yml
├── models/
│   ├── staging/          # 1:1 with sources, light transforms
│   │   ├── stg_orders.sql
│   │   └── _staging.yml
│   ├── intermediate/     # Joins, business logic
│   │   └── int_orders_enriched.sql
│   └── marts/            # Final consumption layer
│       ├── finance/
│       │   └── fct_revenue.sql
│       └── marketing/
│           └── dim_customers.sql
├── seeds/                # Static lookup data
├── snapshots/            # SCD Type 2
├── macros/               # Reusable SQL
└── tests/                # Custom tests

dbt_project.yml

name: 'my_project'
version: '1.0.0'
config-version: 2

profile: 'my_project'

model-paths: ["models"]
seed-paths: ["seeds"]
test-paths: ["tests"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

models:
  my_project:
    staging:
      +materialized: view
      +schema: staging
    intermediate:
      +materialized: ephemeral
    marts:
      +materialized: table
      +schema: marts

Staging Models

-- models/staging/stg_orders.sql
-- Naming: stg_<source>_<entity>

with source as (
    select * from {{ source('raw', 'orders') }}
),

renamed as (
    select
        -- Rename to consistent naming
        id as order_id,
        customer_id,
        order_date,
        total_amount as order_total,

        -- Type casting
        cast(status as varchar(50)) as order_status,

        -- Timestamps
        created_at,
        updated_at
    from source
)

select * from renamed

Source Definition

# models/staging/_sources.yml
version: 2

sources:
  - name: raw
    database: raw_db
    schema: public
    freshness:
      warn_after: {count: 12, period: hour}
      error_after: {count: 24, period: hour}
    tables:
      - name: orders
        identifier: orders_table
        columns:
          - name: id
            tests:
              - unique
              - not_null
      - name: customers

Intermediate Models

-- models/intermediate/int_orders_enriched.sql
-- Join staging models, apply business logic

with orders as (
    select * from {{ ref('stg_orders') }}
),

customers as (
    select * from {{ ref('stg_customers') }}
),

products as (
    select * from {{ ref('stg_products') }}
)

select
    o.order_id,
    o.order_date,
    o.order_total,

    c.customer_id,
    c.customer_name,
    c.customer_segment,

    -- Business logic
    case
        when o.order_total >= 1000 then 'high_value'
        when o.order_total >= 100 then 'medium_value'
        else 'low_value'
    end as order_tier

from orders o
left join customers c on o.customer_id = c.customer_id

Mart Models

-- models/marts/finance/fct_revenue.sql
-- Final aggregated fact table

{{ config(
    materialized='table',
    partition_by={
      "field": "order_date",
      "data_type": "date",
      "granularity": "month"
    }
) }}

with orders as (
    select * from {{ ref('int_orders_enriched') }}
)

select
    date_trunc('day', order_date) as revenue_date,
    customer_segment,
    order_tier,
    count(*) as order_count,
    sum(order_total) as total_revenue,
    avg(order_total) as avg_order_value
from orders
group by 1, 2, 3

Incremental Models

-- models/marts/fct_events.sql
{{ config(
    materialized='incremental',
    unique_key='event_id',
    incremental_strategy='merge'  -- or 'delete+insert', 'append'
) }}

select
    event_id,
    user_id,
    event_type,
    event_timestamp,
    properties
from {{ source('raw', 'events') }}

{% if is_incremental() %}
    -- Only new/updated rows since last run
    where event_timestamp > (select max(event_timestamp) from {{ this }})
{% endif %}

Snapshots (SCD Type 2)

-- snapshots/snap_customers.sql
{% snapshot snap_customers %}

{{
    config(
      target_schema='snapshots',
      unique_key='customer_id',
      strategy='timestamp',
      updated_at='updated_at',
    )
}}

select * from {{ source('raw', 'customers') }}

{% endsnapshot %}

Tests

# models/marts/_schema.yml
version: 2

models:
  - name: fct_revenue
    description: Daily revenue aggregations
    columns:
      - name: revenue_date
        tests:
          - not_null
      - name: total_revenue
        tests:
          - not_null
          - dbt_utils.accepted_range:
              min_value: 0

    tests:
      # Model-level tests
      - dbt_utils.unique_combination_of_columns:
          combination_of_columns:
            - revenue_date
            - customer_segment
            - order_tier

Custom Tests

-- tests/assert_positive_revenue.sql
-- Returns rows that fail the test

select
    revenue_date,
    total_revenue
from {{ ref('fct_revenue') }}
where total_revenue < 0

Macros

-- macros/cents_to_dollars.sql
{% macro cents_to_dollars(column_name) %}
    round({{ column_name }} / 100.0, 2)
{% endmacro %}

-- Usage in model:
-- select {{ cents_to_dollars('amount_cents') }} as amount_dollars

-- macros/generate_schema_name.sql
{% macro generate_schema_name(custom_schema_name, node) %}
    {% if custom_schema_name %}
        {{ custom_schema_name }}
    {% else %}
        {{ target.schema }}
    {% endif %}
{% endmacro %}

dbt Commands

# Run all models
dbt run

# Run specific model and dependencies
dbt run --select fct_revenue+

# Run models with tag
dbt run --select tag:finance

# Test all
dbt test

# Generate docs
dbt docs generate
dbt docs serve

# Freshness check
dbt source freshness

# Full refresh of incremental
dbt run --full-refresh --select fct_events

# Build (run + test)
dbt build

Best Practices

# 1. Use ref() for model references
# BAD:  select * from schema.stg_orders
# GOOD: select * from {{ ref('stg_orders') }}

# 2. Use source() for raw tables
# BAD:  select * from raw_db.orders
# GOOD: select * from {{ source('raw', 'orders') }}

# 3. Document models
models:
  - name: fct_revenue
    description: |
      Daily revenue by segment. Grain: one row per day/segment/tier.
      Updated daily by the finance_dag.
    meta:
      owner: data-team
      pii: false

Packages

# packages.yml
packages:
  - package: dbt-labs/dbt_utils
    version: 1.1.1
  - package: dbt-labs/codegen
    version: 0.12.1
  - package: calogica/dbt_expectations
    version: 0.10.1

# Install packages
dbt deps

dbt Coder - Patterns et Organisation

Recommandé pour

Notre avis

Points forts

Limites

Analyse de sécurité

Exemples

name: dbt-coder description: dbt (data build tool) patterns for model organization, incremental strategies, and testing. allowed-tools: Read Write Edit Grep Glob Bash

dbt-Coder

Project Structure

dbt_project.yml

Staging Models

Source Definition

Intermediate Models

Mart Models

Incremental Models

Snapshots (SCD Type 2)

Tests

Custom Tests

Macros

dbt Commands

Best Practices

Packages

Ingénierie de Prompts

Visualisation de Données

Architecture RAG