Learn by Directing AI
All materials

dvc-config-template.yaml

dvc-config-template.yaml (YAML)
# DVC Pipeline Configuration Template
# Replace the example paths below with the ones that match your project
# structure.
# Each stage defines the command to run (cmd), its dependencies (deps), and
# the outputs to track (outs); a stage may also declare metrics files.

stages:
  prepare:
    # First stage of the pipeline: reads the raw CSVs and outputs cleaned,
    # validated data.
    # Command DVC runs for this stage.
    cmd: python src/prepare.py
    # Inputs: the stage script itself plus the three raw CSV files. Listing the
    # script means that editing it also marks this stage as changed.
    deps:
      - src/prepare.py
      - data/raw/transactions.csv
      - data/raw/products.csv
      - data/raw/customers.csv
    # Outputs tracked by DVC: one cleaned CSV per raw input. The `features`
    # stage lists these same three paths as its dependencies.
    outs:
      - data/prepared/transactions_clean.csv
      - data/prepared/products_clean.csv
      - data/prepared/customers_clean.csv

  features:
    # Reads cleaned data and outputs versioned feature matrices.
    # This is the feature computation stage -- kept separate from training so
    # that the feature files are tracked as outputs in their own right.
    # Command DVC runs for this stage.
    cmd: python src/features.py
    # Inputs: the stage script plus the three cleaned CSVs, which are exactly
    # the outputs of the `prepare` stage -- this is what orders `features`
    # after `prepare` in the pipeline graph.
    deps:
      - src/features.py
      - data/prepared/transactions_clean.csv
      - data/prepared/products_clean.csv
      - data/prepared/customers_clean.csv
    # Outputs tracked by DVC: one CSV and two .npy feature files.
    # NOTE(review): judging by the names, combined_features.npy is presumably
    # built from the tabular and embedding features -- confirm in
    # src/features.py.
    outs:
      - data/features/tabular_features.csv
      - data/features/embedding_features.npy
      - data/features/combined_features.npy

  train:
    # Reads features and outputs model artifacts.
    # Training reads from the feature store -- it never computes features.
    # Command DVC runs for this stage.
    cmd: python src/train.py
    # Inputs: the stage script plus two of the three outputs of the `features`
    # stage.
    # NOTE(review): data/features/embedding_features.npy is produced by
    # `features` but is not listed here. If src/train.py reads it directly,
    # add it; otherwise changes to it only reach this stage through
    # combined_features.npy -- confirm.
    deps:
      - src/train.py
      - data/features/tabular_features.csv
      - data/features/combined_features.npy
    # Model artifact tracked by DVC.
    outs:
      - models/recommendation_model.pkl
    # Metrics file written by the stage. `cache: false` keeps the file out of
    # the DVC cache, so it is expected to be versioned with Git instead.
    metrics:
      - metrics/train_metrics.json:
          cache: false