All materials
dvc-config-template.yaml
yamldvc-config-template.yaml
# DVC Pipeline Configuration Template
# Fill in the placeholder paths to match your project structure.
# Each stage defines: command to run, dependencies, and outputs to track.
stages:
  # Stage 1 -- prepare
  # Reads the raw CSVs and outputs cleaned, validated data.
  prepare:
    cmd: python src/prepare.py
    # The stage is re-run when the script or any raw input below changes.
    deps:
      - src/prepare.py
      - data/raw/transactions.csv
      - data/raw/products.csv
      - data/raw/customers.csv
    outs:
      - data/prepared/transactions_clean.csv
      - data/prepared/products_clean.csv
      - data/prepared/customers_clean.csv

  # Stage 2 -- features
  # Reads the cleaned data and outputs versioned feature matrices.
  # This is the feature computation stage -- kept separate from training.
  features:
    cmd: python src/features.py
    # Depends on every output of the prepare stage, which chains the stages.
    deps:
      - src/features.py
      - data/prepared/transactions_clean.csv
      - data/prepared/products_clean.csv
      - data/prepared/customers_clean.csv
    outs:
      - data/features/tabular_features.csv
      - data/features/embedding_features.npy
      - data/features/combined_features.npy

  # Stage 3 -- train
  # Reads features and outputs model artifacts.
  # Training reads from the feature store (data/features/) -- it never
  # computes features itself.
  train:
    cmd: python src/train.py
    # NOTE(review): embedding_features.npy is not listed as a dependency;
    # presumably it is already folded into combined_features.npy -- confirm.
    deps:
      - src/train.py
      - data/features/tabular_features.csv
      - data/features/combined_features.npy
    outs:
      - models/recommendation_model.pkl
    metrics:
      # cache: false keeps this file out of the DVC cache, so the metrics
      # JSON can be tracked with Git instead.
      - metrics/train_metrics.json:
          cache: false