# feature_pipeline.py
"""Feature engineering pipeline for coffee yield prediction.
Transforms raw sensor input into model-ready features.
Applies normalization using hardcoded ranges from training data
and creates derived features.
"""
import torch
# Per-feature bounds taken from the training data and hardcoded here.
# Each entry maps a feature name to its (min_value, max_value) pair, which
# is what min-max normalization scales against.
FEATURE_RANGES = {
    "temperature": (15.0, 35.0),
    "rainfall": (0.0, 80.0),
    "soil_moisture": (10.0, 90.0),
    "humidity": (40.0, 100.0),
    "altitude": (1200.0, 2000.0),
}
# Keys that must be present in every raw input dictionary. "farm_id" is
# required alongside the five sensor readings even though it has no entry in
# the normalization ranges.
REQUIRED_FIELDS = [
    "farm_id",
    "temperature", "rainfall", "soil_moisture", "humidity", "altitude",
]
def _normalize(value: float, min_val: float, max_val: float) -> float:
"""Min-max normalize a value to [0, 1] range."""
if max_val == min_val:
return 0.5
return (value - min_val) / (max_val - min_val)
def _validate_input(raw_data: dict) -> None:
    """Ensure every name in REQUIRED_FIELDS is a key of ``raw_data``.

    Only key presence is checked; the values themselves are not inspected.

    Args:
        raw_data: Raw input values keyed by field name.

    Raises:
        ValueError: If one or more required fields are absent. The message
            lists the absent names in REQUIRED_FIELDS order.
    """
    absent = [name for name in REQUIRED_FIELDS if name not in raw_data]
    if not absent:
        return
    raise ValueError(f"Missing required fields: {absent}")
def prepare_features(raw_data: dict) -> torch.Tensor:
    """Build the model input tensor from one dictionary of raw readings.

    The input is first checked for required keys, then each sensor reading is
    min-max scaled with its (min, max) pair from FEATURE_RANGES, and finally
    three extra features are computed from the scaled values.

    Args:
        raw_data: Dictionary with keys: farm_id, temperature, rainfall,
            soil_moisture, humidity, altitude. ``farm_id`` must be present
            but is not part of the returned features.

    Returns:
        A float32 torch.Tensor of shape (1, 8) holding, in order:
        [temperature, rainfall, soil_moisture, humidity, altitude,
         temperature_x_rainfall (interaction),
         temperature_rainfall_ratio (derived),
         moisture_humidity_index (derived)]
        The first five entries are the scaled readings; the last three are
        computed from those scaled values.

    Raises:
        ValueError: If a required field is missing (raised by
            ``_validate_input``).
    """
    _validate_input(raw_data)

    def scaled(name: str) -> float:
        # Scale one raw reading against its training-data bounds.
        low, high = FEATURE_RANGES[name]
        return _normalize(raw_data[name], low, high)

    temperature, rainfall, soil_moisture, humidity, altitude = (
        scaled(name)
        for name in (
            "temperature",
            "rainfall",
            "soil_moisture",
            "humidity",
            "altitude",
        )
    )

    # Interaction feature: product of the two scaled readings.
    temperature_x_rainfall = temperature * rainfall

    # Ratio feature. The 0.01 offset keeps the denominator away from zero;
    # when scaled rainfall is not positive, the denominator is 0.01 alone.
    ratio_denominator = rainfall + 0.01 if rainfall > 0 else 0.01
    temperature_rainfall_ratio = temperature / ratio_denominator

    # Index feature: plain average of scaled soil moisture and humidity.
    moisture_humidity_index = (soil_moisture + humidity) / 2.0

    # 5 scaled readings + 1 interaction + 2 derived = 8 features, wrapped in
    # an outer list so the tensor has a leading dimension of size 1.
    row = [
        temperature,
        rainfall,
        soil_moisture,
        humidity,
        altitude,
        temperature_x_rainfall,
        temperature_rainfall_ratio,
        moisture_humidity_index,
    ]
    return torch.tensor([row], dtype=torch.float32)