Learn by Directing AI
All materials

feature_pipeline.py

pyfeature_pipeline.py
"""Feature engineering pipeline for coffee yield prediction.

Transforms raw sensor input into model-ready features.
Applies normalization using hardcoded ranges from training data
and creates derived features.
"""

import torch


# Per-feature (min_value, max_value) bounds taken from the training data.
# These are the ranges used for min-max scaling of the raw sensor readings.
FEATURE_RANGES = dict(
    temperature=(15.0, 35.0),
    rainfall=(0.0, 80.0),
    soil_moisture=(10.0, 90.0),
    humidity=(40.0, 100.0),
    altitude=(1200.0, 2000.0),
)

# Keys every raw input record must contain: the farm identifier followed by
# each sensor reading that has a normalization range (in table order).
REQUIRED_FIELDS = ["farm_id", *FEATURE_RANGES]


def _normalize(value: float, min_val: float, max_val: float) -> float:
    """Min-max normalize a value to [0, 1] range."""
    if max_val == min_val:
        return 0.5
    return (value - min_val) / (max_val - min_val)


def _validate_input(raw_data: dict) -> None:
    """Check that ``raw_data`` contains every key in ``REQUIRED_FIELDS``.

    Only key presence is checked; the values themselves are not inspected.

    Args:
        raw_data: Raw input record to check.

    Raises:
        ValueError: If one or more required keys are absent. The message
            lists the absent keys in ``REQUIRED_FIELDS`` order.
    """
    absent = []
    for field_name in REQUIRED_FIELDS:
        if field_name not in raw_data:
            absent.append(field_name)

    if not absent:
        return
    raise ValueError(f"Missing required fields: {absent}")


def prepare_features(raw_data: dict) -> torch.Tensor:
    """Build the model input tensor for a single raw sensor record.

    The record is validated for required keys, each sensor reading is
    min-max scaled with the bounds in ``FEATURE_RANGES``, and three
    additional features are computed from the scaled values.

    Args:
        raw_data: Dictionary with keys: farm_id, temperature,
                  rainfall, soil_moisture, humidity, altitude.

    Returns:
        torch.Tensor of dtype float32 and shape (1, 8), in this order:
        [temperature, rainfall, soil_moisture, humidity, altitude,
         temperature_x_rainfall (interaction),
         temperature_rainfall_ratio (derived),
         moisture_humidity_index (derived)]
        The first five entries are the scaled readings. The ratio entry
        divides by a small offset and is therefore not limited to [0, 1].

    Raises:
        ValueError: If a required key is missing from ``raw_data``.
    """
    _validate_input(raw_data)

    # Scale the five sensor readings, in output order.
    sensor_names = (
        "temperature",
        "rainfall",
        "soil_moisture",
        "humidity",
        "altitude",
    )
    scaled = [
        _normalize(
            raw_data[name],
            FEATURE_RANGES[name][0],
            FEATURE_RANGES[name][1],
        )
        for name in sensor_names
    ]
    temperature, rainfall, soil_moisture, humidity, altitude = scaled

    # Interaction term between scaled temperature and scaled rainfall.
    temperature_x_rainfall = temperature * rainfall

    # Ratio term: the 0.01 offset keeps the denominator away from zero;
    # non-positive scaled rainfall contributes nothing beyond the offset.
    ratio_denominator = rainfall + 0.01 if rainfall > 0 else 0.01
    temperature_rainfall_ratio = temperature / ratio_denominator

    # Plain average of the two scaled moisture-related readings.
    moisture_humidity_index = (soil_moisture + humidity) / 2.0

    # One row: 5 scaled readings + 1 interaction + 2 derived = 8 features.
    row = [
        *scaled,
        temperature_x_rainfall,
        temperature_rainfall_ratio,
        moisture_humidity_index,
    ]
    return torch.tensor([row], dtype=torch.float32)