from __future__ import annotations
import json
from typing import Dict, Any, List, Optional, Iterable, Generator, Tuple
import pandas as pd

from app.utils.type_infer import apply_types
from app.core.validator import validate_required_columns, validate_rows, ValidationError

def load_mapping_config(path: Optional[str]) -> Dict[str, Any]:
    """
    Load a mapping configuration from a JSON file.

    A falsy path (None or an empty string) yields an empty dict; otherwise
    the file is opened as UTF-8 text and its parsed JSON content is returned.
    """
    if path:
        with open(path, mode="r", encoding="utf-8") as handle:
            config = json.load(handle)
        return config
    # no path given: nothing to load
    return {}

def normalize_headers(cols: List[str]) -> List[str]:
    """
    Return cleaned-up header names, one per input column and in the same order.

    Each entry is converted to str, stripped of leading/trailing whitespace,
    and every remaining space character is replaced by an underscore.
    """
    return [str(header).strip().replace(" ", "_") for header in cols]

def apply_mapping(row: Dict[str, Any], rename: Dict[str, str], drop: List[str], defaults: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build a new row dict with columns dropped, renamed and defaulted.

    Keys listed in `drop` are matched against the original (pre-rename) key
    and skipped. Remaining keys are renamed through `rename` when an entry
    exists. Finally, each `defaults` entry is added only if its key is not
    already present in the result. The input row is not modified.
    """
    # drop + rename in one pass; original iteration order is kept
    mapped = {
        rename.get(key, key): value
        for key, value in row.items()
        if key not in drop
    }
    # defaults only fill keys that are absent, never existing values
    for key, fallback in defaults.items():
        mapped.setdefault(key, fallback)
    return mapped

def to_json_array(rows: List[Dict[str, Any]], pretty: bool = True) -> str:
    """
    Serialize rows as a single JSON array string.

    With `pretty` truthy the output is indented by two spaces; otherwise the
    most compact separators are used. Non-ASCII characters are emitted as-is.
    """
    layout = {"indent": 2} if pretty else {"separators": (",", ":")}
    return json.dumps(rows, ensure_ascii=False, **layout)

def to_ndjson(rows: Iterable[Dict[str, Any]]) -> str:
    """
    Serialize rows as newline-delimited JSON, one object per line.

    The whole result is assembled into one string (good for UI download);
    for huge inputs a streaming writer is the better choice. No trailing
    newline is added.
    """
    def encode(record: Dict[str, Any]) -> str:
        return json.dumps(record, ensure_ascii=False)

    return "\n".join(map(encode, rows))

def to_grouped(rows: List[Dict[str, Any]], group_by: str, items_key: str = "items") -> List[Dict[str, Any]]:
    """
    Bucket rows by the value stored under `group_by`.

    Rows whose value is missing or None are collected under the "UNKNOWN"
    key. Returns one dict per bucket, in order of first appearance, holding
    the bucket value under `group_by` and its rows under `items_key`.
    """
    buckets = {}
    for record in rows:
        value = record.get(group_by)
        bucket_key = "UNKNOWN" if value is None else value
        if bucket_key in buckets:
            buckets[bucket_key].append(record)
        else:
            buckets[bucket_key] = [record]

    return [
        {group_by: bucket_key, items_key: members}
        for bucket_key, members in buckets.items()
    ]

def convert_dataframe(
    df: pd.DataFrame,
    *,
    mapping: Optional[Dict[str, Any]] = None,
    output_format: str = "json_array",
    group_by: Optional[str] = None,
    items_key: str = "items",
    pretty: bool = True
) -> Tuple[str, Dict[str, Any]]:
    """
    Converts a DataFrame to JSON output.
    Returns (json_text, meta)

    mapping may carry the keys "rename", "drop", "defaults", "required" and
    "types"; missing or empty entries are treated as "nothing to do".

    output_format (case-insensitive):
      - "grouped": rows bucketed by `group_by` (mandatory for this format),
        each bucket's rows stored under `items_key`
      - "ndjson": one JSON object per line (`pretty` is not used)
      - anything else: a single JSON array

    meta contains "rows", "invalid_rows", "sample_errors", "columns" (the
    normalized input headers, before mapping) and "format" (the lower-cased
    requested format).

    Raises ValidationError when the grouped format is requested without
    group_by. Whatever validate_required_columns raises for a missing
    required column propagates unchanged (presumably ValidationError as
    well -- confirm against app.core.validator).
    """
    mapping = mapping or {}
    rename = mapping.get("rename", {}) or {}
    drop = mapping.get("drop", []) or []
    defaults = mapping.get("defaults", {}) or {}
    required = mapping.get("required", None)
    types = mapping.get("types", None)

    # work on a copy so the caller's frame keeps its original headers
    df = df.copy()
    df.columns = normalize_headers([str(c) for c in df.columns])

    # Required-column check. The required list is passed in full: filtering it
    # down to names already present in df.columns (as was done before) makes
    # the check unable to ever report a missing column.
    # To stay tolerant of mappings written against either the input headers or
    # the renamed ones, a required name counts as available when it is an
    # input header, a post-drop/rename output name, or a key of `defaults`.
    known_columns = list(df.columns)
    seen = set(known_columns)
    mapped_names = [rename.get(c, c) for c in df.columns if c not in drop]
    for name in mapped_names + list(defaults):
        if name not in seen:
            seen.add(name)
            known_columns.append(name)
    validate_required_columns(known_columns, list(required or []))

    # per-row pipeline: drop/rename/defaults first, then type coercion
    cleaned_rows: List[Dict[str, Any]] = [
        apply_types(
            apply_mapping(r, rename=rename, drop=drop, defaults=defaults),
            types=types,
        )
        for r in df.to_dict(orient="records")
    ]

    # validate per-row required values (after mapping/types)
    invalid_count, sample_errors = validate_rows(cleaned_rows, required=required)

    fmt = output_format.lower()
    if fmt == "grouped":
        if not group_by:
            raise ValidationError("Grouped format requires group_by column.")
        grouped = to_grouped(cleaned_rows, group_by=group_by, items_key=items_key)
        json_text = to_json_array(grouped, pretty=pretty)
    elif fmt == "ndjson":
        json_text = to_ndjson(cleaned_rows)
    else:
        # unrecognized formats fall back to a plain JSON array
        json_text = to_json_array(cleaned_rows, pretty=pretty)

    meta = {
        "rows": len(cleaned_rows),
        "invalid_rows": invalid_count,
        "sample_errors": sample_errors,
        "columns": list(df.columns),
        "format": fmt
    }
    return json_text, meta

def read_csv_safely(file_bytes: bytes, delimiter: Optional[str] = None, encoding: Optional[str] = None) -> pd.DataFrame:
    """
    Robust CSV read for UI uploads.

    Tries the caller-supplied encoding first (when given), then "utf-8",
    "utf-8-sig" and "latin-1", and returns the first successful parse.
    When delimiter is None, pandas is asked to detect the separator.

    Raises the exception from the last attempted encoding if every attempt
    fails.
    """
    import io

    candidates = [encoding] if encoding else []
    candidates += ["utf-8", "utf-8-sig", "latin-1"]
    # de-duplicate (order preserved) so an explicit encoding that matches a
    # fallback is not attempted twice
    encodings = list(dict.fromkeys(candidates))

    read_kwargs: Dict[str, Any] = {"sep": delimiter}
    if delimiter is None:
        # Separator detection is only supported by the python engine; naming
        # it explicitly avoids the ParserWarning pandas emits when it has to
        # fall back from the default C engine on every call.
        read_kwargs["engine"] = "python"

    last_err = None
    for enc in encodings:
        try:
            return pd.read_csv(io.BytesIO(file_bytes), encoding=enc, **read_kwargs)
        except Exception as e:
            # any failure (decode or parse) moves on to the next encoding
            last_err = e
    raise last_err
