import csv
import logging
from pathlib import Path
import json
from jsonschema import validate

from google import genai
from google.genai.types import GenerateContentConfigDict

from kyd_dataspec_gen.models import DataSource
from kyd_dataspec_gen.setup_ai import generate_response

logger = logging.getLogger(__name__)


def read_data_dictionary(data_dictionary_path: str | Path) -> list[dict]:
    """
    Reads a data dictionary from a CSV file, validates every row against the
    bundled JSON schema, and returns the rows as a list of dictionaries.

    The schema is loaded from ``schemas/data_dictionary.schema.json`` located
    next to this module. Validation stops at the first row that fails.

    Args:
        data_dictionary_path (str | Path): The file path to the data dictionary CSV file.

    Returns:
        list[dict]: A list of dictionaries, each representing a row from the data dictionary CSV file.
            Returned only if every row passed schema validation.

    Raises:
        OSError: If the CSV file or the schema file cannot be opened.
        json.JSONDecodeError: If the schema file is not valid JSON.
        Exception: Whatever ``jsonschema.validate`` raises for a row that does
            not conform to the schema (or for an invalid schema) is propagated
            unchanged.
    """
    logger.info("Reading data dictionary from %s", data_dictionary_path)

    # newline="" hands line-ending handling to the csv module, which is what
    # it expects; otherwise quoted fields containing line breaks can be
    # misparsed on some platforms.
    with open(data_dictionary_path, "r", encoding="utf-8", newline="") as csv_file:
        data_dictionary = list(csv.DictReader(csv_file))

    schema_path = Path(__file__).parent / "schemas" / "data_dictionary.schema.json"
    with open(schema_path, "r", encoding="utf-8") as schema_file:
        schema = json.load(schema_file)

    # Both files are closed by now; validation works purely on in-memory data.
    for row in data_dictionary:
        # Lazy %-formatting: the row is only rendered when DEBUG is enabled.
        logger.debug("Validating data dictionary against schema: %s", row)
        validate(instance=row, schema=schema)
    return data_dictionary


def match_data_dictionary(
    data_source: DataSource, ai_client: genai.Client, data_dictionary: list[dict]
) -> DataSource:
    """
    Matches data elements in a given data source against a provided data dictionary using an AI client.

    The data source and the data dictionary are embedded into a prompt that asks
    the model to label each data element as 'Matched' (single match),
    'Multi-matched' (multiple matches) or 'New/Missing' (no match), and to
    propose a closest match or a new element when nothing matches. The labelling
    itself is performed by the model; this function only builds the request and
    returns what ``generate_response`` hands back.

    If ``ai_client`` is falsy (e.g. ``None``), no request is made and the input
    ``data_source`` is returned unchanged after logging a warning.

    Args:
        data_source (DataSource): The data source containing datasets and their data elements to be matched.
        ai_client (genai.Client): The AI client used to perform the matching.
        data_dictionary (list[dict]): The data dictionary to match data elements against.

    Returns:
        DataSource: The updated data source with matching labels for each data element,
            or the original ``data_source`` when no AI client is available.
    """
    # Guard first: without a client there is nothing to send, so avoid
    # stringifying the whole data source and dictionary into a prompt.
    if not ai_client:
        logger.warning("AI client is not set up. Skipping data dictionary matching.")
        return data_source

    # Adjacent literals are concatenated verbatim, so every fragment must end
    # with a space to keep the sentences separated.
    prompt = (
        "Given a datasource with a list of datasets and their data elements, "
        "match each data element against the items in the provided data dictionary "
        "with the descriptions. "
        "If a match is found, label the data element as 'Matched'. "
        "If multiple matches are found, label it as 'Multi-matched'. "
        "If no matches are found, label it as 'New/Missing'. "
        "If there is no match, propose a closest potential match. Suggest a new element if no close match found. "
        "Return the entire unchanged datasource schema with their new respective matching labels: "
        f"Data Source: {data_source}, Data Dictionary: {data_dictionary}"
    )
    # Request JSON output constrained to the DataSource schema.
    ai_config: GenerateContentConfigDict = {
        "response_mime_type": "application/json",
        "response_schema": DataSource,
    }

    logger.info("AI client is set up. Proceeding with data dictionary matching.")
    # NOTE(review): generate_response is assumed to return a parsed DataSource,
    # as the original annotation states — confirm against setup_ai.
    updated_data_source: DataSource = generate_response(
        ai_client,
        prompt,
        ai_config,
    )
    logger.debug("Updated Data Source after matching: %s", updated_data_source)
    return updated_data_source
