# Models for description generation output
try:
    from typing import TypedDict
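except ImportError:
    # typing.TypedDict exists since Python 3.8, so this fallback only runs on
    # older interpreters. NOTE(review): if the intent was to always use
    # typing_extensions.TypedDict below Python 3.12, an explicit
    # sys.version_info check is needed instead of try/except - confirm.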
    from typing_extensions import TypedDict
from enum import Enum


class DataDictionaryMatch(Enum):
    """Data dictionary match labels.

    Each member's value is the label string describing how a column relates
    to the entries of the data dictionary.
    """

    # Column matches a single data dictionary entry
    matched = "Matched"
    # Column matches multiple data dictionary entries
    multi_matched = "Multi-matched"
    # Column does not match any data dictionary entries
    new_missing = "New/Missing"


class DataClassification(Enum):
    """Classification of the column.

    Each member's value is the label string for one classification category;
    the comments list example kinds of data for each category.
    """

    # e.g. first name, last name, business name
    names = "Names"
    # e.g. personal address, business address
    addresses = "Addresses"
    # e.g. DOB, email addresses, phone numbers, bank account numbers,
    # credit card numbers, IP address, MAC address, etc.
    individual_identifier_data = "Individual Identifier Data"
    # e.g. racial or ethnic origin, political opinions, SSN (Social Security
    # Number), health data, unique person identifier, gender
    sensitive_data = "Sensitive Data"
    # Data that does not fall into any of the above categories
    non_classified = "Non Classified"


class Column(TypedDict):
    """Column description.

    All keys are required from a type checker's point of view. TypedDict does
    not support default values, so ``foreign_key`` must always be supplied
    explicitly by whoever builds the dict.
    """

    col_name: str  # Name of the column
    description: str  # Description text for the column
    comment: str  # Additional comment text for the column
    # Deliberately no `= False` here: a right-hand-side value in a TypedDict
    # body is rejected by type checkers and is never applied to the created
    # dict at runtime, so it would only suggest a default that does not exist.
    foreign_key: bool
    data_classification: DataClassification
    anonymised_samples: list[str]
    data_dictionary_match: DataDictionaryMatch
    # Proposed closest/new potential match from data dictionary, if no exact
    # match found
    proposed_dd_match: str | None


class DataSet(TypedDict):
    """Data set description: a named data set together with its columns."""

    data_set_name: str  # Name of the data set
    description: str  # Description text for the data set
    columns: list[Column]  # Column descriptions belonging to this data set


class DataSetKey(TypedDict):
    """Data set key description.

    Used as the type of both the ``source`` and ``target`` entries of a
    Relationship.
    """

    datasetName: str  # Attribute name conforms to the schema, hence not snake_case
    key: str  # presumably the key column within that data set - TODO confirm


class Relationship(TypedDict):
    """Relationship description linking a source data set key to a target one."""

    source: DataSetKey  # Data set / key pair on the source side
    target: DataSetKey  # Data set / key pair on the target side
    cardinality: str  # Plain string; allowed values are not defined in this module
    type: str  # Plain string; allowed values are not defined in this module


class DataSource(TypedDict):
    """Data source description: its data sets and their relationships."""

    name: str  # Name of the data source
    description: str  # Description text for the data source
    location_coverage: list[str]  # presumably the locations the source covers - TODO confirm format
    data_sets: list[DataSet]  # Descriptions of the data sets in this source
    relationships: list[Relationship]  # Relationship descriptions for this source


class CompoundPrimaryKey(TypedDict):
    """Compound primary key.

    Both keys are required from a type checker's point of view. TypedDict does
    not support default values, so ``verified`` must always be supplied
    explicitly by whoever builds the dict.
    """

    combination: list[str]  # The entries that together form the compound key
    # True if the compound key is validated as a correct primary key.
    # Deliberately no `= False` here: a right-hand-side value in a TypedDict
    # body is rejected by type checkers and is never applied to the created
    # dict at runtime.
    verified: bool


class DataSetWithCompoundPrimaryKey(TypedDict):
    """Data set with compound primary key description.

    Pairs a data set name with its candidate compound primary keys.
    """

    data_set_name: str  # Name of the data set
    # Holds a list despite the singular key name; the "top 5" limit is a
    # convention only and is not enforced by the type.
    compound_primary_key: list[CompoundPrimaryKey]  # Top 5 compound primary keys


class DataSetList(TypedDict):
    """List of data sets, each paired with its compound primary keys."""

    data_sets: list[DataSetWithCompoundPrimaryKey]  # One entry per data set
