"""CLI tool to convert DOCX files to Markdown format using kyd_docx2md."""

import argparse
import json
import logging
import re
from argparse import Namespace
from pathlib import Path
from typing import List

from kyd_docx2md.docx_config import DocxConfig
from kyd_docx2md.kyd_docx2md import Docx2Md

# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)


def validate_hex_colors(colors_str: str | None) -> list[str]:
    """
    Validate and parse comma-separated hex color codes.

    Args:
        colors_str (str | None): Comma-separated hex color codes (e.g. "#FFFFFF,#000000").

    Returns:
        list[str]: List of valid hex color codes (uppercase, without #).

    Raises:
        ValueError: If any color code is in an invalid format.

    """
    if not colors_str:
        return []

    # Pattern for valid hex color: #RRGGBB
    hex_pattern = re.compile(r"^#[0-9A-Fa-f]{6}$")

    colors = [c.strip() for c in colors_str.split(",")]
    validated_colors = []
    invalid_colors = []

    for color in colors:
        if not color:  # Skip empty strings from extra commas
            continue
        if hex_pattern.match(color):
            # Store without # and uppercase for consistency
            validated_colors.append(color[1:].upper())
        else:
            invalid_colors.append(color)

    if invalid_colors:
        error_msg = f"Invalid hex color format: {', '.join(invalid_colors)}. Expected format: #RRGGBB"
        logger.error(error_msg)
        raise ValueError(error_msg)

    return validated_colors


def setup_logger(
    log_level: str | int,
    log_format: str = "%(asctime)s %(levelname)s:%(message)s",
    log_fileName: str | None = "",
) -> None:  # pragma: no cover
    """
    Configure the root logger with a console handler and an optional file handler.

    Args:
        log_level (str | int): Logging level applied to the root logger and to the
            file handler. Either a level name (e.g. "INFO") or a numeric level.
        log_format (str): Format string used for the emitted log records.
        log_fileName (str | None, optional): Path of a log file to write to in
            addition to the console. An empty string or ``None`` disables file
            logging. Defaults to "".

    """
    # Always log to the console (StreamHandler defaults to stderr).
    handlers: list[logging.Handler] = [
        logging.StreamHandler(),
    ]
    if log_fileName:
        # Optional second destination: a UTF-8 encoded log file.
        file_handler = logging.FileHandler(log_fileName, encoding="utf-8")
        file_handler.setFormatter(logging.Formatter(log_format))
        file_handler.setLevel(log_level)
        handlers.append(file_handler)

    # NOTE: logging.basicConfig does nothing if the root logger already has
    # handlers configured, so this is expected to run once, early in the process.
    logging.basicConfig(
        level=log_level,
        format=log_format,
        handlers=handlers,
    )


def parse_args() -> Namespace:
    """
    Parse and post-process the command-line arguments.

    After parsing, two options are normalized:

    * ``remove_wrapping_tables``: ``None`` when the option is absent, ``["1x1"]``
      when the option is given without values, otherwise the subset of the given
      values that look like ``<positive int>x<positive int>`` (``None`` if none
      of them are valid). Invalid values are logged and dropped.
    * ``exclude_colors``: ``None`` when absent/empty, otherwise the result of
      ``validate_hex_colors``.

    Returns:
        Namespace: The parsed and normalized arguments.

    Raises:
        ValueError: Propagated from ``validate_hex_colors`` when ``--exclude-colors``
            contains a malformed color code.

    """
    parser = argparse.ArgumentParser(
        description="Convert DOCX files into Markdown format.",
    )
    parser.add_argument(
        "inputs",
        nargs="+",
        help="Input DOCX files or glob patterns (supports wildcards, e.g. 'docs/*.docx')",
    )
    parser.add_argument(
        "-if",
        "--image-folder",
        required=False,
        default="./images",
        help="Location where the images are stored",
    )
    parser.add_argument(
        "-o",
        "--output",
        required=False,
        default=None,
        help="Generated output Markdown file (default: same basename as input with .md extension)",
    )
    parser.add_argument(
        "-ao",
        "--ascii_only",
        required=False,
        action="store_true",
        default=False,
        help="Convert output to ASCII only",
    )
    parser.add_argument(
        "--no-images",
        action="store_false",
        dest="export_images",
        help="Do not export images from the DOCX file.",
    )
    parser.set_defaults(export_images=True)

    # "Wrapping tables" of the given sizes (e.g. 1x1, 1x2) can be removed.
    # nargs="*" lets the flag be passed bare; that case defaults to 1x1 below.
    parser.add_argument(
        "-rw",
        "--remove-wrapping-tables",
        required=False,
        nargs="*",
        default=None,
        help="Remove wrapping tables of specific sizes (e.g. 1x1, 1x2)",
    )

    parser.add_argument(
        "-l",
        "--log-level",
        default="WARNING",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        # The help text must agree with the actual default above.
        help="Set the logging level. Default is WARNING [DEBUG, INFO, WARNING, ERROR, CRITICAL]",
    )
    parser.add_argument(
        "-lf",
        "--log-output",
        help="Set the optional output logging file name.",
    )
    parser.add_argument(
        "-cs",
        "--custom-style",
        required=False,
        help="Set the optional style file to be loaded to support custom styles.",
    )
    parser.add_argument(
        "-ec",
        "--exclude-colors",
        required=False,
        help="Comma-separated list of hex color codes to exclude from Markdown encoding (e.g. #FFFFFF,#000000,#FF5733)",
    )

    args = parser.parse_args()

    if args.remove_wrapping_tables == []:
        # Flag given without values: fall back to the 1x1 default.
        args.remove_wrapping_tables = ["1x1"]
    elif args.remove_wrapping_tables:
        valid_sizes = []
        for size in args.remove_wrapping_tables:
            parts = size.split("x")
            # isdecimal() (not isdigit()) is used on purpose: isdigit() also accepts
            # characters such as superscript digits that int() cannot convert, which
            # would raise ValueError here instead of producing a warning.
            if len(parts) == 2 and all(  # noqa: PLR2004
                part.isdecimal() and int(part) > 0 for part in parts
            ):
                valid_sizes.append(size)
            else:
                logger.warning(f"Ignoring invalid table size: {size}")
        args.remove_wrapping_tables = valid_sizes or None

    # Validate and parse the exclude-colors argument; empty/absent becomes None.
    if args.exclude_colors:
        args.exclude_colors = validate_hex_colors(args.exclude_colors)
    else:
        args.exclude_colors = None

    return args


def expand_inputs(patterns: List[str]) -> List[str]:
    """
    Expand input file patterns into a de-duplicated list of file paths.

    Each entry has ``~`` expanded. An entry containing a wildcard character
    (``*``, ``?`` or ``[``) that does not name an existing path is expanded with a
    glob on its final path component; only ``.docx`` matches (case-insensitive)
    whose name does not start with ``~`` are kept, in sorted order. Every other
    entry is passed through unchanged, whether or not it exists, so that a later
    step can report missing files.

    Args:
        patterns (List[str]): File paths and/or glob patterns.

    Returns:
        List[str]: The expanded paths, first occurrence first, without duplicates.

    """
    resolved: List[str] = []
    for pattern in patterns:
        expanded = Path(pattern).expanduser()
        has_wildcard = any(ch in pattern for ch in "*?[")
        # A real file whose name happens to contain a wildcard character
        # (e.g. "report[1].docx") must be taken literally, not globbed away.
        if has_wildcard and not expanded.exists():
            # Sort so the processing order does not depend on the filesystem.
            resolved.extend(
                sorted(
                    str(match)
                    for match in expanded.parent.glob(expanded.name)
                    if match.suffix.lower() == ".docx"
                    and not match.name.startswith("~")
                ),
            )
        else:
            resolved.append(str(expanded))

    # dict preserves insertion order, so this drops repeats but keeps first-seen order.
    return list(dict.fromkeys(resolved))


def filter_existing_files(files: List[str]) -> List[str]:
    """
    Keep only the paths that exist on disk, warning about the rest.

    Args:
        files (List[str]): Candidate file paths.

    Returns:
        List[str]: The existing paths, in input order, each rendered via ``str(Path(...))``.

    """
    kept: List[str] = []
    for candidate in files:
        path = Path(candidate)
        if not path.exists():
            # Missing inputs are reported and skipped rather than aborting the run.
            logger.warning(f"Input file not found/skipped: {candidate}")
            continue
        kept.append(str(path))
    return kept


def determine_output_files(existing_files: List[str], output: str) -> List[str]:
    """
    Determine the output file paths based on existing input files and the specified output path.

    Args:
        existing_files (List[str]): A list of existing input file paths.
        output (str): The specified output file path or directory.

    Returns:
        List[str]: A list of determined output file paths.

    """
    output_files = []
    if output:
        out_path = Path(output)
        if len(existing_files) == 1 and not out_path.is_dir():
            output_files = [str(out_path)]
            out_dir = out_path.parent
            out_dir.mkdir(parents=True, exist_ok=True)
        else:
            out_dir = out_path
            out_dir.mkdir(parents=True, exist_ok=True)
            output_files = [
                str(out_dir / (Path(f).stem + ".md")) for f in existing_files
            ]
    else:
        output_files = [str(Path(f).with_suffix(".md")) for f in existing_files]
        for of in output_files:
            Path(of).parent.mkdir(parents=True, exist_ok=True)
    return output_files


def load_custom_style(custom_style_path: str) -> dict | None:
    """
    Load a custom style from a JSON file.

    Args:
        custom_style_path (str): The path to the custom style JSON file.

    Returns:
        Optional[Dict[str, Any]]: The loaded custom style or None if loading failed.

    """
    if not custom_style_path.endswith(".json"):
        logger.error(
            f"Custom style file must be a JSON file: {custom_style_path}",
        )
        return None
    if not Path(custom_style_path).exists():
        logger.error(
            f"Custom style file does not exist: {custom_style_path}",
        )
        return None
    try:
        with open(custom_style_path, "r", encoding="utf-8") as f:
            return json.loads(f.read())
    except json.JSONDecodeError:
        logger.exception(
            f"Invalid JSON format in custom style file: {custom_style_path}",
        )
        return None


def process_files(
    existing_files: list[str],
    output_files: list[str],
    args: Namespace,
    custom_style: dict | None,
) -> None:
    """
    Convert each input DOCX file to its paired Markdown output file.

    A single ``DocxConfig`` is built from the command-line arguments and reused
    for every conversion.

    Args:
        existing_files (List[str]): Input file paths.
        output_files (List[str]): Output file paths, paired by position with
            ``existing_files``; both lists must have the same length.
        args (Namespace): The command-line arguments.
        custom_style (dict | None): The custom style dictionary or None.

    Returns:
        None

    Raises:
        ValueError: If ``existing_files`` and ``output_files`` differ in length
            (raised by ``zip(..., strict=True)``).

    """
    config = DocxConfig()
    config.ascii_only = args.ascii_only
    config.output_image_dir = args.image_folder or "images"
    config.styles = custom_style or {}
    config.export_images = args.export_images

    if args.exclude_colors:
        # Union of the config's existing suppress_colors and the user's colors,
        # de-duplicated through a set (so the resulting order is not guaranteed).
        combined = set(config.suppress_colors + args.exclude_colors)
        config.suppress_colors = list(combined)

    if args.remove_wrapping_tables:
        # Turn "AxB" strings into (A, B) integer tuples, skipping anything malformed.
        table_sizes: list[tuple[int, int]] = []
        for spec in args.remove_wrapping_tables:
            try:
                first, second = (int(piece) for piece in spec.split("x"))
            except (ValueError, TypeError):
                logger.warning(f"Ignoring invalid table size: {spec}")
                continue
            else:
                if first > 0 and second > 0:
                    table_sizes.append((first, second))
                else:
                    logger.warning(f"Ignoring non-positive table size: {spec}")
        config.remove_wrapping_tables = table_sizes

    pairs = zip(existing_files, output_files, strict=True)
    for source_path, target_path in pairs:
        message = f"Converting {source_path} to {target_path} [custom style: {args.custom_style}]"
        print(message)  # noqa: T201
        converter = Docx2Md(source_path, config)
        converter.convert_docx_2_md(target_path)


def main() -> None:  # pragma: no cover
    """
    Run the CLI: parse arguments, resolve inputs and outputs, then convert.

    Returns early (after logging an error) when no input file exists or when a
    requested custom style file cannot be loaded.
    """
    args = parse_args()
    candidates = expand_inputs(list(args.inputs or []))

    # Logging is configured only once the requested level/file are known.
    setup_logger(
        log_level=args.log_level.upper(),
        log_fileName=args.log_output,
    )

    docx_files = filter_existing_files(candidates)
    if not docx_files:
        logger.error("No valid input DOCX files were provided.")
        return

    markdown_files = determine_output_files(docx_files, args.output)

    style = None
    if args.custom_style:
        style = load_custom_style(args.custom_style)
        if style is None:
            # The loader has already logged the reason; nothing to convert with.
            return

    process_files(docx_files, markdown_files, args, style)


# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
