"""Utility functions for DOCX processing"""

# Note: if Pylance reports errors on this import, run `pip install lxml-stubs`.
from lxml import etree

from .docx_constants import (
    MAX_DISPLAY_LEN,
    docx_namespaces,
)


def get_xml_child(
    xml: etree._Element,
    child_name: str,
    attribute: str = "val",
) -> str | None:
    """
    Read an attribute from the first matching child of an XML element.

    The child is located with ``xml.find`` using the ``docx_namespaces``
    prefix map, and the attribute is looked up under the namespace that
    ``docx_namespaces`` maps to the ``w`` prefix.

    Args:
        xml (etree._Element): The element whose children are searched.
        child_name (str): The path/tag of the child to locate.
        attribute (str): Local name of the attribute to read from the child.

    Returns:
        str | None: The attribute value, or None when no child matches or
            the matched child does not carry the attribute.

    """
    node = xml.find(child_name, namespaces=docx_namespaces)
    if node is None:
        return None

    # Attributes are addressed by their namespace-qualified "{uri}name" form.
    qualified_name = f"{{{docx_namespaces['w']}}}{attribute}"
    return node.get(qualified_name)


def get_xml_child_int(
    xml: etree._Element,
    child_name: str,
    attribute: str = "val",
) -> int | None:
    """
    Read an attribute from a child element and convert it to an integer.

    This delegates the lookup to ``get_xml_child`` and passes the result
    through ``int``.

    Args:
        xml (etree._Element): The element whose children are searched.
        child_name (str): The path/tag of the child to locate.
        attribute (str): Local name of the attribute to read from the child.

    Returns:
        int | None: The attribute value as an integer, or None when the
            lookup yields nothing.

    Raises:
        ValueError: If the attribute value is present but is not a valid
            integer literal.

    """
    raw_value = get_xml_child(xml, child_name, attribute)
    if raw_value is None:
        return None
    return int(raw_value)


def _large_text(text: str) -> str:
    """
    Cap a string at ``MAX_DISPLAY_LEN`` characters for logging purposes.

    Args:
        text (str): The text to cap.

    Returns:
        str: ``text`` unchanged when it is no longer than
            ``MAX_DISPLAY_LEN``, otherwise its leading ``MAX_DISPLAY_LEN``
            characters.

    """
    # Short enough already: hand it back untouched.
    if len(text) <= MAX_DISPLAY_LEN:
        return text
    return text[:MAX_DISPLAY_LEN]


def transform_text_for_markdown(para_text: str) -> str:
    """
    Convert inline HTML emphasis tags in a paragraph to Markdown markers.

    Back-to-back tags are first collapsed via ``clean_html_tags``; then
    ``<i>``/``</i>`` become ``*`` and ``<b>``/``</b>`` become ``**``.
    Any other tags are left as they are.

    Args:
        para_text (str): The text to transform.

    Returns:
        str: The transformed text.

    """
    # Ordered (HTML tag, Markdown marker) substitutions.
    tag_to_marker = (
        ("<i>", "*"),
        ("</i>", "*"),
        ("<b>", "**"),
        ("</b>", "**"),
    )

    # Collapse adjacent close/open pairs before converting, so they do not
    # turn into doubled-up markers.
    converted = clean_html_tags(para_text)
    for html_tag, marker in tag_to_marker:
        converted = converted.replace(html_tag, marker)

    return converted


def clean_html_tags(para_text: str) -> str:
    """
    Collapse adjacent close/open pairs of the same ``u``, ``b`` or ``i`` tag.

    A closing tag immediately followed by the same opening tag is dropped.
    When the two are separated by exactly one space, the pair is replaced
    by that single space. Each substitution is applied once, in a fixed
    order (u, b, i without a space, then u, b, i with a space).

    Args:
        para_text (str): The text to clean.

    Returns:
        str: The cleaned text.

    """
    # Ordered (pattern, replacement) pairs; directly adjacent pairs are
    # handled before the space-separated ones.
    substitutions = (
        ("</u><u>", ""),
        ("</b><b>", ""),
        ("</i><i>", ""),
        ("</u> <u>", " "),
        ("</b> <b>", " "),
        ("</i> <i>", " "),
    )

    cleaned = para_text
    for pattern, replacement in substitutions:
        cleaned = cleaned.replace(pattern, replacement)

    return cleaned
