"""Represents a part of a DOCX document"""

# Note: if Pylance reports errors in this file, run `pip install lxml-stubs`.
import logging

from anyascii import anyascii
from docx.oxml.text.hyperlink import CT_Hyperlink
from docx.oxml.text.paragraph import CT_P
from docx.oxml.text.run import CT_R
from lxml import etree as lxml_etree

from .docx_config import DocxConfig
from .docx_constants import (
    OutputEncodeType,
    docx_namespaces,
    w_ns,
    w_r,
)
from .docx_parts_run import RunPart
from .docx_utils import (
    _large_text,
    get_xml_child,
    get_xml_child_int,
    transform_text_for_markdown,
)
from .numbered_item_tracker import ListNumberTracker, TrackerType

logger = logging.getLogger(__name__)


class ParaPart:
    """Represents a paragraph in the document."""

    def __init__(
        self,
        config: DocxConfig,
        para: lxml_etree._Element,
        depth: int = 0,
    ) -> None:
        """
        Set up the paragraph part and read its style and numbering properties.

        Args:
            config: The configuration settings.
            para: The XML element representing the paragraph.
            depth: The depth of the paragraph in nested structures. Defaults to 0.

        """
        # Values supplied by the caller
        self.config = config
        self.part = para
        self.depth = depth
        self.runtime = config.runtime  # Shortcut so call sites read more easily

        # Working state used while runs are processed
        self.parser = lxml_etree.XMLParser(recover=True)
        self.current_run_info: RunPart | None = None

        # Defaults that remain in place when no <w:pPr> element is found
        self.style: str = ""
        self.num_id: int | None = None
        self.ilvl: int | None = None

        properties = para.find(".//w:pPr", namespaces=docx_namespaces)
        if properties is None:
            return

        # Style name plus the numbering id / indent level, read via the shared helpers
        self.style = get_xml_child(properties, "w:pStyle") or ""
        self.num_id = get_xml_child_int(properties, ".//w:numId")
        self.ilvl = get_xml_child_int(properties, ".//w:ilvl")

    def __repr__(self) -> str:
        """Return a debug representation listing the style and numbering fields."""
        details = f"style={self.style}, num_id={self.num_id}, ilvl={self.ilvl}"
        return f"ParaPart({details})"

    def extract_metadata_details(
        self,
        xml_string: str,
        encoding: OutputEncodeType = OutputEncodeType.MARKDOWN,
    ) -> tuple[str, str | None, str | None]:
        """
        Extract the metadata details from the run found in the given XML string.

        The fragment is wrapped in a synthetic ``<root>`` element and parsed with
        the recovering parser created in ``__init__``. Only the first match of
        each looked-up element is used.

        Args:
        ----
            xml_string (str): The XML string to parse.
            encoding (OutputEncodeType): The encoding type for the output. Defaults to OutputEncodeType.MARKDOWN.
                ``OutputEncodeType.HTML`` emits an ``<img>`` tag for images; any
                other value emits a Markdown image.

        Returns:
        -------
            str: The text content of the mc:Fallback/w:t element ("" if not found
                or empty), followed by image markup when the run references an
                image, image export is enabled and the image is known.
            str | None: The stripped text content of the w:instrText element, or None if not found.
            str | None: The field type from the w:fldChar element, or None if not found.

        """
        root = lxml_etree.fromstring(
            "<root>" + xml_string + "</root>",
            parser=self.parser,
        )

        # Find the mc:Fallback/w:t value
        fallback_text: str = ""
        fallback_node = root.find(".//mc:Fallback/w:t", namespaces=docx_namespaces)
        if fallback_node is not None:
            # An empty <w:t/> has text None; calling str() on it would leak the
            # literal "None" into the output, so only convert real content.
            node_text = fallback_node.text
            fallback_text = str(node_text) if node_text else ""

        # Field instruction (e.g. a REF directive), whitespace-trimmed
        in_str_directive: str | None = None
        instr_node = root.find(".//w:instrText", namespaces=docx_namespaces)
        if instr_node is not None and instr_node.text is not None:
            in_str_directive = str(instr_node.text).strip()

        # Field character type attribute of the first w:fldChar, if any
        field_type: str | None = None
        fld_char_node = root.find(".//w:fldChar", namespaces=docx_namespaces)
        if fld_char_node is not None:
            field_type = fld_char_node.attrib.get(f"{w_ns}fldCharType")

        # Look for the image reference and extract blip reference and image name
        image_ref = root.find(".//a:blip", namespaces=docx_namespaces)
        if image_ref is not None:
            logger.debug(f"Image Ref: {image_ref}")

            picture_ref = image_ref.attrib.get(f"{w_r}embed")
            picture_text = f"Picture-{picture_ref}"
            picture_name_obj = root.find(".//pic:cNvPr", namespaces=docx_namespaces)
            if picture_name_obj is not None:
                picture_text = picture_name_obj.attrib.get("name", picture_text)

            logger.debug(f"Link Reference: {picture_ref}")

            if self.config.export_images:
                try:
                    image_path = self.config.runtime.images[picture_ref]
                except KeyError:
                    # The blip has no embed attribute or the id is not among the
                    # known images; skip the image instead of aborting.
                    logger.warning(
                        f"Image reference [{picture_ref}] not found - image skipped",
                    )
                else:
                    if encoding == OutputEncodeType.HTML:
                        fallback_text += f'<img src="{image_path.as_posix()}" alt="{picture_text}"/>'
                    else:
                        fallback_text += f"![{picture_text}]({image_path.as_posix()})"

        logger.debug(
            f"extract_metadata_details: fallback_text={fallback_text}, in_str_directive={in_str_directive}, field_type={field_type}",
        )
        return (
            fallback_text,
            in_str_directive,
            field_type,
        )

    def extract_paragraph_text(  # noqa: PLR0912
        self,
        paragraph_oxml: CT_P,
        output_format: OutputEncodeType,
    ) -> str:
        """
        Extract the text from a paragraph and apply formatting.

        Each run is rendered through ``apply_markdown_formatting``. Footnote and
        endnote references become ``[^id]`` / ``[^endid]`` markers, a ``REF``
        field instruction wraps the following run text in ``[text](#bookmark)``
        and hyperlink elements become ``[text](target)``.

        Args:
            paragraph_oxml (docx.oxml.text.paragraph.CT_P): The paragraph object to extract text from.
            output_format (OutputEncodeType): Passed to ``extract_metadata_details``
                for runs whose text is empty.

        Returns:
            str: The extracted text with formatting applied.

        """
        pieces: list[str] = []
        anchor_reference_id: str | None = None
        # Last field character type seen on an empty-text run; it deliberately
        # persists across runs until the next empty-text run overwrites it.
        state: str | None = ""
        in_link = False
        for a in paragraph_oxml.inner_content_elements:
            logger.debug(f"extract_paragraph_text:Inner Element [{a}]")
            if isinstance(a, CT_R):
                # Handle footnote/endnote references
                run_info = RunPart(a)
                self.current_run_info = run_info
                if run_info.footnote_reference is not None:
                    logger.warning(
                        f"Footnote reference found - partial MD extract support: {run_info.footnote_reference}",
                    )
                    pieces.append(f"[^{run_info.footnote_reference}]")
                    continue

                if run_info.endnote_reference is not None:
                    pieces.append(f"[^end{run_info.endnote_reference}]")
                    logger.warning(
                        f"Endnote reference found - partial MD extract support: {run_info.endnote_reference}",
                    )
                    continue

                run_text = a.text
                if run_text == "":
                    # No visible text: the run may carry field codes, fallback
                    # text or an image instead.
                    run_text, directive, state = self.extract_metadata_details(
                        a.xml,
                        output_format,
                    )

                    if directive is not None:
                        # Split on any whitespace so repeated spaces do not yield
                        # an empty anchor, and require an argument so a bare
                        # "REF" cannot raise IndexError.
                        components = directive.split()
                        if len(components) > 1 and components[0] == "REF":
                            # Extract the bookmark reference id
                            anchor_reference_id = components[1]
                        logger.debug(
                            f"Directive [{components}] [{directive}]n id[{anchor_reference_id}]",
                        )

                # If we have detected a bookmark reference id, open the link
                if anchor_reference_id and not in_link:
                    logger.debug(
                        f"Starting bookmark reference id [{anchor_reference_id}]",
                    )
                    pieces.append("[")
                    in_link = True

                if run_text is not None:
                    pieces.append(self.apply_markdown_formatting(run_text))

                # If the field has ended, close the link with its target
                if state == "end" and anchor_reference_id:
                    logger.debug(
                        f"Ending bookmark reference id [{anchor_reference_id}]",
                    )
                    pieces.append(f"](#{anchor_reference_id})")
                    anchor_reference_id = None
                    in_link = False

            elif isinstance(a, CT_Hyperlink):
                ref = self.config.runtime.hyperlinks.get(a.rId)
                pieces.append(f"[{a.text}]({ref['val'] if ref else 'ERROR'})")
            elif a.tag not in self.config.runtime.missing_types:
                # Record the tag so each unknown element type is reported once
                self.config.runtime.missing_types[a.tag] = 1
                logger.warning(
                    f"Unknown element type: {a.tag} - {_large_text(a.text)}",
                )
        return "".join(pieces)

    def apply_markdown_formatting(
        self,
        text: str,
    ) -> str:
        """
        Wrap the given text in the markup required by the current run's styles.

        The styles are read from ``self.current_run_info``; the colour filter
        is read from ``self.config.suppress_colors``.

        Args:
        ----
            text (str): The text to format.

        Returns:
        -------
            str: The formatted text. An empty input yields "", an input that is
                only whitespace yields a single space.

        """
        run_info = self.current_run_info
        if run_info is None:
            return text  # pragma: no cover

        logger.debug(
            f"apply_markdown_formatting: text=[{text}], formatting={run_info}",
        )
        if not text:
            return ""

        # Remember whether a single space has to be restored on either side
        prefix = " " if text.startswith(" ") else ""
        suffix = " " if text.endswith(" ") else ""
        core = text.strip()

        # Whitespace-only input: nothing to decorate
        if not core:
            return " "

        # Ordered innermost to outermost; underline is applied last so it
        # encloses every other wrapper.
        wrappers = (
            (run_info.bold, "<b>", "</b>"),
            (run_info.strike, "~~", "~~"),
            (run_info.italic, "<i>", "</i>"),
            (run_info.superscript, "<sup>", "</sup>"),
            (run_info.subscript, "<sub>", "</sub>"),
            (run_info.underline, "<u>", "</u>"),
        )
        for enabled, opening, closing in wrappers:
            if enabled:
                core = f"{opening}{core}{closing}"

        # Put the surrounding spaces back outside the wrappers
        rendered = f"{prefix}{core}{suffix}"

        # Colour is applied around everything, including the restored spaces
        colour = run_info.color
        if colour and colour.upper() not in self.config.suppress_colors:
            rendered = f'<span style="color:{colour};">{rendered}</span>'

        logger.debug(
            f"apply_markdown_formatting: text=[{rendered}] formatting: {run_info}",
        )
        return rendered

    def process_para(  # noqa: PLR0912
        self,
        paragraph_oxml: CT_P,
        markdown_lines: list[str],
        in_list: bool,
    ) -> bool:
        """
        High-level paragraph processing helper.

        Extracts paragraph text (with runs processing), determines the
        paragraph style and whether it should be treated as a list-style
        paragraph (custom bullets), then appends the resulting heading, list
        item, quote line or plain paragraph to ``markdown_lines``.

        Args:
            paragraph_oxml: The paragraph to convert.
            markdown_lines: Output lines collected so far; mutated in place.
            in_list: Whether the caller is currently inside a list.

        Returns:
            bool: Indicates if the paragraph is part of a list.

        """
        # Extract the paragraph text using the existing extractor
        para_text = self.extract_paragraph_text(
            paragraph_oxml,
            OutputEncodeType.MARKDOWN,
        )

        para_style = self.style

        # One tolerant lookup shared by list detection and level resolution, so
        # a configuration without a "bullets" entry cannot raise KeyError later.
        bullet_styles = self.config.styles.get("bullets") or {}

        # A paragraph is a list item when its style says so, when the style is a
        # configured custom bullet, or when it carries numbering properties.
        is_list_para = (
            para_style == "ListParagraph"
            or para_style in bullet_styles
            or (paragraph_oxml.pPr is not None and paragraph_oxml.pPr.numPr is not None)
        )

        if self.config.ascii_only:
            # Fix non-ASCII characters (en-dash, em-dash, etc.)
            para_text = anyascii(para_text)

        # Transform the text for Markdown formatting
        para_text = transform_text_for_markdown(para_text)
        para_text_clean = para_text.strip()

        # Add support for quote/depth-based indentation
        quote_start = ">" * self.depth + " " if self.depth > 0 else ""
        quote_end = "  " if self.depth > 0 else ""

        logger.debug(f"Style [{para_style}] - {para_text}")
        # If a title is found, convert it to a header 0
        if para_style == "Title":
            para_style = "Heading0"

        # The heading level is taken from the last character of the style name,
        # so only styles ending in a digit can be rendered as headings.
        looks_like_heading = para_style.startswith("Heading")
        is_heading = looks_like_heading and para_style[-1].isdecimal()
        if looks_like_heading and not is_heading:
            logger.warning(
                f"Heading style without a numeric level: {para_style} - not treated as a heading",
            )

        if is_heading:
            # check to see if there is a numbering "override"
            if self.num_id is not None:
                para_text_clean = self.runtime.numbering_parts.get_numbered_text_by_id(
                    self.num_id,
                    self.ilvl or 0,
                    para_text_clean,
                )
            else:
                para_text_clean = (
                    self.runtime.numbering_parts.get_numbered_text_by_style(
                        para_style,
                        para_text_clean,
                    )
                )

            level = int(para_style[-1]) + 1  # Heading0 -> "#", Heading1 -> "##", ...

            # Only add headers that are not empty
            if para_text_clean != "":
                markdown_lines.append(f"{'#' * level} {para_text_clean}")
                logger.info(f"Processing Header{level - 1} [{para_text_clean}]")
            in_list = False
        elif is_list_para:
            list_num = self.num_id or -1
            list_level = self.ilvl or 0

            # Check if the style is in the custom bullets
            if self.style in bullet_styles:
                # If the style is in the custom bullets, use the level from the style
                logger.info(f"Using custom bullet style: {self.style}")
                list_level = bullet_styles[self.style]

            # Only add list items that are not empty
            if para_text_clean != "":
                if in_list and self.runtime.list_tracker and markdown_lines:
                    # Already in a list: the new item is appended to the last
                    # output entry. The markdown_lines check avoids indexing an
                    # empty list.
                    markdown_lines[-1] += (
                        "\n"
                        + quote_start
                        + self.runtime.list_tracker.get_number(
                            list_level,
                            para_text_clean,
                        )
                        + quote_end
                    )
                else:
                    # If we are not in a list, add the new item as a new list
                    # and create a new list tracker
                    logger.debug(f"Adding new list #: {list_num}")
                    abstract_list = self.runtime.numbering_parts.num_dict.get(
                        list_num,
                    )
                    list_formats = abstract_list.level_types if abstract_list else []

                    self.runtime.list_tracker = ListNumberTracker(
                        TrackerType.MD_LIST,
                        list_formats,
                    )

                    # Check if previous line is empty, if not add empty line
                    if markdown_lines and markdown_lines[-1] != "":
                        markdown_lines.append("")

                    markdown_lines.append(
                        quote_start
                        + self.runtime.list_tracker.get_number(
                            list_level,
                            para_text_clean,
                        )
                        + quote_end,
                    )
                in_list = True
        elif self.depth > 0 and para_text_clean == "":
            # Empty paragraph inside a quote: emit only the quote markers
            markdown_lines.append(">" * self.depth)
        elif para_text_clean != "":
            in_list = False
            # right strip the text to remove trailing spaces
            markdown_lines.append(quote_start + para_text.rstrip() + quote_end)

        return in_list
