"""
Unit tests for the kyd_docx2md module.

This module contains test cases for the Docx2Md class, which converts .docx files
to markdown format. It includes tests for various features such as tables, images,
styles, hyperlinks, and text formatting.
"""

import logging
import unittest
from pathlib import Path
from tempfile import TemporaryDirectory

from docx import Document
from dotenv import load_dotenv

from kyd_docx2md.docx_config import DocxConfig
from kyd_docx2md.docx_utils import _large_text
from kyd_docx2md.kyd_docx2md import Docx2Md

# Load environment variables from a .env file at import time (module-level side effect).
load_dotenv()

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def sample_docx_file(tmp_path: Path) -> Path:
    """
    Build a small .docx fixture inside ``tmp_path`` and return its location.

    The document contains, in order: a level-1 heading, a plain paragraph,
    two paragraphs using the "List Paragraph" style, and a level-2 heading.

    Parameters
    ----------
    tmp_path : Path
        Directory in which ``test.docx`` is written.

    Returns
    -------
    Path
        Path of the saved ``test.docx`` file.

    """
    target = tmp_path / "test.docx"

    doc = Document()
    doc.add_heading("Test Heading 1", level=1)
    doc.add_paragraph("This is a test paragraph.")
    # Both list entries share the same paragraph style.
    for item_text in ("This is a list item.", "Another list item."):
        doc.add_paragraph(item_text, style="List Paragraph")
    doc.add_heading("Test Heading 2", level=2)

    # The path is passed to save() as a plain string.
    doc.save(str(target))
    return target


# Fixture documents live in a "test-data" directory next to this test module.
test_data_dir = Path(__file__).with_name("test-data")

# Each test writes its markdown output into its own TemporaryDirectory.


class TestDocx2Md(unittest.TestCase):
    """
    Test cases for the Docx2Md class.

    Each test converts a .docx fixture (taken from ``test_data_dir`` or built on
    the fly) to markdown inside a fresh temporary directory and checks the
    generated text. Covered features: tables, headings, text formatting,
    hyperlinks, images, custom bullet styles and embedded tables.
    """

    @staticmethod
    def _ascii_config() -> "DocxConfig":
        """
        Create the configuration shared by every conversion test.

        Returns
        -------
        DocxConfig
            A new ``DocxConfig`` with ``ascii_only`` set to ``True``. Tests that
            need further options set them on the returned object.

        """
        config = DocxConfig()
        config.ascii_only = True
        return config

    @staticmethod
    def _convert(docx_path: Path, config: "DocxConfig", output_md_file: Path) -> str:
        """
        Convert a .docx file to markdown and return the generated text.

        Parameters
        ----------
        docx_path : Path
            The .docx file to convert.
        config : DocxConfig
            Configuration handed to ``Docx2Md``.
        output_md_file : Path
            Where the markdown file is written. Some assertions depend on this
            file name (for example exported image names), so callers choose it.

        Returns
        -------
        str
            The content of ``output_md_file`` decoded as UTF-8.

        """
        Docx2Md(docx_path, config).convert_docx_2_md(output_md_file)
        return output_md_file.read_text(encoding="utf-8")

    def test_kyd_simple_tables(self) -> None:
        """
        Test the conversion of a table-only .docx file to markdown.

        This test ensures that a header row and a body row of the table appear
        as markdown table rows in the output.
        """
        with TemporaryDirectory() as temp_dir:
            md_content = self._convert(
                test_data_dir / "table-only.docx",
                self._ascii_config(),
                Path(temp_dir) / "test_output.md",
            )

        self.assertIn("| Header 1 |  | Header 2 |", md_content)
        self.assertIn("| Row 3.1  |  | Row 3.2  |", md_content)

    def test_kyd_full_file(self) -> None:
        """
        Smoke-test the conversion of the ``simple-docx.docx`` fixture.

        No content is asserted: the test passes when the conversion completes
        without raising and the output file can be read back as UTF-8.
        """
        with TemporaryDirectory() as temp_dir:
            self._convert(
                test_data_dir / "simple-docx.docx",
                self._ascii_config(),
                Path(temp_dir) / "test_output.md",
            )

    def test_kyd_created_file(self) -> None:
        """
        Test the creation of a .docx file and its conversion to markdown.

        This test builds a document with ``sample_docx_file`` and ensures that
        its heading and list text are present in the converted markdown.
        """
        with TemporaryDirectory() as temp_dir:
            docx_path = sample_docx_file(Path(temp_dir))

            config = self._ascii_config()
            config.output_image_dir = "."

            md_content = self._convert(
                docx_path, config, Path(temp_dir) / "test_output.md"
            )

        self.assertIn("Test Heading 1", md_content)
        self.assertIn("Another list item.", md_content)

    def test_kyd_truncated_logging(self) -> None:
        """
        Test the truncation of large text by ``_large_text``.

        This test ensures that a 1000-character string is returned unchanged
        and that a 10000-character string is cut down to 5000 characters.
        """
        self.assertEqual(_large_text("a" * 1000), "a" * 1000)
        self.assertEqual(_large_text("a" * 10000), "a" * 5000)

    def test_kyd_docx_headers(self) -> None:
        """
        Test the conversion of numbered headings from a .docx file.

        This test ensures that the nested heading "2.1.1" is emitted as a
        level-4 markdown heading including its number.
        """
        with TemporaryDirectory() as temp_dir:
            md_content = self._convert(
                test_data_dir / "simple-heading.docx",
                self._ascii_config(),
                Path(temp_dir) / "test_headers_output.md",
            )

        self.assertIn("#### 2.1.1. Heading 2.1.1", md_content)

    def test_kyd_simple_text_format(self) -> None:
        """
        Smoke-test the conversion of a .docx file with simple text formatting.

        No content is asserted: the test passes when the conversion completes
        without raising and the output file can be read back as UTF-8.
        """
        with TemporaryDirectory() as temp_dir:
            self._convert(
                test_data_dir / "simple-text-formatting.docx",
                self._ascii_config(),
                Path(temp_dir) / "test_output.md",
            )

    def test_kyd_simple_text_color(self) -> None:
        """
        Smoke-test the conversion of a .docx file with colored text.

        No content is asserted: the test passes when the conversion completes
        without raising and the output file can be read back as UTF-8.
        """
        with TemporaryDirectory() as temp_dir:
            self._convert(
                test_data_dir / "simple-text-color.docx",
                self._ascii_config(),
                Path(temp_dir) / "test_output.md",
            )

    def test_kyd_simple_hypertext(self) -> None:
        """
        Test the conversion of a .docx file containing hyperlinks to markdown.

        This test ensures that both internal cross-references and an external
        URL are rendered with markdown link syntax.
        """
        with TemporaryDirectory() as temp_dir:
            md_content = self._convert(
                test_data_dir / "simple-hyperlink.docx",
                self._ascii_config(),
                Path(temp_dir) / "test_output.md",
            )

        self.assertIn("[0](#_Ref194476708)", md_content)
        self.assertIn("[Header 1](#_Ref194476708)", md_content)
        self.assertIn("[Link to KYD](https://kyd.ai)", md_content)

    def test_kyd_simple_image(self) -> None:
        """
        Test the conversion of a .docx file containing images to markdown.

        This test ensures that the image is referenced with markdown image
        syntax and that the accompanying text is kept. The expected image path
        contains the output file stem (``test_output``).
        """
        with TemporaryDirectory() as temp_dir:
            md_content = self._convert(
                test_data_dir / "simple-image.docx",
                self._ascii_config(),
                Path(temp_dir) / "test_output.md",
            )

        self.assertIn("![Picture 1](images/test_output_rId5.jpeg)", md_content)
        self.assertIn("This is the KYD logo", md_content)

    def test_kyd_simple_image_no_export(self) -> None:
        """
        Test the conversion of a .docx file containing images to markdown without exporting images.

        This test ensures that images in the .docx file are not saved and the markdown
        does not contain image links when image export is disabled.
        """
        with TemporaryDirectory() as temp_dir:
            image_dir = Path(temp_dir) / "images"

            config = self._ascii_config()
            config.output_image_dir = str(image_dir)
            config.export_images = False

            md_content = self._convert(
                test_data_dir / "simple-image.docx",
                config,
                Path(temp_dir) / "test_output.md",
            )

            self.assertNotIn("![Picture 1]", md_content)
            # Checked before the temporary directory is removed, so a False
            # result means the converter never created the image directory.
            self.assertFalse(image_dir.exists())

    def test_kyd_simple_table(self) -> None:
        """
        Test the conversion of a .docx file containing a simple table to markdown.

        This test ensures that bullet lists inside table cells, including a
        nested list, are rendered as HTML ``<ul>``/``<li>`` markup in the row.
        """
        config = self._ascii_config()
        config.output_image_dir = "images"

        with TemporaryDirectory() as temp_dir:
            md_content = self._convert(
                test_data_dir / "simple-table.docx",
                config,
                Path(temp_dir) / "test_output.md",
            )

        expected_row = (
            "| <ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul> | "
            "<ul><li>Item Col2:1</li>"
            "<ul><li>Col2:1:a</li><li>Col2:1:b</li></ul>"
            "<li>Item Col2:2</li><li>Item Col2:3</li></ul> |"
        )
        self.assertIn(expected_row, md_content)

    def test_kyd_formatting_bugs_no_1(self) -> None:
        """
        Test the conversion of a .docx file with known formatting bugs to markdown.

        This test ensures that specific formatting issues, such as bold text and spacing,
        are correctly handled and converted to markdown format.
        """
        with TemporaryDirectory() as temp_dir:
            md_content = self._convert(
                test_data_dir / "formatting-bugs.docx",
                self._ascii_config(),
                Path(temp_dir) / "test_output.md",
            )

        self.assertIn("Bold space test: Test **bold** and spacing", md_content)
        self.assertIn("| <b>Bold in Header</b> 1 |", md_content)
        self.assertIn("This is para has leading and trailing spaces\n", md_content)

    def test_kyd_unknown_types(self) -> None:
        """
        Test the conversion of the ``unknown-types.docx`` fixture to markdown.

        The fixture presumably contains element types the converter does not
        recognise (inferred from its name); the test checks that the bold and
        italic paragraph text is still emitted with markdown emphasis.
        """
        with TemporaryDirectory() as temp_dir:
            md_content = self._convert(
                test_data_dir / "unknown-types.docx",
                self._ascii_config(),
                Path(temp_dir) / "test_output.md",
            )

        self.assertIn("This is **BOLD** and this is in *Italics*", md_content)

    def test_kyd_custom_bullet(self) -> None:
        """
        Test the conversion of a .docx file with custom bullet styles to markdown.

        The styles "Bullet1" and "Bullet2" are mapped to the values 0 and 1
        through ``config.styles``. The test checks the resulting nested
        markdown list as well as the HTML list markup produced inside a table.
        """
        config = self._ascii_config()
        config.styles = {
            "bullets": {
                "Bullet1": 0,
                "Bullet2": 1,
            },
        }

        with TemporaryDirectory() as temp_dir:
            md_content = self._convert(
                test_data_dir / "custom-bullets.docx",
                config,
                Path(temp_dir) / "test_output.md",
            )

        self.assertIn("* **Hello**\n  * There", md_content)
        expected_row = (
            "| <br/><ul><li>Table-Bul1</li><ul><li>Table-Bul2</li></ul></ul> | "
            "<ul><li>Proper-Bullet1</li><ul><li>Proper-Bullet2</li></ul>Test 1 |"
        )
        self.assertIn(expected_row, md_content)

    def test_kyd_embedded_remove_wrap_tables(self) -> None:
        """
        Test the conversion of embedded tables when wrapping tables are removed.

        With ``remove_wrapping_tables`` set to ``[(1, 1)]`` the output must
        contain the nested bullet list and the block-quoted inner table, must
        not contain the embedded-table warning, and the runtime counter of
        embedded tables must be zero.
        """
        config = self._ascii_config()
        config.remove_wrapping_tables = [(1, 1)]

        with TemporaryDirectory() as temp_dir:
            md_content = self._convert(
                test_data_dir / "embedded-tables.docx",
                config,
                Path(temp_dir) / "test_output.md",
            )

        self.assertIn(
            "* Level 1\n  * Level 1.1\n    * Level 1.1.1\n  * Level 1.2\n* Level 2",
            md_content,
        )
        expected_quote = (
            ">> Table Contents 1.1.2  \n"
            ">>\n"
            ">>> | 1.1.2.1 Table3.1 - Col 1 | 1.1.2.1Table3.1 - Col 2 |"
        )
        self.assertIn(expected_quote, md_content)
        self.assertNotIn("!!WARNING!! Embedded table found", md_content)
        self.assertEqual(config.runtime.embedded_tbl_count, 0)

    def test_kyd_detect_embedded_tables(self) -> None:
        """
        Test the detection of embedded tables during conversion.

        With the default table handling the output must contain the
        embedded-table warning and the runtime counter of embedded tables must
        be greater than zero.
        """
        config = self._ascii_config()

        with TemporaryDirectory() as temp_dir:
            md_content = self._convert(
                test_data_dir / "embedded-tables.docx",
                config,
                Path(temp_dir) / "test_output.md",
            )

        self.assertIn("!!WARNING!! Embedded table found", md_content)
        self.assertGreater(config.runtime.embedded_tbl_count, 0)
