import json
from pathlib import Path

import pandas as pd
import pytest
from kyd_dataspec_gen.data_profiler import profile_data

# Directory containing this test module; every fixture path is derived from it.
curr_dir = Path(__file__).parent
data_source = "icij"
# Directories are kept as plain strings because that is how the test below
# hands them to profile_data.
input_dir = str(curr_dir / "test_data")
output_dir = str(curr_dir / "test_output" / "profile" / "icij")
# Sample frame used by the in-memory ("pd.dataframe") test case; row 0 of the
# CSV supplies the column names.
input_data = pd.read_csv(
    Path(input_dir) / "panama_papers_nodes_entities.csv",
    header=0,
)


@pytest.mark.parametrize(
    (
        "t_data_source",
        "t_data_type",
        "t_output_dir",
        "t_input_dir",
        "t_input_data",
        "t_output_path",
    ),
    [
        pytest.param(
            data_source,
            "csv",
            output_dir,
            input_dir,
            None,
            f"{output_dir}/p_panama_papers_nodes_entities.json",
            id="csv file as import data",
        ),
        pytest.param(
            data_source,
            "pd.dataframe",
            output_dir,
            "",
            input_data,
            f"{output_dir}/p_icij.json",
            id="pandas dataframe as import data",
        ),
    ],
)
def test_profile_data(
    t_data_source, t_data_type, t_output_dir, t_input_dir, t_input_data, t_output_path
):
    """Test that profile_data writes a JSON profile with the expected stats.

    Two input modes are exercised: a CSV read from ``t_input_dir`` (with no
    in-memory data) and a pandas DataFrame passed directly (with an empty
    input directory).

    Args:
        t_data_source: Data source name forwarded to ``profile_data``.
        t_data_type: Input kind under test, ``"csv"`` or ``"pd.dataframe"``.
        t_output_dir: Directory the profile is expected to be written to.
        t_input_dir: Directory holding the input CSV; ``""`` for the
            DataFrame case.
        t_input_data: DataFrame to profile; ``None`` for the CSV case.
        t_output_path: Path of the JSON file the call is expected to create.
    """
    output_file = Path(t_output_path)

    # Remove any artefact left by a previous run so the existence check below
    # proves that *this* call produced the file.
    if output_file.exists():
        output_file.unlink()

    profile_data(
        data_source=t_data_source,
        data_type=t_data_type,
        output_dir=t_output_dir,
        input_dir=t_input_dir,
        input_data=t_input_data,
    )

    # Check existence before opening: otherwise a missing file surfaces as a
    # FileNotFoundError from open() and this assertion could never fail.
    assert output_file.exists(), f"profile_data did not create {output_file}"

    with output_file.open("r", encoding="utf-8") as file:
        data = json.load(file)

    assert data["global_stats"]["row_count"] == 4
    assert data["global_stats"]["column_count"] == 17
    assert len(data["data_stats"]) == 17
