"""
DiploAI Knowledge Graph - Link Builder (Phase 2b)

Hybrid model:
  Layer A (from WordPress): Toolset, taxonomy->topic, SUBTOPIC_OF, TAGGED_WITH, PUBLISHED_ON
"""

import pandas as pd

from wp_extractor import WPExtractor


def _empty_links() -> pd.DataFrame:
    return pd.DataFrame(columns=['source_id', 'source_name', 'target_id',
                                 'target_name', 'link', 'meta'])


def _combine_nodes(nodes_dict: dict[str, pd.DataFrame]) -> pd.DataFrame:
    """Combine all node DataFrames into one for lookups."""
    dfs = [df for df in nodes_dict.values() if not df.empty]
    if not dfs:
        return pd.DataFrame()
    return pd.concat(dfs, ignore_index=True)


# ------------------------------------------------------------------
# SUBTOPIC_OF
# ------------------------------------------------------------------

def build_subtopic_links(df_topics_raw: pd.DataFrame,
                         site_prefix: str) -> pd.DataFrame:
    """Build SUBTOPIC_OF links from child topics to their parent topics.

    Args:
        df_topics_raw: raw taxonomy terms with 'term_id', 'parent', 'name'.
        site_prefix: site namespace prepended to every node id.

    Returns:
        Link frame (source/target id+name, link, meta); empty if no term
        has a non-zero parent.
    """
    children = df_topics_raw[df_topics_raw['parent'] != 0].copy()
    if children.empty:
        return _empty_links()

    # term_id -> name lookup built once, instead of re-filtering the whole
    # topics frame for every child row (accidental O(n^2) in the original).
    # keep='first' also makes a duplicated term_id resolve deterministically
    # instead of raising inside Series.item().
    unique_terms = df_topics_raw.drop_duplicates(subset='term_id', keep='first')
    id_to_name = dict(zip(unique_terms['term_id'], unique_terms['name']))

    df = pd.DataFrame()
    df['source_id'] = site_prefix + '_topic_' + children['term_id'].astype(str)
    df['source_name'] = children['name'].tolist()
    df['target_id'] = site_prefix + '_topic_' + children['parent'].astype(int).astype(str)
    # Unknown parent ids fall back to '' (same behavior as the original lookup).
    df['target_name'] = children['parent'].map(lambda tid: id_to_name.get(tid, '')).tolist()
    df['link'] = 'SUBTOPic_OF'.upper()
    df['meta'] = None

    print(f"[{site_prefix}] SUBTOPIC_OF: {len(df)}")
    return df


# ------------------------------------------------------------------
# Toolset relationships
# ------------------------------------------------------------------

def build_toolset_links(extractor: WPExtractor,
                        wp_ids: list[int]) -> pd.DataFrame:
    """Build links from Toolset parent/child relations for the given wp_ids.

    The relationship name (e.g. 'has chapter') is normalized to an
    upper-snake link label (HAS_CHAPTER). Duplicated relations are dropped.
    """
    prefix = extractor.site_prefix
    df_child, df_parent = extractor.get_toolset_relations(wp_ids)

    pieces = []
    for rel in (df_child, df_parent):
        if rel.empty:
            continue
        links = pd.DataFrame({
            'source_id': prefix + '_' + rel['parent_type'] + '_' + rel['parent_wp_id'].astype(str),
            'source_name': rel['parent_title'],
            'target_id': prefix + '_' + rel['child_type'] + '_' + rel['child_wp_id'].astype(str),
            'target_name': rel['child_title'],
            'link': rel['relationship_name'].str.upper().str.replace(' ', '_'),
        })
        links['meta'] = None
        pieces.append(links)

    if not pieces:
        return _empty_links()

    out = pd.concat(pieces, ignore_index=True).drop_duplicates()
    print(f"[{prefix}] Toolset links: {len(out)}")
    return out


# ------------------------------------------------------------------
# Post -> Topic (taxonomy assignment)
# ------------------------------------------------------------------

def build_post_topic_links(extractor: WPExtractor,
                           nodes_dict: dict[str, pd.DataFrame]) -> pd.DataFrame:
    """Link posts to the taxonomy terms assigned to them.

    One RELATED_<POSTTYPE>_&_<TAXTYPE> link is emitted per (post, term)
    pair. target_id is resolved by term *name* against the combined node
    table; names with no matching node yield a NaN target_id (kept, as in
    the original output).
    """
    prefix = extractor.site_prefix
    combined = _combine_nodes(nodes_dict)
    if combined.empty:
        return _empty_links()

    # NOTE(review): duplicated node names collapse to the last node_id seen
    # in the combined frame — confirm names are unique across node types.
    name_to_id = dict(combined[['name', 'node_id']].values)

    frames = []
    for pt in extractor.include_post_types:
        raw = extractor.get_post_topic_links(pt)
        if raw.empty:
            continue

        # Build the link frame without mutating `raw` (the original wrote
        # 'post_type'/'src_id' columns back into the extractor's result).
        lnk = pd.DataFrame()
        lnk['source_id'] = (prefix + '_' + pt + '_' + raw['ID'].astype(str)).tolist()
        lnk['source_name'] = raw['post_title'].tolist()
        lnk['target_id'] = raw['taxonomy_name'].map(name_to_id).tolist()
        lnk['target_name'] = raw['taxonomy_name'].tolist()
        # Vectorized label construction replaces the per-row iterrows() loop;
        # the label text is byte-identical to the original f-string.
        lnk['link'] = ('RELATED_' + pt.upper() + '_&_'
                       + raw['taxonomy_type'].str.upper()).tolist()
        lnk['meta'] = None
        frames.append(lnk)

    if not frames:
        return _empty_links()

    result = pd.concat(frames, ignore_index=True)
    print(f"[{prefix}] Post-Topic links: {len(result)}")
    return result


# ------------------------------------------------------------------
# TAGGED_WITH
# ------------------------------------------------------------------

def build_tagged_with_links(extractor: WPExtractor,
                            nodes_dict: dict[str, pd.DataFrame]) -> pd.DataFrame:
    """Build TAGGED_WITH links from content nodes to Tag nodes.

    Pairs whose post or tag cannot be resolved to a known node are dropped.
    """
    prefix = extractor.site_prefix
    combined = _combine_nodes(nodes_dict)
    if combined.empty:
        return _empty_links()

    # Content nodes only — Tag and Date nodes are targets/metadata, never
    # tagged sources.
    non_meta = combined[~combined['labels'].apply(lambda x: x == ['Tag'] or x == ['Date'])]
    if non_meta.empty:
        return _empty_links()

    post_ids = non_meta['wp_id'].astype(int).tolist()
    raw = extractor.get_post_tag_links(post_ids)
    if raw.empty:
        return _empty_links()

    # FIX: resolve post wp_id -> node_id against content nodes only. The
    # original built this map from the full combined frame, where a Tag/Date
    # term sharing a numeric wp_id with a post could shadow the post's
    # node_id (WordPress posts and terms have separate id spaces).
    wpid_to_nodeid = dict(zip(non_meta['wp_id'].astype(str), non_meta['node_id']))
    wpid_to_name = dict(zip(non_meta['wp_id'].astype(str), non_meta['name']))
    tag_nodes = combined[combined['labels'].apply(lambda x: x == ['Tag'])]
    tag_to_nodeid = dict(zip(tag_nodes['wp_id'].astype(str), tag_nodes['node_id']))

    post_keys = raw['post_id'].astype(str)
    df = pd.DataFrame()
    df['source_id'] = post_keys.map(wpid_to_nodeid)
    # FIX: populate the source name (the original left a None placeholder).
    df['source_name'] = post_keys.map(wpid_to_name)
    df['target_id'] = raw['term_id'].astype(str).map(tag_to_nodeid)
    df['target_name'] = raw['tag_name'].tolist()
    df['link'] = 'TAGGED_WITH'
    df['meta'] = None

    # Keep only links whose both endpoints resolved to existing nodes.
    df.dropna(subset=['source_id', 'target_id'], inplace=True)
    print(f"[{prefix}] TAGGED_WITH: {len(df)}")
    return df


# ------------------------------------------------------------------
# PUBLISHED_ON
# ------------------------------------------------------------------

def build_published_on_links(nodes_dict: dict[str, pd.DataFrame],
                             site_prefix: str) -> pd.DataFrame:
    """Link each dated post to its publication-month Date node.

    Posts whose 'date' cannot be parsed are skipped; the target node id is
    keyed by year-month (YYYY-MM) and the target name is the spelled-out
    month ('January 2024').
    """
    posts = nodes_dict.get('posts', pd.DataFrame())
    if posts.empty or 'date' not in posts.columns:
        return _empty_links()

    parsed = pd.to_datetime(posts['date'], errors='coerce')
    has_date = parsed.notna()
    if not has_date.any():
        return _empty_links()

    dated_posts = posts[has_date]
    stamps = parsed[has_date]
    month_keys = stamps.dt.to_period('M').astype(str)

    df = pd.DataFrame({
        'source_id': dated_posts['node_id'].tolist(),
        'source_name': dated_posts['name'].tolist(),
        'target_id': [f"{site_prefix}_date_{m}" for m in month_keys],
        'target_name': stamps.dt.strftime('%B %Y').tolist(),
    })
    df['link'] = 'PUBLISHED_ON'
    df['meta'] = None

    print(f"[{site_prefix}] PUBLISHED_ON: {len(df)}")
    return df


# ------------------------------------------------------------------
# Assemble all links
# ------------------------------------------------------------------

def build_all_links(extractor: WPExtractor,
                    nodes_dict: dict[str, pd.DataFrame],
                    df_topics_raw: pd.DataFrame) -> pd.DataFrame:
    """Run every link builder for one site and return the de-duplicated union."""
    prefix = extractor.site_prefix

    combined = _combine_nodes(nodes_dict)
    # Toolset relations only make sense for numeric WP ids.
    numeric_mask = combined['wp_id'].apply(lambda v: str(v).isdigit())
    wp_ids = [int(v) for v in combined.loc[numeric_mask, 'wp_id']]

    parts = [
        build_subtopic_links(df_topics_raw, prefix),
        build_toolset_links(extractor, wp_ids),
        build_post_topic_links(extractor, nodes_dict),
        build_tagged_with_links(extractor, nodes_dict),
        build_published_on_links(nodes_dict, prefix),
    ]

    df_links = pd.concat(parts, ignore_index=True)
    df_links = df_links.drop_duplicates()

    print(f"[{prefix}] TOTAL links: {len(df_links)}")
    return df_links
