"""
DiploAI Knowledge Graph - diplomacy.edu Pipeline (Phase 2)

Usage:
    python run_diplo.py                # Full pipeline
    python run_diplo.py --csv-only     # Only extract and export CSV
    python run_diplo.py --neo4j-only   # Intended: load existing CSV into Neo4j (CSV read-back is not implemented yet)
"""

import sys
from datetime import datetime

import pandas as pd

from config import DIPLO_CONFIG
from wp_extractor import WPExtractor
from node_builder import build_all_nodes
from link_builder import build_all_links
from neo4j_loader import get_driver, load_all_nodes, load_relationships, ensure_indexes, clear_database


def main():
    """Run the diplomacy.edu knowledge-graph pipeline.

    The mode is read from the first command-line argument and defaults to
    ``--full``:

    * ``--full``: extract posts, build nodes and links, export both to CSV,
      then clear the configured Neo4j database and load the fresh data.
    * ``--csv-only``: extract, build and export to CSV; Neo4j is not touched.
    * ``--neo4j-only``: rejected up front (see ``Raises``).

    Raises:
        SystemExit: if the mode is not recognised; if ``--neo4j-only`` is
            requested (nothing in this script reads the exported CSVs back,
            so there would be no nodes or links to load); or if extraction
            produced no non-empty node frame.
    """
    mode = sys.argv[1] if len(sys.argv) > 1 else '--full'

    # Validate the mode before touching the config, MySQL or Neo4j so that a
    # typo cannot silently turn into a no-op run.
    if mode not in ('--full', '--csv-only', '--neo4j-only'):
        raise SystemExit(
            f"Unknown option {mode!r}. "
            "Usage: python run_diplo.py [--full | --csv-only | --neo4j-only]"
        )

    # The nodes/links only ever exist in memory after an extraction run.
    # Previously this mode connected to Neo4j, created indexes and then died
    # with a NameError, leaving the driver open; fail early and clearly.
    if mode == '--neo4j-only':
        raise SystemExit(
            "--neo4j-only is not supported yet: the exported CSV files are "
            "not read back, so there is nothing to load. "
            "Run the full pipeline instead."
        )

    cfg = DIPLO_CONFIG
    today = datetime.now().strftime('%d%m%Y')

    print("=" * 60)
    print(f"DIPLOMACY.EDU KG PIPELINE  ({today})")
    print("=" * 60)

    extractor = WPExtractor(
        host=cfg['mysql_host'],
        database=cfg['mysql_db'],
        site_prefix=cfg['site_prefix'],
        include_post_types=cfg['include_post_types'],
    )

    print("\n--- Extracting posts ---")
    extractor.get_all_posts()
    extractor.get_all_posts_taxonomies()

    print("\n--- Building nodes ---")
    nodes_dict, df_topics_raw = build_all_nodes(extractor)

    print("\n--- Building links ---")
    df_links = build_all_links(extractor, nodes_dict, df_topics_raw)

    # pd.concat raises an opaque ValueError on an empty list; abort with a
    # readable message instead, and do so before the database can be cleared.
    node_frames = [df for df in nodes_dict.values() if not df.empty]
    if not node_frames:
        raise SystemExit("No nodes were extracted; nothing to export or load.")

    all_nodes = pd.concat(node_frames, ignore_index=True)
    all_nodes.to_csv(f'diplomacy_nodes_{today}.csv', index=False)
    df_links.to_csv(f'diplomacy_links_{today}.csv', index=False)
    print(f"\nExported: diplomacy_nodes_{today}.csv ({len(all_nodes)} nodes)")
    print(f"Exported: diplomacy_links_{today}.csv ({len(df_links)} links)")

    if mode == '--full':
        print(f"\n--- Loading into Neo4j ({cfg['neo4j_database']}) ---")
        driver = get_driver()
        try:
            clear_database(driver, cfg['neo4j_database'])
            ensure_indexes(driver, cfg['neo4j_database'])
            load_all_nodes(driver, cfg['neo4j_database'], nodes_dict)
            load_relationships(driver, cfg['neo4j_database'], df_links)
        finally:
            # Release the connection even when a load step fails.
            driver.close()
        print("\nDone.")


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
