
    i                         d Z ddlZddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZmZmZmZmZ d	efd
Zd Zedk(  r e        yy)ag  
DiploAI Knowledge Graph - dig.watch Pipeline (Phase 2)

Usage:
    python run_dw.py                   # Full pipeline
    python run_dw.py --csv-only        # Only extract and export CSV
    python run_dw.py --neo4j-only      # Only load from existing CSV into Neo4j
    python run_dw.py --discover        # Discover post_types and taxonomies in DW database
    N)datetime)	DW_CONFIG)WPExtractor)build_all_nodes)build_all_links)
get_driverload_all_nodesload_relationshipsensure_indexesclear_database	extractorc                 l   t        d       t        | j                         j                  d             t        d       t        | j                         j                  d             t        d       | j	                  g d      }t        |j
                  s|j                  d             y d       y )Nz
=== POST TYPES ===Findexz
=== TAXONOMIES ===zJ
=== SEARCHING: trend, newsletter, process, approach, technolog, value ===)trend
newsletterprocessapproach	technologvaluezNo matching taxonomies found.)printdiscover_post_types	to_stringdiscover_taxonomiesdiscover_specific_taxonomiesempty)r   specifics     	run_dw.pydiscoverr      s    	
 !	)
'
'
)
3
3%
3
@A	
 !	)
'
'
)
3
3%
3
@A	
WX55LH 
(

5

)eEde    c                  l   t        t        j                        dkD  rt        j                  d   nd} t        }t	        j
                         j                  d      }t        |d   |d   |d   |d         }| d	k(  rt        |       y | d
v r%t        d       t        d| d       t        d       t        d       |j                          |j                          t        d       t        |      \  }}t        d       t        |||      }t        j                  |j!                         D cg c]  }|j"                  r| c}d      }|j%                  d| dd       |j%                  d| dd       t        d| dt        |       d       t        d| dt        |       d       | dv r{t        d|d    d        t'               }	| dk(  rt)        |	|d          t+        |	|d          t-        |	|d          t/        |	|d          |	j1                          t        d!       y y c c}w )"N   --fullz%d%m%Y
mysql_hostmysql_dbsite_prefixinclude_post_types)hostdatabaser&   r'   z
--discover)r#   z
--csv-onlyz<============================================================zDIG.WATCH KG PIPELINE  ()z
--- Extracting posts ---z
--- Building nodes ---z
--- Building links ---T)ignore_index	dw_nodes_z.csvFr   	dw_links_z
Exported: dw_nodes_z.csv (z nodes)zExported: dw_links_z links))r#   z--neo4j-onlyz
--- Loading into Neo4j (neo4j_databasez) ---z
Done.)lensysargvr   r   nowstrftimer   r   r   get_all_postsget_all_posts_taxonomiesr   r   pdconcatvaluesr   to_csvr   r   r   r	   r
   close)
modecfgtodayr   
nodes_dictdf_topics_rawdf_linksdf	all_nodesdrivers
             r   mainrD   #   s#   chh-!+388A;D
CLLN##H-EZ&34	I |''h(q12h*+!**,()$3I$>!
M()"9j-HIIJ,=,=,?P,?brxxr,?P_cd	9UG40>)E7$/u=%eWF3y>2B'JK#E7&XwGH))*3/?+@*AGH863'7#89vs#345vs#34jA63'7#8(Ci * Qs   .H1 H1__main__)__doc__r0   r   pandasr6   configr   wp_extractorr   node_builderr   link_builderr   neo4j_loaderr   r	   r
   r   r   r   rD   __name__ r    r   <module>rO      sO        $ ( ( g g	f 	f1h zF r    