
    7-i                        d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
 dZd Zdefd	Zd
edefdZdedeeej"                  f   fdZdej"                  fdZddededee   fdZdedej"                  fdZdefdZdedefdZy)z
DiploAI Knowledge Graph - Neo4j Loader (Phase 2b)

Dual label model:
  - Post/Topic nodes: batch CREATE with multiple labels (e.g. :Document:Blog)
  - Tag/Date nodes: batch MERGE (single label)
  - Relationships: batch MERGE grouped by type
    N)GraphDatabase)tqdm)	NEO4J_URI
NEO4J_USER
NEO4J_PASSi  c                  L    t        j                  t        t        t        f      S )N)auth)r   driverr   r   r        neo4j_loader.py
get_driverr      s    	Z0HIIr   databasec           	          g d}| j                  |      5 }|D ]  \  }}	 |j                  d| d| d         	 ddd       t        d| dt	        |       d	       y# t        $ r Y Qw xY w# 1 sw Y   4xY w)
z>Create indexes on :Document and other labels for fast lookups.))Documentnode_id)r   document_hash)r   wp_id)Tagr   )r   name)Dater   r   z#CREATE INDEX IF NOT EXISTS FOR (n:`z	`) ON (n.)N[z] Indexes ensured (z specs).)sessionrun	Exceptionprintlen)r
   r   specsr   labelprops         r   ensure_indexesr#      s    E 
	*g KE4A%	RVQWWXYZ ! 
+ 
AhZ*3u:,h
?@  	 
+	*s-   	A/A A/ 	A,)A/+A,,A//A8dreturnc                 v    dh}| j                         D ci c]  \  }}||vr||||k(  r|nd c}}S c c}}w )z6Remove NaN/None/labels from properties dict for Neo4j.labels )items)r$   skipkvs       r   _clean_propsr-   4   sO    :D	4!1Qd] Q]qAv25!4 4 4s   5
nodes_dictc           	         t        d |j                         D              }t        d| d| d       | j                  |      5 }t	        |d      }|j                  dt        j                               }|j                  st        |||       |j                  d	t        j                               }|j                  sH|j                         D 	cg c]  \  }}	t        |	j                                 }
}}	t        |d
d|
|       |j                  dt        j                               }|j                  st        |||       |j                  dt        j                               }|j                  sH|j                         D 	cg c]  \  }}	t        |	j                                 }
}}	t        |dd|
|       |j                          ddd       t        d| d       yc c}	}w c c}	}w # 1 sw Y   %xY w)z
    Load all node types:
      - posts/topics: CREATE with dual labels from 'labels' column
      - tags/dates: MERGE with single label
    c              3   L   K   | ]  }|j                   rt        |        y wN)emptyr   ).0dfs     r   	<genexpr>z!load_all_nodes.<locals>.<genexpr>E   s     F"5BRXXB"5s   $$Loading z nodes into 'z'...r   Nodestotaldesctopicstagsr   r   postsdatesr   NDone loading z nodes.)sumvaluesr   r   r   getpd	DataFramer2   _load_labeled_nodesiterrowsr-   to_dict_batch_mergeclose)r
   r   r.   r9   r   pbarr;   r<   _row
props_listr=   r>   s                r   load_all_nodesrN   ?   s    F*"3"3"5FFE	HUG=
$
78		*g%g. ",,.9||6 ~~fblln5zzDHMMOTO&!S,s{{}5OJT%JE w7{{5 w7{{DINNDTUDT&!S,s{{}5DTJU&)ZF

3 
+6 
M%
()! U V- 
+	*s,   BG1#G%6BG1#G++"G1%G11G:r4   c                     i }|j                         D ]S  \  }}|d   }dj                  |      }|j                  |g       j                  t	        |j                                      U |j                         D ]  \  }}	dj                  |j                  d            }
d|
 d}t        dt        |	      t              D ];  }|	||t        z    }| j                  ||       |j                  t        |             =  y)	zMMERGE nodes grouped by their label combination (supports dual/triple labels).r'   :z`:`!UNWIND $batch AS props MERGE (n:`z*` {node_id: props.node_id}) SET n += propsr   batchN)rF   join
setdefaultappendr-   rG   r)   splitranger   
BATCH_SIZEr   update)r   r4   rJ   label_groupsrK   rL   r'   	label_key	label_strrM   label_cyphercypherirS   s                 r   rE   rE   f   s    *,L++-3XHHV$		2.55l3;;=6QR  
 ".!3!3!5	:zz)//#"674\NBnoq#j/:6AqZ0EKKeK,KKE
# 7 "6r   r!   	merge_keyrM   c                     d| d| d| d}t        dt        |      t              D ]>  }|||t        z    }| j                  ||       |s%|j	                  t        |             @ y)z0Batch MERGE nodes on a single label + merge_key.rQ   z` {z: props.z}) SET n += propsr   rR   N)rX   r   rY   r   rZ   )r   r!   ra   rM   rJ   r_   r`   rS   s           r   rH   rH   w   sq     'i[ D F 1c*oz21Q^,F%(KKE
#	 3r   df_linksc                    |j                  g d      }i }|j                         D ]2  \  }}|j                  |d   g       j                  |d   |d   d       4 t	        |      }t        d| d| d	t	        |       d
t         d	       d}g }	| j                  |      5 }
t        |d      }|j                         D ]r  \  }}d| d}t        dt	        |      t              D ]J  }|||t        z    }	 |
j                  ||       |t	        |      z  }|j                  t	        |             L t |j                          ddd       t        d| d       |	r4t        dt	        |	       d       |	D ]  \  }}t        d| d|         yy# t        $ rD}||	D cg c]  }|d   	 nc c}w c}vr|	j                  |t        |      dd f       Y d}~d}~ww xY w# 1 sw Y   xY w)z*Batch MERGE relationships grouped by type.)	source_id	target_idlink)subsetrg   re   rf   )re   rf   r6   z relationships into 'z' (z types, batch=z)...r   r   Relsr8   z
                UNWIND $batch AS row
                MATCH (s {node_id: row.source_id}), (t {node_id: row.target_id})
                MERGE (s)-[r:`z`]->(t)
            rR   NP   r?   z relationships.z  Failed types (z):z    z: )dropnarF   rU   rV   r   r   rY   r   r   r)   rX   r   r   strrZ   rI   )r
   r   rc   df_clean
rel_groupsrK   rL   r9   loadedfailed_typesr   rJ   rel_typepairsr_   r`   rS   eftrterrs                        r   load_relationshipsrw      s   &HIH(*J##%3c&k2.55[)[)7
 	 & ME	HUG0
 ;*onZL> ? FL		*g%f-)//1OHe  (j )F
 1c%j*5aJ/EKKeK4 #e*$CJ' 6  2 	

# 
+& 
M&
12 \!2 3267#GBDBse$% $  ! E'E"1'EE$++Xs1vcr{,CDE 
+	*sC   "AG#6F	<G#	G 	G	!F.-)G	G#G 	 G##G,c                     | j                  |      5 }|j                  d       ddd       t        d| d       y# 1 sw Y   xY w)z#Delete all nodes and relationships.r   zMATCH (n) DETACH DELETE nNzCleared all data from 'z'.)r   r   r   )r
   r   r   s      r   clear_databasery      s?    		*g/0 
+	#H:R
01 
+	*s	   =Ar_   c                     | j                  |      5 }|j                  |      }|D cg c]  }| c}cddd       S c c}w # 1 sw Y   yxY w)z1Run an arbitrary Cypher query and return results.r   N)r   r   )r
   r   r_   r   resultrecords         r   	run_queryr}      sF    		*gV$%+,V6V, 
+	*, 
+	*s   A	>AAAr1   )__doc__mathpandasrC   neo4jr   r   configr   r   r   rY   r   rl   r#   dictr-   rD   rN   rE   listrH   rw   ry   r}   r   r   r   <module>r      s        4 4
JAS A.4D 4T 4$*S $*d3;L6M $*N$R\\ $"	$ 	$ 	$$t* 	$ (& (& (&^2S 2- -S -r   