From 8a52dd804fdd169cf9f7ca07ca2902d3738e4b0a Mon Sep 17 00:00:00 2001
From: Duy Kha <duykha121314888@gmail.com>
Date: Tue, 29 Jul 2025 22:05:19 -0500
Subject: [PATCH 1/8] yaml config file

---
 GraphGeneration/encoder.yaml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 GraphGeneration/encoder.yaml

diff --git a/GraphGeneration/encoder.yaml b/GraphGeneration/encoder.yaml
new file mode 100644
index 0000000..ed82350
--- /dev/null
+++ b/GraphGeneration/encoder.yaml
@@ -0,0 +1,19 @@
+model:
+  name: topoGED
+  node2vec_setup:
+    node2vec_dimensions: 64 # We add features onto the end since Node2Vec doesn't embed features
+    node2vec_walk_length: 50 # Number of nodes visited per walk (Higher is more global, smaller is local)
+    node2vec_num_walks: 10 # Number of walks to start per node (Higher is more detailed and stable)
+    node2vec_p: 1.0 # Return parameter, the likelihood of revisiting a node (Higher is less backtracking)
+    node2vec_q: 1.0 # The walk bias for determining direction (Higher is more DFS-like; lower is BFS-like)
+    node2vec_window: 10 # The context size (Higher is broader learning)
+    node2vec_min_count: 1 # Minimum number of occurrences for a node to be considered (Higher will ignore more rare nodes)
+    node2vec_batch_words: 4 # The batch size for when Word2Vec is used (Higher will train faster; but with more memory)
+    node2vec_workers: 1 # Number of workers (threads)
+
+training:
+  batch_size: 64
+  lr: 0.001
+  epochs: 500
+
+dataset: CollegeMsg

From e0f28f82612458b9e31ba82c575927db4bde1f4e Mon Sep 17 00:00:00 2001
From: Duy Kha <duykha121314888@gmail.com>
Date: Wed, 30 Jul 2025 12:16:29 -0500
Subject: [PATCH 2/8] use yaml in file

---
 GraphGeneration/encoder.yaml                  |  2 +
 GraphGeneration/scripts/compute_embedding.py  | 37 ++++------
 GraphGeneration/scripts/topoGED_end_to_end.py | 74 ++++++++++++++-----
 3 files changed, 73 insertions(+), 40 deletions(-)

diff --git a/GraphGeneration/encoder.yaml b/GraphGeneration/encoder.yaml
index ed82350..b701a10 100644
--- a/GraphGeneration/encoder.yaml
+++ b/GraphGeneration/encoder.yaml
@@ -10,6 +10,7 @@ model:
     node2vec_min_count: 1 # Minimum number of occurrences for a node to be considered (Higher will ignore more rare nodes)
     node2vec_batch_words: 4 # The batch size for when Word2Vec is used (Higher will train faster; but with more memory)
     node2vec_workers: 1 # Number of workers (threads)
+  hidden_dim: 64
 
 training:
   batch_size: 64
@@ -17,3 +18,4 @@ training:
   epochs: 500
 
 dataset: CollegeMsg
+seed: 1024
diff --git a/GraphGeneration/scripts/compute_embedding.py b/GraphGeneration/scripts/compute_embedding.py
index 0e71d47..26dee0d 100644
--- a/GraphGeneration/scripts/compute_embedding.py
+++ b/GraphGeneration/scripts/compute_embedding.py
@@ -4,17 +4,11 @@
 import torch
 from node2vec import Node2Vec
 from GraphGeneration.models.temporal_gnn.script.config import args
+import yaml
 
-# Node2Vec Parameters
-node2vec_dimensions = args.nfeat  # We add features onto the end since Node2Vec doesn't embed features 
-node2vec_walk_length = 50  # Number of nodes visited per walk (Higher is more global, smaller is local)
-node2vec_num_walks = 10  # Number of walks to start per node (Higher is more detailed and stable)
-node2vec_p = 1.0  # Return parameter, the likelihood of revisiting a node (Higher is less backtracking)
-node2vec_q = 1.0  # The walk bias for determining direction (Higher is more DFS-like; lower is BFS-like)
-node2vec_window = 10  # The context size (Higher is broader learning)
-node2vec_min_count = 1  # Minimum number of occurrences for a node to be considered (Higher will ignore more rare nodes)
-node2vec_batch_words = 4  # The batch size for when Word2Vec is used (Higher will train faster; but with more memory)
-node2vec_workers = 1  # Number of workers (threads)
+# Load YAML config
+with open("GraphGeneration/encoder.yaml", "r") as file:
+    encoder_config = yaml.safe_load(file)
 
 def compute_linear_gnn_embeddings(G: nx.DiGraph, device):
     """
@@ -61,19 +55,19 @@ def compute_node2vec_embeddings(G: nx.DiGraph, device):
     """
     node2vec = Node2Vec(
         G,
-        dimensions=node2vec_dimensions,
-        walk_length=node2vec_walk_length,
-        num_walks=node2vec_num_walks,
-        workers=node2vec_workers,
-        p=node2vec_p,
-        q=node2vec_q,
+        dimensions=encoder_config["model"]["node2vec_setup"]["node2vec_dimensions"],
+        walk_length=encoder_config["model"]["node2vec_setup"]["node2vec_walk_length"],
+        num_walks=encoder_config["model"]["node2vec_setup"]["node2vec_num_walks"],
+        workers=encoder_config["model"]["node2vec_setup"]["node2vec_workers"],
+        p=encoder_config["model"]["node2vec_setup"]["node2vec_p"],
+        q=encoder_config["model"]["node2vec_setup"]["node2vec_q"],
          quiet=True
     )
     
     model = node2vec.fit(
-        window=node2vec_window, 
-        min_count=node2vec_min_count, 
-        batch_words=node2vec_batch_words
+        window=encoder_config["model"]["node2vec_setup"]["node2vec_window"], 
+        min_count=encoder_config["model"]["node2vec_setup"]["node2vec_min_count"], 
+        batch_words=encoder_config["model"]["node2vec_setup"]["node2vec_batch_words"]
     )  # Perform Node2Vec
 
     # Used to generate an embedding for isolated nodes
@@ -110,7 +104,8 @@ def compute_node_embeddings_LSTM(graph_snapshots, lstm_model, device):
     # Collect per-timestep node embeddings
     node_history = defaultdict(list)
     old_nodes = set()
-    null_embed = torch.tensor([0]*(node2vec_dimensions + node2vec_batch_words), dtype=torch.float32).to(device)
+    null_embed = torch.tensor([0]*(encoder_config["model"]["node2vec_setup"]["node2vec_dimensions"] + encoder_config["model"]["node2vec_setup"]["node2vec_batch_words"]),
+                              dtype=torch.float32).to(device)
     for G in graph_snapshots:
         snapshot_embeddings = compute_node2vec_embeddings(G, device)
         for node, emb in snapshot_embeddings.items():
@@ -149,7 +144,7 @@ def get_GCN_data(graph_snapshots):
     x_list = []
     edge_index_list = []
 
-    F = node2vec_dimensions # number of features per node (change this if you want more features)
+    F = encoder_config["model"]["node2vec_setup"]["node2vec_dimensions"] # number of features per node (change this if you want more features)
 
     for G in graph_snapshots:
         node2vec_embeddings = compute_node2vec_embeddings(G)
diff --git a/GraphGeneration/scripts/topoGED_end_to_end.py b/GraphGeneration/scripts/topoGED_end_to_end.py
index 18243ad..66a37ae 100644
--- a/GraphGeneration/scripts/topoGED_end_to_end.py
+++ b/GraphGeneration/scripts/topoGED_end_to_end.py
@@ -9,11 +9,13 @@
 import pandas as pd
 import os
 import sys
+import yaml
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
 
 from GraphGeneration.utils.Evaluator import Evaluator
 from GraphGeneration.models.temporal_gnn.script.config import args
 from load_data import load_data, generate_training_data_cached, generate_validation_data_cached
+from GraphGeneration.utils.casting_type import to_tensor
 from GraphGeneration.utils.sampling_edges_utils import predict_edges
 from GraphGeneration.utils.graph_construction_utils import compute_reappearance_probabilities, get_node_features, update_degrees
 from create_sub_graphs import create_nn_graph, create_on_graph
@@ -22,18 +24,13 @@
 from GraphGeneration.models.model import setupMLP, load_encoder_model
 
 # Import all node embedding methods
-from compute_embedding import compute_embedding, node2vec_batch_words
+from compute_embedding import compute_embedding
 from process_data import modifyGraphIds, build_edgebanks_from_start
 from torch.utils.data import DataLoader
 
 # Import Loss fn
 from GraphGeneration.scripts.composite_graphlet_loss_fn import GraphletLoss
 
-# Set seeds
-global_seed = args.seed
-random.seed(global_seed)
-np.random.seed(global_seed) 
-
 # Set up device
 try:
     if torch.cuda.is_available():
@@ -46,9 +43,17 @@
     device = torch.device("cpu")
     print("Using CPU")  
 
+# Load YAML config
+with open("GraphGeneration/encoder.yaml", "r") as file:
+    encoder_config = yaml.safe_load(file)
+
+# Set seeds
+random.seed(encoder_config["seed"])
+np.random.seed(encoder_config["seed"]) 
+
 class Runner(object):
     def __init__(self):      
-        self.seed = global_seed
+        self.seed = encoder_config["seed"]
         
         # Set up Evaluator
         self.evaluator = Evaluator()
@@ -68,7 +73,8 @@ def __init__(self):
         self.all_edge_types = ['o-o-bank', 'o-o-nobank', 'o-n', 'n-n']
         
         # Load the global encoder & decoder model
-        self.encoder_model, input_dim = load_encoder_model(args, device=device, node2vec_dimensions=args.nfeat + node2vec_batch_words, hidden_dim=64)
+        self.encoder_model, input_dim = load_encoder_model(args, device=device, node2vec_dimensions=encoder_config["model"]["node2vec_setup"]["node2vec_dimensions"] + encoder_config["model"]["node2vec_setup"]["node2vec_batch_words"]
+                                                           , hidden_dim=encoder_config["model"]["hidden_dim"])
         self.link_prediction_decoder = setupMLP(embedding_dim=input_dim*2, embedding=args.embedding, mlpEncoding=args.mlpEncoding, embedOld=args.embedOld)
         self.link_prediction_decoder.to(device)
         
@@ -89,12 +95,12 @@ def __init__(self):
         # Convert number of snapshots to integer
         self.num_snapshots = len(self.probabilities)
         self.train_end = int(0.8 * self.num_snapshots)
-        val_end = int(0.9 * self.num_snapshots)
+        self.val_end = int(0.9 * self.num_snapshots)
 
         # Assign snapshots
         self.training_graphs = [self.target_graphs[i][-1] for i in range(self.train_end)]
-        self.validation_graphs = [self.target_graphs[i][-1] for i in range(self.train_end, val_end)]
-        self.test_graphs = [self.target_graphs[i][-1] for i in range(val_end, self.num_snapshots)]
+        self.validation_graphs = [self.target_graphs[i][-1] for i in range(self.train_end, self.val_end)]
+        self.test_graphs = [self.target_graphs[i][-1] for i in range(self.val_end, self.num_snapshots)]
 
     # ======================= TRAIN MODEL =======================
     def train_multi_head(self, training_samples, validation_samples, epochs=250, batch_size=64, training_new_edges_count=0):
@@ -137,11 +143,17 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                 "old_nodes": set(self.training_graphs[0].nodes()),
                 "new_nodes": set()
             } 
+            
             for snapshot in range(1, len(self.training_graphs)):
+                # Prepare current target old nodes for building graph
+                self.current_target_old_nodes = set().union(*[g.nodes() for g in self.training_graphs[:snapshot]])
+                
+                # Prepare the edge type counts for current target graph
                 self.current_target_count = {
                     edge_type: self.probabilities[snapshot][j + 2]
                     for j, edge_type in enumerate(self.all_edge_types)
                 }
+                
                 for flag in self.all_edge_types:
                     curr_X_train = training_samples[flag]['X'][snapshot]
                     curr_y_train = training_samples[flag]['y'][snapshot]
@@ -183,23 +195,29 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                             if n not in node_embeddings and flag in ['o-n', 'n-n']:
                                 node_types["new_nodes"].add(n)
                         
+                        # Assign embeddings for source nodes
+                        # Assign zero vectors for new nodes for on and nn edge type
                         src_embed = torch.stack([
                             node_embeddings[n] if n in node_embeddings and flag in ['o-n', 'n-n']
                             else torch.zeros(embed_dim, device=node_embeddings[any_node].device)
                             for n in src_nodes
                         ])
 
+                        # Assign embeddings for dest nodes
+                        # Assign zero vectors for new nodes for on and nn edge type
                         dst_embed = torch.stack([
                             node_embeddings[n] if n in node_embeddings and flag in ['o-n', 'n-n']
                             else torch.zeros(embed_dim, device=node_embeddings[any_node].device)
                             for n in dst_nodes
                         ])
 
+                        # Converting dim
                         if src_embed.dim() == 1:
                             src_embed = src_embed.unsqueeze(1)  
                         if dst_embed.dim() == 1:
                             dst_embed = dst_embed.unsqueeze(1) 
-                            
+                        
+                        # Get predictions for link
                         preds = self.link_prediction_decoder(src_embed=src_embed, dst_embed=dst_embed, edge_type=flag)
                         
                         if preds.dim() == 0:
@@ -218,7 +236,7 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                         train_preds.extend(preds.detach().cpu().numpy())
                         train_labels.extend(y.detach().cpu().numpy())
 
-                    # Assign embeddings for all the training_nodes
+                    # Constructing temp graph
                     curr_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=training_graphs, encoder_model=self.encoder_model, device=device)
                     sampled_edges = predict_edges(tmp_graph, edge_type=flag, node_types=node_types, edgebank=self.all_edgebanks[snapshot], link_prediction_decoder=self.link_prediction_decoder, 
                                 old_node_embeddings=curr_embeddings, top_k=self.current_target_count[flag], graph_num=snapshot, device=device)
@@ -235,7 +253,22 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                     
                     node_types["old_nodes"].update(self.training_graphs[snapshot].nodes())  # Add the old nodes
                     node_types["new_nodes"] = set() # reset new nodes
-                        
+                
+                # Compute the graphlet loss
+                # Step 1: Constructing the predicted graph
+                self.current_target_snapshot = snapshot
+                pred_graph, _ = self.build_accumulating_filtration_sequence_with_edgebank()
+                pred_graph = pred_graph[-1]
+                
+                # Step 2: Computing the graphlet loss
+                pred_kernel, true_kernel, distance = self.evaluator.evaluateOrca(pred_graph, self.target_graphs[snapshot])
+                graphlet_loss = graphlet_loss_fn(to_tensor(pred_kernel).unsqueeze(0), to_tensor(true_kernel).unsqueeze(0))
+                graphlet_loss.backward()
+                optimizer.step()
+                
+            
+            # Validation
+            
             for flag in self.all_edge_types:
                 if (epoch + 1) % 100 == 0 or epoch == 0:
                     epochMessage = f"Epoch {epoch+1:02d} | Edge Type: {flag} | Train Loss: {np.mean(train_loss[flag]):.4f} | Train AUCROC {np.mean(train_auc[flag]):.4f}"
@@ -263,16 +296,16 @@ def train_models(self):
         
         # Prepare training data
         training_sorted_samples, training_new_edges_count = generate_training_data_cached(training_graphs=self.training_graphs,
-                                                all_edgebanks=self.all_edgebanks, MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=global_seed, saved_data_file_path=self.saved_input)
+                                                all_edgebanks=self.all_edgebanks, MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=self.seed, saved_data_file_path=self.saved_input)
 
         # Prepare validation data
         # We pass all_edgebanks of the training snapshots edgebanks
         validation_sorted_samples, training_new_edges_count = generate_validation_data_cached(training_graphs=self.validation_graphs, old_training_nodes=old_training_nodes, 
-                                                all_edgebanks=self.all_edgebanks[self.train_end], MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=global_seed, type_data="validation", saved_data_file_path=self.saved_input)
+                                                all_edgebanks=self.all_edgebanks[self.train_end], MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=self.seed, type_data="validation", saved_data_file_path=self.saved_input)
         # Prepare test data
         # We pass all_edgebanks of the training snapshots edgebanks
         test_sorted_samples, training_new_edges_count = generate_validation_data_cached(training_graphs=self.test_graphs, old_training_nodes=old_training_nodes, 
-                                                all_edgebanks=self.all_edgebanks[self.train_end], MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=global_seed, type_data="test", saved_data_file_path=self.saved_input)
+                                                all_edgebanks=self.all_edgebanks[self.val_end], MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=self.seed, type_data="test", saved_data_file_path=self.saved_input)
         
         print('Training') 
     
@@ -304,6 +337,9 @@ def build_accumulating_filtration_sequence_with_edgebank(self):
         edgebank = self.all_edgebanks[self.current_target_snapshot]
         current_target_graph_description = self.graph_descriptions[self.current_target_snapshot]
         
+        # Prepare the graphs we have known
+        known_graphs = self.training_graphs[:self.current_target_snapshot]
+        
         V_total = int(current_target_graph_description[-1][0])
         E_total = int(current_target_graph_description[-1][1])
         W_total = current_target_graph_description[-1][2] 
@@ -345,7 +381,7 @@ def build_accumulating_filtration_sequence_with_edgebank(self):
         get_node_features(tmp_graph, self.thresholds, current_target_graph_description, old_nodes, new_nodes)  
 
         # Assign embeddings for all the training_nodes
-        curr_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=self.training_graphs, encoder_model=self.encoder_model, device=device)
+        curr_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=known_graphs, encoder_model=self.encoder_model, device=device)
         
         # Assign zero vector for new nodes
         for new_node in new_nodes:
@@ -362,7 +398,7 @@ def build_accumulating_filtration_sequence_with_edgebank(self):
         
             tmp_graph.add_edges_from(sampled_edges)
             update_degrees(tmp_graph)
-            new_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=self.training_graphs + [tmp_graph], encoder_model=self.encoder_model, device=device)
+            new_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=known_graphs + [tmp_graph], encoder_model=self.encoder_model, device=device)
             curr_embeddings.update(new_embeddings)  # Recompute old node embeddings
         
             edge_pool = edge_pool.extend(sampled_edges)

From 774b74857f30913a2c74d90294fe24e72136e9fb Mon Sep 17 00:00:00 2001
From: Duy Kha <duykha121314888@gmail.com>
Date: Thu, 31 Jul 2025 13:37:01 -0500
Subject: [PATCH 3/8] switch to using yaml

---
 GraphGeneration/encoder.yaml                  |  7 ++-
 GraphGeneration/models/model.py               | 28 ++-------
 GraphGeneration/scripts/compute_embedding.py  | 22 +++----
 GraphGeneration/scripts/load_data.py          | 18 +++---
 GraphGeneration/scripts/topoGED_end_to_end.py | 63 ++++++++++---------
 .../utils/graph_construction_utils.py         |  1 -
 6 files changed, 64 insertions(+), 75 deletions(-)

diff --git a/GraphGeneration/encoder.yaml b/GraphGeneration/encoder.yaml
index b701a10..9174ae8 100644
--- a/GraphGeneration/encoder.yaml
+++ b/GraphGeneration/encoder.yaml
@@ -1,4 +1,4 @@
-model:
+encoder_model:
   name: topoGED
   node2vec_setup:
     node2vec_dimensions: 64 # We add features onto the end since Node2Vec doesn't embed features
@@ -11,6 +11,11 @@ model:
     node2vec_batch_words: 4 # The batch size for when Word2Vec is used (Higher will train faster; but with more memory)
     node2vec_workers: 1 # Number of workers (threads)
   hidden_dim: 64
+  nodeEmbeddingType: "LSTM"
+  addOnFeature: "Position"
+
+decoder_model:
+  encode_links: "Concat"
 
 training:
   batch_size: 64
diff --git a/GraphGeneration/models/model.py b/GraphGeneration/models/model.py
index dc454d1..49a1391 100644
--- a/GraphGeneration/models/model.py
+++ b/GraphGeneration/models/model.py
@@ -1,14 +1,9 @@
 # Models in use
-import os
-import torch
 from GraphGeneration.models.MultiHeadedEdgePredictor import MultiHeadedEdgePredictor
-from GraphGeneration.models.EdgePredictor import EdgePredictorMLP
 from GraphGeneration.models.temporal_gnn.script.utils.util import logger
-from GraphGeneration.models.temporal_gnn.script.models.HTGN import HTGN
-from GraphGeneration.models.GCLSTM import GCLSTM
 from GraphGeneration.models.SimpleNodeLSTM import SimpleNodeLSTM
 
-def setupMLP(embedding_dim, embedding, mlpEncoding, embedOld):   
+def setupMLP(embedding_dim, mlpEncoding):   
     """
     Set up the MLP based on the arguments provided in the command line starter
     
@@ -21,28 +16,15 @@ def setupMLP(embedding_dim, embedding, mlpEncoding, embedOld):
     input_dim = embedding_dim  # Starting input dimension (two 32-dim node embeddings)
         
     # Set up the MLPs according to arguments
-    if embedOld == 'True':
-        flags = ['o-o-bank', 'o-o-nobank', 'o-n', 'n-n']
-    else:
-        flags = ['o-o-nobank', 'o-n', 'n-n']
+    flags = ['o-o-bank', 'o-o-nobank', 'o-n', 'n-n']
     mlp = MultiHeadedEdgePredictor(in_channels=input_dim, hidden_channels=32, edge_types=flags, input_type=mlpEncoding)
     
     return mlp
 
-def load_encoder_model(args, device, node2vec_dimensions, hidden_dim=64, HTGN_nodelist=[]):       
-    if args.embeddingType == 'LSTM':
+def load_encoder_model(encoder_config, device, node2vec_dimensions, hidden_dim=64, HTGN_nodelist=[]):       
+    if encoder_config["encoder_model"]["nodeEmbeddingType"] == 'LSTM':
         model = SimpleNodeLSTM(input_dim=node2vec_dimensions, hidden_dim=hidden_dim).to(device)
-    elif args.embeddingType == 'GCLSTM':
-        model = GCLSTM(in_channels=node2vec_dimensions, hidden_channels=64).to(device)
-        model.device = device
-    elif args.embeddingType == 'HTGN':
-        args.num_nodes = len(HTGN_nodelist)
-        args.nfeat = node2vec_dimensions
-        args.nhid = 64
-        args.nout = 64
-        model = HTGN(args).to(device)
-        model.device = device
     else:
         raise Exception('pls define the model')
-    logger.info('using model {} '.format(args.embeddingType))
+    logger.info('using model {} '.format(encoder_config["encoder_model"]["nodeEmbeddingType"]))
     return model, hidden_dim
diff --git a/GraphGeneration/scripts/compute_embedding.py b/GraphGeneration/scripts/compute_embedding.py
index b97248d..7b41e4a 100644
--- a/GraphGeneration/scripts/compute_embedding.py
+++ b/GraphGeneration/scripts/compute_embedding.py
@@ -56,19 +56,19 @@ def compute_node2vec_embeddings(G: nx.DiGraph, device):
     """
     node2vec = Node2Vec(
         G,
-        dimensions=encoder_config["model"]["node2vec_setup"]["node2vec_dimensions"],
-        walk_length=encoder_config["model"]["node2vec_setup"]["node2vec_walk_length"],
-        num_walks=encoder_config["model"]["node2vec_setup"]["node2vec_num_walks"],
-        workers=encoder_config["model"]["node2vec_setup"]["node2vec_workers"],
-        p=encoder_config["model"]["node2vec_setup"]["node2vec_p"],
-        q=encoder_config["model"]["node2vec_setup"]["node2vec_q"],
+        dimensions=encoder_config["encoder_model"]["node2vec_setup"]["node2vec_dimensions"],
+        walk_length=encoder_config["encoder_model"]["node2vec_setup"]["node2vec_walk_length"],
+        num_walks=encoder_config["encoder_model"]["node2vec_setup"]["node2vec_num_walks"],
+        workers=encoder_config["encoder_model"]["node2vec_setup"]["node2vec_workers"],
+        p=encoder_config["encoder_model"]["node2vec_setup"]["node2vec_p"],
+        q=encoder_config["encoder_model"]["node2vec_setup"]["node2vec_q"],
         quiet=True
     )
     
     model = node2vec.fit(
-        window=encoder_config["model"]["node2vec_setup"]["node2vec_window"], 
-        min_count=encoder_config["model"]["node2vec_setup"]["node2vec_min_count"], 
-        batch_words=encoder_config["model"]["node2vec_setup"]["node2vec_batch_words"]
+        window=encoder_config["encoder_model"]["node2vec_setup"]["node2vec_window"], 
+        min_count=encoder_config["encoder_model"]["node2vec_setup"]["node2vec_min_count"], 
+        batch_words=encoder_config["encoder_model"]["node2vec_setup"]["node2vec_batch_words"]
     )  # Perform Node2Vec
 
     # Used to generate an embedding for isolated nodes
@@ -105,7 +105,7 @@ def compute_node_embeddings_LSTM(graph_snapshots, lstm_model, device):
     # Collect per-timestep node embeddings
     node_history = defaultdict(list)
     old_nodes = set()
-    null_embed = torch.tensor([0]*(encoder_config["model"]["node2vec_setup"]["node2vec_dimensions"]),
+    null_embed = torch.tensor([0]*(encoder_config["encoder_model"]["node2vec_setup"]["node2vec_dimensions"]),
                               dtype=torch.float32).to(device)
     for G in graph_snapshots:
         snapshot_embeddings = compute_node2vec_embeddings(G, device)
@@ -146,7 +146,7 @@ def get_GCN_data(graph_snapshots):
     x_list = []
     edge_index_list = []
 
-    F = encoder_config["model"]["node2vec_setup"]["node2vec_dimensions"] # number of features per node (change this if you want more features)
+    F = encoder_config["encoder_model"]["node2vec_setup"]["node2vec_dimensions"] # number of features per node (change this if you want more features)
 
     for G in graph_snapshots:
         node2vec_embeddings = compute_node2vec_embeddings(G)
diff --git a/GraphGeneration/scripts/load_data.py b/GraphGeneration/scripts/load_data.py
index 5bffaea..3042b7f 100644
--- a/GraphGeneration/scripts/load_data.py
+++ b/GraphGeneration/scripts/load_data.py
@@ -10,7 +10,7 @@
 from utils.loader import Loader
 
 
-def load_data(dataset, strategy, embedding, mlpEncoding, embedOld, trainingStyle, embeddingType):
+def load_data(dataset, embedding, mlpEncoding, embeddingType):
     my_loader = Loader()
     output_dir = os.path.abspath(f'data/input/cached/{dataset}')
     
@@ -19,14 +19,14 @@ def load_data(dataset, strategy, embedding, mlpEncoding, embedOld, trainingStyle
     cached_data_dataset_folder = os.path.join(output_dir, 'saved_data/')
 
     # Construct output evaluation csv
-    structure_pred_file_path = f'GraphGeneration/output/results/structure/{dataset}/model_gen_retrain_{strategy}_embedding{embedding}_mlpEncoding{mlpEncoding}_embedOld{embedOld}_trainingStyle{trainingStyle}_embeddingType{embeddingType}/structure_pred.csv'
-    structure_true_file_path = f'GraphGeneration/output/results/structure/{dataset}/model_gen_retrain_{strategy}_embedding{embedding}_mlpEncoding{mlpEncoding}_embedOld{embedOld}_trainingStyle{trainingStyle}_embeddingType{embeddingType}/structure_true.csv'
-    structure_diff_file_path = f'GraphGeneration/output/results/structure/{dataset}/model_gen_retrain_{strategy}_embedding{embedding}_mlpEncoding{mlpEncoding}_embedOld{embedOld}_trainingStyle{trainingStyle}_embeddingType{embeddingType}/structure_diff.csv'
-    kernel_pred_file_path = f'GraphGeneration/output/results/kernel/{dataset}/model_gen_retrain_{strategy}_embedding{embedding}_mlpEncoding{mlpEncoding}_embedOld{embedOld}_trainingStyle{trainingStyle}_embeddingType{embeddingType}/kernel_pred.csv'
-    kernel_true_file_path = f'GraphGeneration/output/results/kernel/{dataset}/model_gen_retrain_{strategy}_embedding{embedding}_mlpEncoding{mlpEncoding}_embedOld{embedOld}_trainingStyle{trainingStyle}_embeddingType{embeddingType}/kernel_true.csv'
-    edge_file_path = f'GraphGeneration/output/results/structure/{dataset}/model_gen_retrain_{strategy}_embedding{embedding}_mlpEncoding{mlpEncoding}_embedOld{embedOld}_trainingStyle{trainingStyle}_embeddingType{embeddingType}/edge_analysis.csv'
-    topER_file_path = f'GraphGeneration/output/results/topER/{dataset}/model_gen_retrain_{strategy}_embedding{embedding}_mlpEncoding{mlpEncoding}_embedOld{embedOld}_trainingStyle{trainingStyle}_embeddingType{embeddingType}/toper_diff.csv'
-    animation_path = f'GraphGeneration/output/results/animations/{dataset}/model_gen_retrain_{strategy}_embedding{embedding}_mlpEncoding{mlpEncoding}_embedOld{embedOld}_trainingStyle{trainingStyle}_embeddingType{embeddingType}/pred_vs_true.mp4'
+    structure_pred_file_path = f'GraphGeneration/output/results/structure/{dataset}/topoGED_embedding{embedding}_mlpEncoding{mlpEncoding}_embeddingType{embeddingType}/structure_pred.csv'
+    structure_true_file_path = f'GraphGeneration/output/results/structure/{dataset}/topoGED_embedding{embedding}_mlpEncoding{mlpEncoding}_embeddingType{embeddingType}/structure_true.csv'
+    structure_diff_file_path = f'GraphGeneration/output/results/structure/{dataset}/topoGED_embedding{embedding}_mlpEncoding{mlpEncoding}_embeddingType{embeddingType}/structure_diff.csv'
+    kernel_pred_file_path = f'GraphGeneration/output/results/kernel/{dataset}/topoGED_embedding{embedding}_mlpEncoding{mlpEncoding}_embeddingType{embeddingType}/kernel_pred.csv'
+    kernel_true_file_path = f'GraphGeneration/output/results/kernel/{dataset}/topoGED_embedding{embedding}_mlpEncoding{mlpEncoding}_embeddingType{embeddingType}/kernel_true.csv'
+    edge_file_path = f'GraphGeneration/output/results/structure/{dataset}/topoGED_embedding{embedding}_mlpEncoding{mlpEncoding}_embeddingType{embeddingType}/edge_analysis.csv'
+    topER_file_path = f'GraphGeneration/output/results/topER/{dataset}/topoGED_embedding{embedding}_mlpEncoding{mlpEncoding}_embeddingType{embeddingType}/toper_diff.csv'
+    animation_path = f'GraphGeneration/output/results/animations/{dataset}/topoGED_embedding{embedding}_mlpEncoding{mlpEncoding}_embeddingType{embeddingType}/pred_vs_true.mp4'
 
     # Create file paths if needed
     for path in [structure_pred_file_path, structure_true_file_path, structure_diff_file_path, kernel_pred_file_path, 
diff --git a/GraphGeneration/scripts/topoGED_end_to_end.py b/GraphGeneration/scripts/topoGED_end_to_end.py
index 30c81f5..7e0bea7 100644
--- a/GraphGeneration/scripts/topoGED_end_to_end.py
+++ b/GraphGeneration/scripts/topoGED_end_to_end.py
@@ -14,7 +14,6 @@
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
 
 from GraphGeneration.utils.Evaluator import Evaluator
-from GraphGeneration.models.temporal_gnn.script.config import args
 from load_data import load_data, generate_training_data_cached, generate_validation_data_cached
 from GraphGeneration.utils.casting_type import to_tensor
 from GraphGeneration.utils.sampling_edges_utils import predict_edges
@@ -63,11 +62,11 @@ def __init__(self):
         
         # Some default file path
         self.file_visualization_path = "GraphGeneration/scripts/Visualize"
-        self.saved_input = os.path.abspath(f'data/input/cached/{args.dataset}/saved_data')
-        common_suffix = f"multiMLP_{args.strategy}_embedding{args.embedding}_mlpEncoding{args.mlpEncoding}_embeddingType{args.embeddingType}"
-        self.structure_dir = f"GraphGeneration/output/results/structure/{args.dataset}/{common_suffix}"
-        self.kernel_dir = f"GraphGeneration/output/results/kernel/{args.dataset}/{common_suffix}"
-        self.topER_dir = f"GraphGeneration/output/results/topER/{args.dataset}/{common_suffix}"
+        self.saved_input = os.path.abspath(f'data/input/cached/{encoder_config["dataset"]}/saved_data')
+        common_suffix = f'topoGED_embedding{encoder_config["encoder_model"]["addOnFeature"]}_mlpEncoding{encoder_config["decoder_model"]["encode_links"]}_embeddingType{encoder_config["encoder_model"]["nodeEmbeddingType"]}'
+        self.structure_dir = f'GraphGeneration/output/results/structure/{encoder_config["dataset"]}/{common_suffix}'
+        self.kernel_dir = f'GraphGeneration/output/results/kernel/{encoder_config["dataset"]}/{common_suffix}'
+        self.topER_dir = f'GraphGeneration/output/results/topER/{encoder_config["dataset"]}/{common_suffix}'
 
         # Current target snapshot we want to predict
         self.current_target_snapshot = 2
@@ -76,13 +75,19 @@ def __init__(self):
         self.all_edge_types = ['o-o-bank', 'o-o-nobank', 'o-n', 'n-n']
         
         # Load the global encoder & decoder model
-        self.encoder_model, input_dim = load_encoder_model(args, device=device, node2vec_dimensions=encoder_config["model"]["node2vec_setup"]["node2vec_dimensions"], 
-                                                           hidden_dim=encoder_config["model"]["hidden_dim"])
-        self.link_prediction_decoder = setupMLP(embedding_dim=input_dim*2, embedding=args.embedding, mlpEncoding=args.mlpEncoding, embedOld=args.embedOld)
+        self.encoder_model, input_dim = load_encoder_model(encoder_config, device=device, node2vec_dimensions=encoder_config["encoder_model"]["node2vec_setup"]["node2vec_dimensions"], 
+                                                           hidden_dim=encoder_config["encoder_model"]["hidden_dim"])
+        
+        # Check if there is any add-on features we will plug at the end of encoder embedding
+        if encoder_config["encoder_model"]["addOnFeature"] in ['NodeType', 'Position']:
+            input_dim += 1
+        
+        self.link_prediction_decoder = setupMLP(embedding_dim=input_dim*2, mlpEncoding=encoder_config["decoder_model"]["encode_links"])
         self.link_prediction_decoder.to(device)
         
         # Load all the snapshot true data 
-        self.probabilities, self.graph_descriptions, self.thresholds, self.target_graphs = load_data(args.dataset, args.strategy, args.embedding, args.mlpEncoding, args.embedOld, args.trainingStyle, args.embeddingType)
+        self.probabilities, self.graph_descriptions, self.thresholds, self.target_graphs = load_data(encoder_config["dataset"], encoder_config["encoder_model"]["addOnFeature"], 
+                                                                                                     encoder_config["decoder_model"]["encode_links"], encoder_config["encoder_model"]["nodeEmbeddingType"])
         
         # Modify the graph ids to 1,2,3,...
         self.target_graphs, _ = modifyGraphIds(self.target_graphs, self.thresholds)
@@ -106,7 +111,7 @@ def __init__(self):
         self.test_graphs = [self.target_graphs[i][-1] for i in range(self.val_end, self.num_snapshots)]
 
     # ======================= TRAIN MODEL =======================
-    def train_multi_head(self, training_samples, validation_samples, epochs=250, batch_size=64, training_new_edges_count=0):
+    def train_multi_head(self, training_samples, validation_samples, training_new_edges_count=0):
         """
         Train a MultiHeaded MLP Neural Network for use in edge predictions
         
@@ -114,19 +119,17 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
             model (MultiheadedMLP): The Multiheaded MLP to train now
             training_samples: The dictionary store the pos, neg edges of each snapshot, using for training
             validation_samples: The dictionary store the pos, neg edges of each snapshot, using for validation
-            epochs (int): The number of epochs to train for
-            batch_size (int): The batch size to use for the training data
         Returns:
             link_prediction_decoder (Multiheaded MLP): The trained MLP
         """
-        lr = args.lr
+        lr = encoder_config["training"]["lr"]
         self.link_prediction_decoder.train()
         optimizer = torch.optim.Adam(list(self.encoder_model.parameters()) + list(self.link_prediction_decoder.parameters()), lr=lr)
         loss_fn = nn.BCELoss()
         graphlet_loss_fn = GraphletLoss()
         
         # Train
-        for epoch in range(epochs):
+        for epoch in range(encoder_config["training"]["epochs"]):
             train_loss = {
                     'o-o-bank': [],
                     'o-o-nobank': [],
@@ -179,14 +182,14 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                     X_train_curr, curr_y_train = shuffle(curr_X_train, curr_y_train, random_state=self.seed)
                     temp_X_train = torch.tensor(X_train_curr, dtype=torch.float32).to(device)
                     temp_y_train = torch.tensor(curr_y_train, dtype=torch.float32).to(device)
-                    train_loader = DataLoader(TensorDataset(temp_X_train, temp_y_train), batch_size=batch_size, shuffle=True)
+                    train_loader = DataLoader(TensorDataset(temp_X_train, temp_y_train), batch_size=encoder_config["training"]["batch_size"], shuffle=True)
                     
                     # Training graphs for predicting current snapshot
                     training_graphs = self.training_graphs[:snapshot]
                     
                     for (x, y) in train_loader:
                         optimizer.zero_grad()
-                        node_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=training_graphs, encoder_model=self.encoder_model, device=device)
+                        node_embeddings = compute_embedding(embeddingType=encoder_config["encoder_model"]["nodeEmbeddingType"], graphs=training_graphs, encoder_model=self.encoder_model, device=device)
                         
                         # Get current embeddings
                         src_nodes = [int(n) for n in x[:, 0].tolist()]                
@@ -253,7 +256,7 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                         train_labels.extend(y.detach().cpu().numpy())
 
                     # Constructing temp graph
-                    curr_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=training_graphs, encoder_model=self.encoder_model, device=device)
+                    curr_embeddings = compute_embedding(embeddingType=encoder_config["encoder_model"]["nodeEmbeddingType"], graphs=training_graphs, encoder_model=self.encoder_model, device=device)
                     constructing_graph = get_node_features(constructing_graph.copy(), self.training_graphs[:snapshot], self.thresholds, self.graph_descriptions[snapshot], node_types["old_nodes"], node_types["new_nodes"])
                     sampled_edges = predict_edges(constructing_graph, edge_type=flag, node_types=node_types, edgebank=self.all_edgebanks[snapshot], link_prediction_decoder=self.link_prediction_decoder, 
                                 old_node_embeddings=curr_embeddings, top_k=self.current_target_count[flag], graph_num=snapshot, device=device)
@@ -275,7 +278,7 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                 if (epoch + 1) % 100 == 0 or epoch == 0:
                     epochMessage = f"Epoch {epoch+1:02d} | Edge Type: {flag} | Train Loss: {np.mean(train_loss[flag]):.4f} | Train AUCROC {np.mean(train_auc[flag]):.4f}"
                     print(epochMessage)
-                    with open(rf"{self.file_visualization_path}\{args.dataset}\{args.embeddingType}\multiheadMLP_performance.txt", "a") as f:
+                    with open(rf'{self.file_visualization_path}\{encoder_config["dataset"]}\{encoder_config["encoder_model"]["nodeEmbeddingType"]}\multiheadMLP_performance.txt', "a") as f:
                         f.write(epochMessage + "\n")
             
 
@@ -298,21 +301,21 @@ def train_models(self):
         
         # Prepare training data
         training_sorted_samples, training_new_edges_count = generate_training_data_cached(training_graphs=self.training_graphs,
-                                                all_edgebanks=self.all_edgebanks, MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=self.seed, saved_data_file_path=self.saved_input)
+                                                all_edgebanks=self.all_edgebanks, MAX_SAMPLES=MAX_SAMPLES, dataset=encoder_config["dataset"], seed=self.seed, saved_data_file_path=self.saved_input)
 
         # Prepare validation data
         # We pass all_edgebanks of the training snapshots edgebanks
         validation_sorted_samples, training_new_edges_count = generate_validation_data_cached(training_graphs=self.validation_graphs, old_training_nodes=old_training_nodes, 
-                                                all_edgebanks=self.all_edgebanks[self.train_end], MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=self.seed, type_data="validation", saved_data_file_path=self.saved_input)
+                                                all_edgebanks=self.all_edgebanks[self.train_end], MAX_SAMPLES=MAX_SAMPLES, dataset=encoder_config["dataset"], seed=self.seed, type_data="validation", saved_data_file_path=self.saved_input)
         # Prepare test data
         # We pass all_edgebanks of the training snapshots edgebanks
         test_sorted_samples, training_new_edges_count = generate_validation_data_cached(training_graphs=self.test_graphs, old_training_nodes=old_training_nodes, 
-                                                all_edgebanks=self.all_edgebanks[self.val_end], MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=self.seed, type_data="test", saved_data_file_path=self.saved_input)
+                                                all_edgebanks=self.all_edgebanks[self.val_end], MAX_SAMPLES=MAX_SAMPLES, dataset=encoder_config["dataset"], seed=self.seed, type_data="test", saved_data_file_path=self.saved_input)
         
         print('Training') 
     
         self.link_prediction_decoder = self.train_multi_head(training_samples=training_sorted_samples, validation_samples=validation_sorted_samples, 
-                                                                 epochs=500, batch_size=64, training_new_edges_count=training_new_edges_count)
+                                                            training_new_edges_count=training_new_edges_count)
         
         return self.link_prediction_decoder
             
@@ -376,7 +379,7 @@ def build_accumulating_filtration_sequence_with_edgebank(self, current_target_sn
         constructing_graph = get_node_features(constructing_graph, prev_graphs, self.thresholds, current_target_graph_description, old_nodes, new_nodes)  
 
         # Assign embeddings for all the training_nodes
-        curr_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=prev_graphs, encoder_model=self.encoder_model, device=device)
+        curr_embeddings = compute_embedding(embeddingType=encoder_config["encoder_model"]["nodeEmbeddingType"], graphs=prev_graphs, encoder_model=self.encoder_model, device=device)
         
         # Assign zero vector for new nodes
         for new_node in new_nodes:
@@ -393,7 +396,7 @@ def build_accumulating_filtration_sequence_with_edgebank(self, current_target_sn
         
             constructing_graph.add_edges_from(sampled_edges)
             update_degrees(constructing_graph)
-            new_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=prev_graphs + [constructing_graph], encoder_model=self.encoder_model, device=device)
+            new_embeddings = compute_embedding(embeddingType=encoder_config["encoder_model"]["nodeEmbeddingType"], graphs=prev_graphs + [constructing_graph], encoder_model=self.encoder_model, device=device)
             curr_embeddings.update(new_embeddings)  # Recompute old node embeddings
         
             edge_pool = edge_pool + sampled_edges
@@ -437,7 +440,7 @@ def evaluate(self, pred_graph, true_graph, node_types):
         
         on_kl_divergence_results = self.evaluator.kl_divergence_graphs(pred_on_graph, true_on_graph, mode="total")
 
-        with open(rf"{self.file_visualization_path}\{args.dataset}\{args.embeddingType}\kl_results_on.txt", "a") as f:
+        with open(rf'{self.file_visualization_path}\{encoder_config["dataset"]}\{encoder_config["encoder_model"]["nodeEmbeddingType"]}\kl_results_on.txt', "a") as f:
             f.write(f"{self.current_target_snapshot + 1}, {on_kl_divergence_results:.6f}\n")
             
         # Evaluate the graph of n-n 
@@ -446,7 +449,7 @@ def evaluate(self, pred_graph, true_graph, node_types):
 
         nn_kl_divergence_results = self.evaluator.kl_divergence_graphs(pred_nn_graph, true_nn_graph, mode="total")
 
-        with open(rf"{self.file_visualization_path}\{args.dataset}\{args.embeddingType}\kl_results_nn.txt", "a") as f:
+        with open(rf'{self.file_visualization_path}\{encoder_config["dataset"]}\{encoder_config["encoder_model"]["nodeEmbeddingType"]}\kl_results_nn.txt', "a") as f:
             f.write(f"{self.current_target_snapshot + 1}, {nn_kl_divergence_results:.6f}\n")
             
         # Evaluate the graph of old nodes
@@ -465,8 +468,8 @@ def evaluate(self, pred_graph, true_graph, node_types):
         pd.DataFrame([true_kernel]).to_csv(f"{self.kernel_dir}/kernel_true.csv", mode='a', header=False, index=False)
         
     def run(self):             
-        print("INFO: Dataset: {}".format(args.dataset))
-        encoder_model_path = os.path.join(self.saved_input, rf"saved_models/encoder_{args.embeddingType}_{self.seed}")
+        print("INFO: Dataset: {}".format(encoder_config["dataset"]))
+        encoder_model_path = os.path.join(self.saved_input, rf'saved_models/encoder_{encoder_config["encoder_model"]["nodeEmbeddingType"]}_{self.seed}')
         decoder_model_path = os.path.join(self.saved_input, rf"saved_data/decoder_MLP_{self.seed}")
 
         if os.path.exists(encoder_model_path) and os.path.exists(decoder_model_path):
@@ -528,4 +531,4 @@ def run(self):
     runner.run()
 
 # To run the script
-# python GraphGeneration/scripts/topoGED_end_to_end.py --embeddingType=LSTM --dataset=CollegeMsg --nfeat=64
\ No newline at end of file
+# python GraphGeneration/scripts/topoGED_end_to_end.py --embeddingType=LSTM 
\ No newline at end of file
diff --git a/GraphGeneration/utils/graph_construction_utils.py b/GraphGeneration/utils/graph_construction_utils.py
index 8c6513a..de53d54 100644
--- a/GraphGeneration/utils/graph_construction_utils.py
+++ b/GraphGeneration/utils/graph_construction_utils.py
@@ -131,7 +131,6 @@ def update_degrees(graph: nx.DiGraph):
         if 'feat' not in graph.nodes[node]:
             graph.nodes[node]['feat'] = {'id': node}  
             graph.nodes[node]['feat']['currDegree'] = 0
-            graph.nodes[node]['feat']['maxDegree'] = assigned_degree
         else:
             graph.nodes[node]['feat']['currDegree'] = graph.degree(node)
      
\ No newline at end of file

From ca05d64dcfe256bd399ddaa9dff7afb1c8dfc02e Mon Sep 17 00:00:00 2001
From: Duy Kha <duykha121314888@gmail.com>
Date: Thu, 31 Jul 2025 13:46:55 -0500
Subject: [PATCH 4/8] remove args

---
 GraphGeneration/models/temporal_gnn/script/config.py | 8 --------
 GraphGeneration/scripts/topoGED_end_to_end.py        | 2 +-
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/GraphGeneration/models/temporal_gnn/script/config.py b/GraphGeneration/models/temporal_gnn/script/config.py
index 174e3a0..8bb01f6 100644
--- a/GraphGeneration/models/temporal_gnn/script/config.py
+++ b/GraphGeneration/models/temporal_gnn/script/config.py
@@ -50,14 +50,6 @@
 parser.add_argument('--fixed_curvature', type=int, default=1, help='fixed (1) curvature or not (0)')
 parser.add_argument('--aggregation', type=str, default='deg', help='aggregation method: [deg, att]')
 
-parser.add_argument("--strategy", type=str, required=False, default='MultiheadedMLP', choices=['MultiheadedMLP', 'SingleMLP'], help="The type of MLP NN to use")
-parser.add_argument("--embedding", type=str, required=False, default='Position', choices=['Position', 'NodeType', 'Position+NodeType', 'None'], help="Allows appending positional encodings or an integer node type onto the end of the embeddings")
-parser.add_argument("--mlpEncoding", type=str, required=False, default='Concat', choices=['Concat', 'Product', 'Addition', 'Subtraction'], help="How you want to input node embeddings to the MLP")  # Product and addition lead to potential noise as we use directed graphs
-parser.add_argument("--embedOld", type=str, required=False, default='True', choices=['True', 'False'], help="If you want to let the MLP predict edge type \'o-o-bank\', otherwise these edges are randomly added")
-parser.add_argument("--oldDegree", type=str, required=False, default='False' ,choices=['True', 'False'], help="If you want reappearing nodes to reuse their most recent degree")
-parser.add_argument("--trainingStyle", type=str, required=False, default='TrueGraphs', choices=['TrueGraphs', 'PredGraphs', 'MixedGraphs'], help="When training the MLP, decides if you use real graphs, predicted graphs (with first real as starter), or real then pred for MLP training")
-parser.add_argument("--embeddingType", type=str, required=False, default='Node2Vec', choices=['Linear', 'Node2Vec', 'LSTM', 'GCLSTM', 'HTGN'], help="How nodes should be embedded. Either with Node2Vec or with a Linear mutliplication of adjacency matrix by node feature matrix")
-
 # TopoGED mode
 parser.add_argument('--use_predict_probs', action='store_true', help='Use prediction probabilities to predict the next snapshot')
 parser.add_argument('--use_predict_graph_prediction', action='store_true', help='Use prediction graph description to predict the next snapshot')
diff --git a/GraphGeneration/scripts/topoGED_end_to_end.py b/GraphGeneration/scripts/topoGED_end_to_end.py
index 7e0bea7..6967473 100644
--- a/GraphGeneration/scripts/topoGED_end_to_end.py
+++ b/GraphGeneration/scripts/topoGED_end_to_end.py
@@ -531,4 +531,4 @@ def run(self):
     runner.run()
 
 # To run the script
-# python GraphGeneration/scripts/topoGED_end_to_end.py --embeddingType=LSTM 
\ No newline at end of file
+# python GraphGeneration/scripts/topoGED_end_to_end.py 
\ No newline at end of file

From fdc405279cb9456a88c9f7f285ba220863e9df8b Mon Sep 17 00:00:00 2001
From: Duy Kha <duykha121314888@gmail.com>
Date: Fri, 1 Aug 2025 21:42:51 -0500
Subject: [PATCH 5/8] comments

---
 GraphGeneration/encoder.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GraphGeneration/encoder.yaml b/GraphGeneration/encoder.yaml
index 9174ae8..53ab881 100644
--- a/GraphGeneration/encoder.yaml
+++ b/GraphGeneration/encoder.yaml
@@ -18,7 +18,7 @@ decoder_model:
   encode_links: "Concat"
 
 training:
-  batch_size: 64
+  batch_size: 64 # play around
   lr: 0.001
   epochs: 500
 

From 6bf6325a6756d394b0dbe61d4a2e392d6bb86a84 Mon Sep 17 00:00:00 2001
From: Duy Kha <duykha121314888@gmail.com>
Date: Mon, 4 Aug 2025 00:16:59 -0500
Subject: [PATCH 6/8] draft

---
 GraphGeneration/scripts/load_data.py          |  12 +-
 GraphGeneration/scripts/topoGED_end_to_end.py | 184 ++++++++++++++++--
 2 files changed, 176 insertions(+), 20 deletions(-)

diff --git a/GraphGeneration/scripts/load_data.py b/GraphGeneration/scripts/load_data.py
index 5bffaea..1a86e55 100644
--- a/GraphGeneration/scripts/load_data.py
+++ b/GraphGeneration/scripts/load_data.py
@@ -282,10 +282,14 @@ def generate_validation_data(training_graphs, old_training_nodes, all_edgebanks,
                 print(f"[FATAL] Unexpected failure at outer loop for edge ({u}, {v}): {type(e).__name__} - {e}")
             
         # Generate an equal amount of negative labels for each type of edge
-        negative_edges_oo = generate_negative_edges(graph, new_edges_count['o-o-bank'], edge_type='o-o-bank', edgebank=all_edgebanks, old_nodes=old_training_nodes)
-        negative_edges_oon = generate_negative_edges(graph, new_edges_count['o-o-nobank'], edge_type='o-o-nobank', edgebank=all_edgebanks, old_nodes=old_training_nodes)
-        negative_edges_on = generate_negative_edges(graph, new_edges_count['o-n'], edge_type='o-n', edgebank=all_edgebanks, old_nodes=old_training_nodes)
-        negative_edges_nn = generate_negative_edges(graph, new_edges_count['n-n'], edge_type='n-n', edgebank=all_edgebanks, old_nodes=old_training_nodes)
+        negative_edges_oo = generate_negative_edges(graph, new_edges_count['o-o-bank'], edge_type='o-o-bank', 
+                                                    edgebank=all_edgebanks, old_nodes=old_training_nodes)
+        negative_edges_oon = generate_negative_edges(graph, new_edges_count['o-o-nobank'], edge_type='o-o-nobank', 
+                                                     edgebank=all_edgebanks, old_nodes=old_training_nodes)
+        negative_edges_on = generate_negative_edges(graph, new_edges_count['o-n'], edge_type='o-n', 
+                                                    edgebank=all_edgebanks, old_nodes=old_training_nodes)
+        negative_edges_nn = generate_negative_edges(graph, new_edges_count['n-n'], edge_type='n-n', 
+                                                    edgebank=all_edgebanks, old_nodes=old_training_nodes)
 
         tmp_samples_oo = [torch.tensor([u, v]) for u, v in negative_edges_oo]
         tmp_samples_oon = [torch.tensor([u, v]) for u, v in negative_edges_oon]
diff --git a/GraphGeneration/scripts/topoGED_end_to_end.py b/GraphGeneration/scripts/topoGED_end_to_end.py
index 8e7941d..afdc99a 100644
--- a/GraphGeneration/scripts/topoGED_end_to_end.py
+++ b/GraphGeneration/scripts/topoGED_end_to_end.py
@@ -24,7 +24,7 @@
 from GraphGeneration.models.model import setupMLP, load_encoder_model
 
 # Import all node embedding methods
-from compute_embedding import compute_embedding, node2vec_batch_words
+from compute_embedding import compute_embedding
 from process_data import modifyGraphIds, build_edgebanks_from_start
 from torch.utils.data import DataLoader
 
@@ -51,6 +51,7 @@
 class Runner(object):
     def __init__(self):      
         self.seed = global_seed
+        self.best_validation_model_auc = 0
         
         # Set up Evaluator
         self.evaluator = Evaluator()
@@ -75,6 +76,7 @@ def __init__(self):
         # Check if there is any add-on features we will plug at the end of encoder embedding
         if args.embedding in ['NodeType', 'Position']:
             input_dim += 1
+            args.nfeat += 1
              
         self.link_prediction_decoder = setupMLP(embedding_dim=input_dim*2, embedding=args.embedding, mlpEncoding=args.mlpEncoding, embedOld=args.embedOld)
         self.link_prediction_decoder.to(device)
@@ -96,14 +98,155 @@ def __init__(self):
         # Convert number of snapshots to integer
         self.num_snapshots = len(self.probabilities)
         self.train_end = int(0.8 * self.num_snapshots)
-        val_end = int(0.9 * self.num_snapshots)
+        self.val_end = int(0.9 * self.num_snapshots)
 
         # Assign snapshots
         self.training_graphs = [self.target_graphs[i][-1] for i in range(self.train_end)]
-        self.validation_graphs = [self.target_graphs[i][-1] for i in range(self.train_end, val_end)]
-        self.test_graphs = [self.target_graphs[i][-1] for i in range(val_end, self.num_snapshots)]
+        self.validation_graphs = [self.target_graphs[i][-1] for i in range(self.train_end, self.val_end)]
+        self.test_graphs = [self.target_graphs[i][-1] for i in range(self.val_end, self.num_snapshots)]
 
     # ======================= TRAIN MODEL =======================
+    def run_validation(self, validation_samples, batch_size, epoch):
+        """
+        Evaluate the encoder & decoder on the first validation snapshot and cache the best model
+
+        Args:
+            validation_samples: The dictionary store the pos, neg edges of each snapshot, using for validation
+            batch_size (int): The batch size to use for the validation data
+            epoch (int): The epoch index, logged as epoch + 1
+        Returns:
+            None. Both models are saved when the average AUC over the edge types is the best so far
+        """
+        val_auc = {edge_type: [] for edge_type in self.all_edge_types}
+        # For computing AUC Scores (accumulated over the edge types, not reset per type)
+        val_preds = []
+        val_labels = []
+
+        for i in range(1):
+            # NOTE(review): the +1 offset is kept from the draft - confirm it indexes the intended snapshot
+            snapshot = i + len(self.training_graphs) + 1
+            self.encoder_model.eval()
+            self.link_prediction_decoder.eval()
+            with torch.no_grad():
+                print("INFO: Validation on snapshot", snapshot)
+
+                node_types = {
+                    "old_nodes": set().union(*(graph.nodes() for graph in self.training_graphs)),
+                    "new_nodes": set()
+                }
+
+                # Prepare current target graph count
+                self.current_target_count_old_nodes = self.probabilities[snapshot][0]
+                self.current_target_count_new_nodes = self.probabilities[snapshot][1]
+                self.current_target_count = {
+                    edge_type: self.probabilities[snapshot][j + 2]
+                    for j, edge_type in enumerate(self.all_edge_types)
+                }
+
+                constructing_graph = nx.DiGraph() # Graph we try to predict
+
+                # Adding old nodes to constructing_graph
+                constructing_graph.add_nodes_from(node_types['old_nodes'])
+
+                for flag in self.all_edge_types:
+                    curr_X_val = validation_samples[flag]['X'][i]
+                    curr_y_val = validation_samples[flag]['y'][i]
+
+                    if len(curr_X_val) == 0 or len(curr_y_val) == 0:
+                        print(f'No samples for edge type: {flag}')
+                        continue
+
+                    curr_X_val = [x.cpu().detach().numpy() if torch.is_tensor(x) else x for x in curr_X_val]
+                    curr_X_val = np.array(curr_X_val)
+                    curr_y_val = np.array(curr_y_val)
+
+                    curr_X_val, curr_y_val = shuffle(curr_X_val, curr_y_val, random_state=self.seed)
+                    temp_X_val = torch.tensor(curr_X_val, dtype=torch.float32).to(device)
+                    temp_y_val = torch.tensor(curr_y_val, dtype=torch.float32).to(device)
+                    val_loader = DataLoader(TensorDataset(temp_X_val, temp_y_val), batch_size=batch_size, shuffle=True)
+
+                    # Work on a copy so the predicted graph never leaks into self.training_graphs
+                    validation_graphs = list(self.training_graphs)
+
+                    for (x, y) in val_loader:
+                        node_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=validation_graphs, encoder_model=self.encoder_model, device=device)
+
+                        # Get current embeddings
+                        src_nodes = [int(n) for n in x[:, 0].tolist()]
+                        dst_nodes = [int(n) for n in x[:, 1].tolist()]
+
+                        # Add new nodes to the node_types, with a zero vector as their embedding
+                        for n in src_nodes + dst_nodes:
+                            if n not in node_embeddings and flag in ['o-n', 'n-n']:
+                                node_types["new_nodes"].add(n)
+                                constructing_graph.add_node(n)
+                                node_embeddings[n] = torch.zeros(args.nfeat, device=device)
+
+                        src_embed = torch.stack([node_embeddings[n] for n in src_nodes])
+                        dst_embed = torch.stack([node_embeddings[n] for n in dst_nodes])
+
+                        if src_embed.dim() == 1:
+                            src_embed = src_embed.unsqueeze(1)
+                        if dst_embed.dim() == 1:
+                            dst_embed = dst_embed.unsqueeze(1)
+
+                        preds = self.link_prediction_decoder(src_embed=src_embed, dst_embed=dst_embed, edge_type=flag)
+
+                        if preds.dim() == 0:
+                            preds = preds.unsqueeze(0)
+                        if y.dim() == 0:  # scalar value like torch.tensor(0.5)
+                            y = y.unsqueeze(0)  # make it [1]
+                        elif y.dim() == 2 and y.size(1) == 1:  # shape [batch_size, 1]
+                            y = y.view(-1)
+
+                        # Add to our labels for evaluation
+                        val_preds.extend(preds.detach().cpu().numpy())
+                        val_labels.extend(y.detach().cpu().numpy())
+
+                    # Assign embeddings for all the training_nodes
+                    curr_embeddings = compute_embedding(embeddingType=args.embeddingType, graphs=validation_graphs, encoder_model=self.encoder_model, device=device)
+                    constructing_graph = get_node_features(constructing_graph.copy(), self.training_graphs, self.thresholds, self.graph_descriptions[snapshot], node_types["old_nodes"], node_types["new_nodes"])
+                    sampled_edges = predict_edges(constructing_graph, edge_type=flag, node_types=node_types, edgebank=self.all_edgebanks[snapshot], link_prediction_decoder=self.link_prediction_decoder, 
+                                old_node_embeddings=curr_embeddings, top_k=self.current_target_count[flag], graph_num=snapshot, device=device)
+                    constructing_graph.add_edges_from(list(sampled_edges))
+                    update_degrees(constructing_graph)
+
+                    # Update the local copy to involve with the constructing graph (self.training_graphs stays untouched)
+                    if flag == 'o-o-nobank':
+                        validation_graphs.append(constructing_graph)
+                    else:
+                        validation_graphs[-1] = constructing_graph
+
+                    if len(np.unique(val_labels)) < 2:
+                        val_auc[flag].append(0)  # AUC is undefined with a single class, score it as 0
+                    else:
+                        val_auc[flag].append(roc_auc_score(val_labels, val_preds))  # Calculate scores
+
+        # Record the validation AUC
+        current_model_auc = 0 # We take the average of all edge types
+
+        for flag in self.all_edge_types:
+            # An edge type without samples has no score; count it as 0 instead of NaN
+            flag_auc = np.mean(val_auc[flag]) if val_auc[flag] else 0.0
+            epochMessage = f"Epoch {epoch+1:02d} | Edge Type: {flag}  | Validation AUCROC {flag_auc:.4f}"
+            current_model_auc += flag_auc
+            print(epochMessage)
+            with open(rf"{self.file_visualization_path}\{args.dataset}\{args.embeddingType}\multiheadMLP_performance.txt", "a") as f:
+                f.write(epochMessage + "\n")
+
+        current_model_auc /= len(self.all_edge_types)
+
+        # We check and cache if it has the best auc (compare and store the same averaged value)
+        if current_model_auc >= self.best_validation_model_auc:
+            self.best_validation_model_auc = current_model_auc
+
+            print("INFO: Saving the model...")
+            # Keep the two checkpoints apart: saving both to one path overwrote the decoder
+            torch.save(self.link_prediction_decoder.state_dict(), f"{self.model_path}.decoder")
+            torch.save(self.encoder_model.state_dict(), self.model_path)
+            print("INFO: The model is saved. Done.")
+            
+    
     def train_multi_head(self, training_samples, validation_samples, epochs=250, batch_size=64, training_new_edges_count=0):
         """
         Train a MultiHeaded MLP Neural Network for use in edge predictions
@@ -141,11 +284,11 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
             train_preds = []
             train_labels = []
             
-            for snapshot in range(2, len(self.training_graphs)):
+            for snapshot in range(2, 3):
                 print("INFO: Training on snapshot", snapshot)
                 
                 node_types = { 
-                    "old_nodes": set().union(*(graph.nodes() for graph in self.training_graphs[:snapshot])),
+                    "old_nodes": set().union(*(graph.nodes() for graph in self.training_graphs[max(0, snapshot - 5):snapshot])),
                     "new_nodes": set()
                 } 
                 
@@ -180,7 +323,7 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                     train_loader = DataLoader(TensorDataset(temp_X_train, temp_y_train), batch_size=batch_size, shuffle=True)
                     
                     # Training graphs for predicting current snapshot
-                    training_graphs = self.training_graphs[:snapshot]
+                    training_graphs = self.training_graphs[max(0, snapshot - 5):snapshot]
                     
                     for (x, y) in train_loader:
                         optimizer.zero_grad()
@@ -235,7 +378,6 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                         graphlet_loss = graphlet_loss_fn(to_tensor(pred_kernel, device=device).unsqueeze(0), to_tensor(true_kernel, device=device).unsqueeze(0))
                         
                         loss = 0.5*loss_fn(preds, y) + 0.5*graphlet_loss
-                        # loss = loss_fn(preds, y)
                         loss.backward()
                         optimizer.step()
                         train_loss[flag].append(loss.item())
@@ -253,7 +395,7 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                     update_degrees(constructing_graph)
                     
                     # Update the training_graphs to involve with the constructing graph
-                    if len(training_graphs) == snapshot:
+                    if flag == 'o-o-nobank':
                         training_graphs.append(constructing_graph)
                     else:
                         training_graphs[-1] = constructing_graph 
@@ -263,6 +405,10 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                     else:
                         train_auc[flag].append(roc_auc_score(train_labels, train_preds))  # Calculate scores
                         
+            # Validation
+            self.run_validation(validation_samples=validation_samples, batch_size=batch_size, epoch=epoch)
+            
+            # Record the Training Loss, AUC 
             for flag in self.all_edge_types:
                 if (epoch + 1) % 100 == 0 or epoch == 0:
                     epochMessage = f"Epoch {epoch+1:02d} | Edge Type: {flag} | Train Loss: {np.mean(train_loss[flag]):.4f} | Train AUCROC {np.mean(train_auc[flag]):.4f}"
@@ -271,7 +417,7 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
                         f.write(epochMessage + "\n")
             
 
-        return self.link_prediction_decoder
+        return self.link_prediction_decoder, self.encoder_model
 
     def train_models(self):
         """
@@ -290,23 +436,29 @@ def train_models(self):
         
         # Prepare training data
         training_sorted_samples, training_new_edges_count = generate_training_data_cached(training_graphs=self.training_graphs,
-                                                all_edgebanks=self.all_edgebanks, MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=global_seed, saved_data_file_path=self.saved_input)
+                                                all_edgebanks=self.all_edgebanks, MAX_SAMPLES=MAX_SAMPLES, 
+                                                dataset=args.dataset, seed=global_seed, 
+                                                saved_data_file_path=self.saved_input)
 
         # Prepare validation data
         # We pass all_edgebanks of the training snapshots edgebanks
         validation_sorted_samples, training_new_edges_count = generate_validation_data_cached(training_graphs=self.validation_graphs, old_training_nodes=old_training_nodes, 
-                                                all_edgebanks=self.all_edgebanks[self.train_end], MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=global_seed, type_data="validation", saved_data_file_path=self.saved_input)
+                                                all_edgebanks=self.all_edgebanks[self.train_end], MAX_SAMPLES=MAX_SAMPLES, 
+                                                dataset=args.dataset, seed=global_seed, 
+                                                type_data="validation", saved_data_file_path=self.saved_input)
         # Prepare test data
         # We pass all_edgebanks of the training snapshots edgebanks
         test_sorted_samples, training_new_edges_count = generate_validation_data_cached(training_graphs=self.test_graphs, old_training_nodes=old_training_nodes, 
-                                                all_edgebanks=self.all_edgebanks[self.train_end], MAX_SAMPLES=MAX_SAMPLES, dataset=args.dataset, seed=global_seed, type_data="test", saved_data_file_path=self.saved_input)
+                                                all_edgebanks=self.all_edgebanks[self.val_end], MAX_SAMPLES=MAX_SAMPLES, 
+                                                dataset=args.dataset, seed=global_seed, 
+                                                type_data="test", saved_data_file_path=self.saved_input)
         
         print('Training') 
     
         self.link_prediction_decoder = self.train_multi_head(training_samples=training_sorted_samples, validation_samples=validation_sorted_samples, 
                                                                  epochs=500, batch_size=64, training_new_edges_count=training_new_edges_count)
         
-        return self.link_prediction_decoder
+        return self.link_prediction_decoder, self.encoder_model
             
     # ======================= BUILD GRAPH =======================
     def build_accumulating_filtration_sequence_with_edgebank(self, current_target_snapshot):
@@ -330,7 +482,7 @@ def build_accumulating_filtration_sequence_with_edgebank(self, current_target_sn
         # Get the edgebank up to the current target snapshot
         edgebank = self.all_edgebanks[current_target_snapshot]
         current_target_graph_description = self.graph_descriptions[current_target_snapshot]
-        prev_graphs = [graph[-1] for graph in self.target_graphs[:current_target_snapshot]]
+        prev_graphs = [graph[-1] for graph in self.target_graphs[max(0, current_target_snapshot - 5):current_target_snapshot]]
         
         V_total = int(current_target_graph_description[-1][0])
         E_total = int(current_target_graph_description[-1][1])
@@ -472,7 +624,7 @@ def run(self):
         else:
             # Train the Decoder and Encoder model
             print('Training the Link Prediction Decoder and Encoder')
-            self.link_prediction_decoder = self.train_models()
+            self.link_prediction_decoder, self.encoder_model = self.train_models()
             print('Finished training the Link Prediction Decoder and Encoder; Start Graph Construction')
             
             # saving the trained model

From a425fc932a89ccaa7fc3bce7dd403e7c4a959ee4 Mon Sep 17 00:00:00 2001
From: Duy Kha <duykha121314888@gmail.com>
Date: Tue, 5 Aug 2025 15:58:01 -0500
Subject: [PATCH 7/8] rewrite loading model message + add  gpu memory alloc

---
 GraphGeneration/scripts/topoGED_end_to_end.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/GraphGeneration/scripts/topoGED_end_to_end.py b/GraphGeneration/scripts/topoGED_end_to_end.py
index 0cbacf9..5365140 100644
--- a/GraphGeneration/scripts/topoGED_end_to_end.py
+++ b/GraphGeneration/scripts/topoGED_end_to_end.py
@@ -255,7 +255,8 @@ def run_validation(self, validation_samples, batch_size, epoch):
         current_model_auc = 0 #we take average of all edge types
         
         for flag in self.all_edge_types:
-            epochMessage = f"Epoch {epoch+1:02d} | Edge Type: {flag}  | Validation AUCROC {np.mean(train_auc[flag]):.4f}"
+            gpu_mem_alloc = torch.cuda.max_memory_allocated() / 1000000 if torch.cuda.is_available() else 0
+            epochMessage = f"Epoch {epoch+1:02d} | Edge Type: {flag}  | Validation AUCROC {np.mean(train_auc[flag]):.4f} | GPU: {gpu_mem_alloc:.1f}MiB"
             current_model_auc += np.mean(train_auc[flag])
             print(epochMessage)
             with open(rf'{self.file_visualization_path}\{encoder_config["dataset"]}\{encoder_config["encoder_model"]["nodeEmbeddingType"]}\multiheadMLP_performance.txt', "a") as f:
@@ -306,7 +307,7 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
             train_preds = []
             train_labels = []
             
-            for snapshot in range(2, 15):
+            for snapshot in range(2, 16):
                 print("INFO: Training on snapshot", snapshot)
                 
                 # Prepare current target graph count
@@ -437,9 +438,10 @@ def train_multi_head(self, training_samples, validation_samples, epochs=250, bat
             self.run_validation(validation_samples=validation_samples, batch_size=encoder_config["training"]["batch_size"], epoch=epoch)
             
             # Record the Training Loss, AUC 
+            gpu_mem_alloc = torch.cuda.max_memory_allocated() / 1000000 if torch.cuda.is_available() else 0
             for flag in self.all_edge_types:
                 if (epoch + 1) % 20 == 0 or epoch == 0:
-                    epochMessage = f"Epoch {epoch+1:02d} | Edge Type: {flag} | Train Loss: {np.mean(train_loss[flag]):.4f} | Train AUCROC {np.mean(train_auc[flag]):.4f}"
+                    epochMessage = f"Epoch {epoch+1:02d} | Edge Type: {flag} | Train Loss: {np.mean(train_loss[flag]):.4f} | Train AUCROC {np.mean(train_auc[flag]):.4f} | GPU: {gpu_mem_alloc:.1f}MiB"
                     print(epochMessage)
                     with open(rf'{self.file_visualization_path}\{encoder_config["dataset"]}\{encoder_config["encoder_model"]["nodeEmbeddingType"]}\multiheadMLP_performance.txt', "a") as f:
                         f.write(epochMessage + "\n")
@@ -644,8 +646,8 @@ def run(self):
             
             self.link_prediction_decoder.eval()
             self.encoder_model.eval()
-            print(f"✅ Link Prediction Decoder loaded from: {decoder_model_path}")
-            print(f"✅ Ecoder loaded from: {encoder_model_path}")
+            print(f"Link Prediction Decoder loaded from: {decoder_model_path}")
+            print(f"Encoder loaded from: {encoder_model_path}")
         else:
             # Train the Decoder and Encoder model
             print('Training the Link Prediction Decoder and Encoder')

From 699dca7e1a936cef14ee5237b4dd779614316efa Mon Sep 17 00:00:00 2001
From: Duy Kha <duykha121314888@gmail.com>
Date: Tue, 5 Aug 2025 15:59:08 -0500
Subject: [PATCH 8/8] remove saving model after train_models

---
 GraphGeneration/scripts/topoGED_end_to_end.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/GraphGeneration/scripts/topoGED_end_to_end.py b/GraphGeneration/scripts/topoGED_end_to_end.py
index 5365140..d3bbedb 100644
--- a/GraphGeneration/scripts/topoGED_end_to_end.py
+++ b/GraphGeneration/scripts/topoGED_end_to_end.py
@@ -653,12 +653,6 @@ def run(self):
             print('Training the Link Prediction Decoder and Encoder')
             self.link_prediction_decoder, self.encoder_model = self.train_models()
             print('Finished training the Link Prediction Decoder and Encoder; Start Graph Construction')
-            
-            # saving the trained model
-            print("INFO: Saving the model...")
-            torch.save(self.link_prediction_decoder.state_dict(), self.model_path)
-            torch.save(self.encoder_model.state_dict(), self.model_path)
-            print("INFO: The model is saved. Done.")
        
         # Old graphs that we know up to now
         self.old_graphs = [self.target_graphs[0], self.target_graphs[1]]