From 14839e97d976f62bb38226e3e68e91dd1a68b3bc Mon Sep 17 00:00:00 2001
From: GuillaumePELLUET <guillaume@graybx.com>
Date: Wed, 24 Jun 2026 17:50:11 +0200
Subject: [PATCH 1/5] Fix dataframe issue with index

---
 weightslab/utils/tools.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/weightslab/utils/tools.py b/weightslab/utils/tools.py
index 4e8fa025..b2442f95 100644
--- a/weightslab/utils/tools.py
+++ b/weightslab/utils/tools.py
@@ -34,10 +34,12 @@ def safe_reset_index(df: "pd.DataFrame") -> "pd.DataFrame":
     """
     import pandas as _pd
     if not isinstance(df, _pd.DataFrame) or not isinstance(df.index, _pd.MultiIndex):
-        # Single-level index: only reset if the name isn't already a column.
-        if df.index.name and df.index.name in df.columns:
-            return df
-        return df.reset_index()
+        # Single-level index: only promote if the name is meaningful and missing.
+        # Unnamed index (None) has nothing to promote; drop it to avoid inserting
+        # a spurious 'index' / 'level_0' column when one already exists.
+        if df.index.name and df.index.name not in df.columns:
+            return df.reset_index()
+        return df.reset_index(drop=True)
     missing = [n for n in df.index.names if n and n not in df.columns]
     if not missing:
         # All levels already present as columns — nothing to promote.

From 58e63eb345c99e96fc9e8e2793bd64cf323ad5c7 Mon Sep 17 00:00:00 2001
From: GuillaumePELLUET <guillaume@graybx.com>
Date: Wed, 24 Jun 2026 18:14:38 +0200
Subject: [PATCH 2/5] feat: add __discard_by_tag__ handler in EditDataSample
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New special stat_name that discards all samples carrying a given tag across
the whole dataset without requiring the frontend to enumerate sample IDs.
Uses the existing upsert_df path — same as per-sample discard — grouped by
origin for correctness.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 weightslab/trainer/services/data_service.py | 56 ++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/weightslab/trainer/services/data_service.py b/weightslab/trainer/services/data_service.py
index b12495c3..5b518ee6 100755
--- a/weightslab/trainer/services/data_service.py
+++ b/weightslab/trainer/services/data_service.py
@@ -3765,10 +3765,64 @@ def EditDataSample(self, request, context):
                         message=f"Failed to delete metadata column: {str(e)}",
                     )
 
+        if request.stat_name == "__discard_by_tag__":
+            tag_name = str(request.string_value or "").strip()
+            if not tag_name:
+                return pb2.DataEditsResponse(
+                    success=False,
+                    message="Missing tag name for discard-by-tag operation.",
+                )
+
+            with self._watched_lock("_lock[EditDataSample/__discard_by_tag__]"):
+                try:
+                    self._slowUpdateInternals()
+                    if self._all_datasets_df is None or self._all_datasets_df.empty:
+                        return pb2.DataEditsResponse(
+                            success=False,
+                            message="No dataframe available.",
+                        )
+
+                    tag_col = f"{SampleStatsEx.TAG.value}:{tag_name}"
+                    df = safe_reset_index(self._all_datasets_df)
+                    if tag_col not in df.columns:
+                        return pb2.DataEditsResponse(
+                            success=True,
+                            message=f"No samples found with tag '{tag_name}'.",
+                        )
+
+                    tagged = df[df[tag_col] == 1]
+                    if tagged.empty:
+                        return pb2.DataEditsResponse(
+                            success=True,
+                            message=f"No samples found with tag '{tag_name}'.",
+                        )
+
+                    for origin, origin_df in tagged.groupby(SampleStatsEx.ORIGIN.value, sort=False):
+                        sample_ids = origin_df.index.astype(str).tolist()
+                        rows = [
+                            {"sample_id": sid, SampleStatsEx.ORIGIN.value: origin, SampleStatsEx.DISCARDED.value: True}
+                            for sid in sample_ids
+                        ]
+                        df_update = pd.DataFrame(rows).set_index("sample_id")
+                        self._df_manager.upsert_df(df_update, origin=origin, force_flush=True)
+
+                    count = len(tagged)
+                    self._slowUpdateInternals(force=True)
+                    return pb2.DataEditsResponse(
+                        success=True,
+                        message=f"Discarded {count} samples with tag '{tag_name}'.",
+                    )
+                except Exception as e:
+                    logger.error(f"[EditDataSample] discard_by_tag failed: {e}", exc_info=True)
+                    return pb2.DataEditsResponse(
+                        success=False,
+                        message=f"Failed to discard by tag: {str(e)}",
+                    )
+
         if not request.stat_name or not request.stat_name.startswith(SampleStatsEx.TAG.value) and request.stat_name not in [SampleStatsEx.DISCARDED.value]:
             return pb2.DataEditsResponse(
                 success=False,
-                message="Only 'tags', 'discarded', '__copy_metadata__', '__delete_metadata__', '__save_data_state__', and '__force_h5_write_and_save__' edits are supported.",
+                message="Only 'tags', 'discarded', '__copy_metadata__', '__delete_metadata__', '__save_data_state__', '__force_h5_write_and_save__', and '__discard_by_tag__' edits are supported.",
             )
 
         # =====================================================================

From ff42584bfdd603310ad39fd90a44091c59147600 Mon Sep 17 00:00:00 2001
From: GuillaumePELLUET <guillaume@graybx.com>
Date: Thu, 25 Jun 2026 12:52:51 +0200
Subject: [PATCH 3/5] fix: metadata clone naming (@hash format) + delete no
 longer blocks gRPC thread
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- build_metadata_copy_column_names: first clone of original field uses
  {source}@{hash} (no _{n}); conflicts get _{n} suffix;
  cloning an already-cloned field reuses the same @hash, only increments _{n}
- is_copy_metadata_column_name: updated regex to match new .+@.+ format
- __delete_metadata__ handler: replace blocking _slowUpdateInternals(force=True)
  with non-blocking background kick — view is already up-to-date from manual
  drop, no need to stall the gRPC response for 5-10s

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 weightslab/trainer/services/data_service.py | 49 ++++++++++++++++-----
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/weightslab/trainer/services/data_service.py b/weightslab/trainer/services/data_service.py
index 5b518ee6..c2f97d2b 100755
--- a/weightslab/trainer/services/data_service.py
+++ b/weightslab/trainer/services/data_service.py
@@ -72,19 +72,39 @@ def normalize_metadata_copy_source_name(source_name: str, experiment_hash: str =
     return name
 
 
-def build_metadata_copy_column_names(existing_columns, experiment_hash: str, source_name: str):
-    """Build backend/ui copied metadata names with incrementing suffix _1, _2, ..."""
-    exp_hash = str(experiment_hash or "current_experiment_hash").strip() or "current_experiment_hash"
-    normalized_source = normalize_metadata_copy_source_name(source_name, exp_hash)
+def build_metadata_copy_column_names(existing_columns, experiment_hash: str, source_name: str) -> str:
+    """Build backend/ui copied metadata column names.
+
+    Naming rules:
+    - Cloning an original field (no '@'): first copy is ``{source}@{hash}``;
+      subsequent copies of the same source get ``{source}@{hash}_1``, ``_2``, ...
+    - Cloning an already-cloned field (contains '@'): strip any trailing ``_N``
+      suffix to find the base name, then append ``_1``, ``_2``, ... — no new hash.
+    """
     existing_iterable = [] if existing_columns is None else existing_columns
     existing = {str(col) for col in existing_iterable}
 
-    index = 1
-    while True:
-        copy_name = f"{normalized_source}_{index}@{exp_hash}"
-        if copy_name not in existing:
-            return copy_name
-        index += 1
+    if "@" in source_name:
+        # Cloning a clone: reuse the same @hash suffix, just increment _{n}
+        base = re.sub(r"_\d+$", "", source_name)
+        n = 1
+        while True:
+            candidate = f"{base}_{n}"
+            if candidate not in existing:
+                return candidate
+            n += 1
+    else:
+        # Cloning an original field: first copy has no index, then _1, _2, ...
+        exp_hash = str(experiment_hash or "current_experiment_hash").strip() or "current_experiment_hash"
+        base = f"{source_name}@{exp_hash}"
+        if base not in existing:
+            return base
+        n = 1
+        while True:
+            candidate = f"{base}_{n}"
+            if candidate not in existing:
+                return candidate
+            n += 1
 
 
 def duplicate_metadata_column_in_dataframe(df: pd.DataFrame, source_column: str, experiment_hash: str):
@@ -99,7 +119,9 @@ def duplicate_metadata_column_in_dataframe(df: pd.DataFrame, source_column: str,
 
 def is_copy_metadata_column_name(column_name: str) -> bool:
     name = str(column_name or "").strip()
-    return bool(re.match(r".+_\d+@.+$", name))
+    # Matches any column that was produced by a clone operation: contains '@'
+    # with at least one character on each side (e.g. "loss@abc123" or "loss@abc123_2").
+    return bool(re.match(r".+@.+", name))
 
 
 def detect_bbox_format(bboxes: np.ndarray) -> str:
@@ -3752,7 +3774,10 @@ def EditDataSample(self, request, context):
                     if target_column in self._all_datasets_df.columns:
                         self._all_datasets_df = self._all_datasets_df.drop(columns=[target_column])
 
-                    self._slowUpdateInternals(force=True)
+                    # Kick a background view-refresh (non-blocking) — the in-memory view
+                    # is already consistent after the drop above, so blocking inline rebuild
+                    # is unnecessary and causes the gRPC response to stall for 5-10 s.
+                    self._slowUpdateInternals()
 
                     return pb2.DataEditsResponse(
                         success=True,

From f198af2e11bb610de39a68631e6e8fe6dfb1ea3d Mon Sep 17 00:00:00 2001
From: GuillaumePELLUET <guillaume@graybx.com>
Date: Thu, 25 Jun 2026 14:06:44 +0200
Subject: [PATCH 4/5] fix: always serialize/deserialize prediction and target
 as lists in H5

Write path: serialize_value now handles numpy arrays of ALL ndim (not
just <= 1). A 2D array was falling through to str() which produced
numpy repr strings (space-separated, no commas) that aren't valid JSON.

Read path: deserialize_value for MODEL_INOUT_LIST columns now:
- maps "nan"/"none"/"" strings to np.nan instead of returning them as-is
- removes the single-element unwrap (a [0.5] list stays [0.5], not 0.5)
- adds a fallback regex pass to recover old data stored as numpy repr

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 weightslab/data/h5_dataframe_store.py | 37 +++++++++++++++++----------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/weightslab/data/h5_dataframe_store.py b/weightslab/data/h5_dataframe_store.py
index e2271995..def47790 100644
--- a/weightslab/data/h5_dataframe_store.py
+++ b/weightslab/data/h5_dataframe_store.py
@@ -1,4 +1,5 @@
 import os
+import re
 import json
 import time
 import logging
@@ -344,10 +345,8 @@ def serialize_value(val):
             if not isinstance(val, (list, set, np.ndarray)) and pd.isna(val):
                 return np.nan
 
-            if isinstance(val, np.ndarray) and val.ndim <= 1:
-                if val.ndim == 0:
-                    val = val.reshape(-1)
-                val = val.tolist()
+            if isinstance(val, np.ndarray):
+                val = val.item() if val.ndim == 0 else val.tolist()
 
             if isinstance(val, (list, dict)):
                 try:
@@ -407,18 +406,30 @@ def _normalize_for_read(self, df: pd.DataFrame, origin: str) -> pd.DataFrame:
         # Handle deserialization of nested objects (lists, dicts) stored as JSON strings
         cols_to_deserialize = [col for col in SampleStats.MODEL_INOUT_LIST if col in df.columns]
         if cols_to_deserialize:
+            _MISSING = {"nan", "none", "<na>", ""}
+
             def deserialize_value(val):
-                if not isinstance(val, str) or not (val.startswith('[') or val.startswith('{')):
+                if not isinstance(val, str):
                     return val
-                try:
-                    obj = json.loads(val)
-                except Exception:
+                stripped = val.strip()
+                if stripped.lower() in _MISSING:
+                    return np.nan
+                if not (stripped.startswith('[') or stripped.startswith('{')):
                     return val
-
-                # Unwrap single-element lists to scalars for consistency with active training data
-                if isinstance(obj, list) and len(obj) == 1:
-                    return obj[0]
-                return obj
+                try:
+                    return json.loads(stripped)
+                except json.JSONDecodeError:
+                    # Fallback: numpy repr uses spaces as delimiters without commas.
+                    # E.g. "[0.1 0.2]" or "[[0.1 0.2]\n [0.3 0.4]]"
+                    try:
+                        normalized = re.sub(
+                            r'(?<=[0-9.])\s+(?=[-0-9.\[])',
+                            ', ',
+                            stripped.replace('\n', ' '),
+                        )
+                        return json.loads(normalized)
+                    except Exception:
+                        return val
 
             for col in cols_to_deserialize:
                 df[col] = df[col].apply(deserialize_value)

From 70d11e1dfc5c73288fe73b28ed053ce4e713da4c Mon Sep 17 00:00:00 2001
From: GuillaumePELLUET <guillaume@graybx.com>
Date: Thu, 25 Jun 2026 16:39:41 +0200
Subject: [PATCH 5/5] Fix ultralytics BB rendering on UI side and weightslab
 examples

---
 weightslab/data/data_utils.py                 |  8 +-
 weightslab/data/dataframe_manager.py          |  2 +-
 .../examples/PyTorch/ws-detection/main.py     | 56 ++++++------
 .../PyTorch/ws-segmentation/config.yaml       |  3 +-
 .../examples/PyTorch/ws-segmentation/main.py  | 89 +++++++++++--------
 .../PyTorch/ws-segmentation/utils/data.py     | 27 ++++--
 .../Ultralytics/ws-detection/config.yaml      |  4 +-
 .../ws-2d-lidar-detection/config.yaml         |  1 +
 weightslab/src.py                             | 13 ---
 weightslab/trainer/services/data_service.py   |  6 +-
 10 files changed, 114 insertions(+), 95 deletions(-)

diff --git a/weightslab/data/data_utils.py b/weightslab/data/data_utils.py
index 9ce19864..2d65ba52 100644
--- a/weightslab/data/data_utils.py
+++ b/weightslab/data/data_utils.py
@@ -5,6 +5,8 @@
 
 from PIL import Image
 
+from weightslab.data.array_proxy import ArrayH5Proxy
+
 
 __all__ = [
     "_detect_dataset_split",
@@ -21,9 +23,6 @@
 logger = logging.getLogger(__name__)
 
 
-
-
-
 def _to_uint8_image(img_float: np.ndarray) -> np.ndarray:
     """
     Convert float image to uint8 for visualization.
@@ -222,6 +221,9 @@ def _downsample_nn(arr: np.ndarray, max_hw: int = 96) -> np.ndarray:
 
 
 def to_numpy_safe(x):
+    if isinstance(x, ArrayH5Proxy):
+        return np.asanyarray(x)
+
     if isinstance(x, (int, float)):
         return np.array([x])
 
diff --git a/weightslab/data/dataframe_manager.py b/weightslab/data/dataframe_manager.py
index 07644190..21e3fd91 100644
--- a/weightslab/data/dataframe_manager.py
+++ b/weightslab/data/dataframe_manager.py
@@ -683,7 +683,7 @@ def upsert_df(self, df_local: List | pd.DataFrame, origin: str = None, force_flu
                     # "Cannot setitem on a Categorical with a new category". The
                     # _optimize_dataframe_memory pass below re-applies categorical dtypes.
                     for col in all_cols:
-                        if col in self._df.columns and isinstance(self._df[col].dtype, pd.CategoricalDtype):
+                        if col in self._df.columns and col != SampleStatsEx.ORIGIN and isinstance(self._df[col].dtype, pd.CategoricalDtype):
                             self._df[col] = self._df[col].astype(object)
                     self._df.loc[existing_idx, all_cols] = df_norm.loc[existing_idx, all_cols]
 
diff --git a/weightslab/examples/PyTorch/ws-detection/main.py b/weightslab/examples/PyTorch/ws-detection/main.py
index 7da1d950..0f6d9320 100644
--- a/weightslab/examples/PyTorch/ws-detection/main.py
+++ b/weightslab/examples/PyTorch/ws-detection/main.py
@@ -49,7 +49,7 @@ def train(loader, model, optimizer, sig, device, grid_size, conf_thresh):
         targets = [t.to(device) for t in targets]
 
         optimizer.zero_grad()
-        outputs = model(inputs)  # [B, S, S, 5 + num_classes]
+        outputs = model(inputs) # [B, S, S, 5 + num_classes]
 
         # Decoded boxes for the UI overlay (detached — display only).
         preds = decode_predictions(outputs.detach(), grid_size, conf_thresh=conf_thresh)
@@ -90,7 +90,7 @@ def test(loader, model, sig, device, grid_size, conf_thresh, test_loader_len):
 
     loss = float((losses / test_loader_len).detach().cpu().item())
     iou = float((ious / test_loader_len).detach().cpu().item())
-    return loss, iou * 100.0  # Return mean IoU as percentage
+    return loss, iou * 100.0 # Return mean IoU as percentage
 
 
 # =============================================================================
@@ -111,7 +111,7 @@ def test(loader, model, sig, device, grid_size, conf_thresh, test_loader_len):
     parameters.setdefault("training_steps_to_do", 500)
     parameters.setdefault("eval_full_to_train_steps_ratio", 50)
     parameters.setdefault("number_of_workers", 4)
-    parameters.setdefault("num_classes", 1)       # Penn-Fudan: single class (person)
+    parameters.setdefault("num_classes", 1) # Penn-Fudan: single class (person)
     parameters.setdefault("image_size", 256)
     parameters.setdefault("grid_size", 8)
     parameters.setdefault("conf_thresh", 0.3)
@@ -119,40 +119,40 @@ def test(loader, model, sig, device, grid_size, conf_thresh, test_loader_len):
     parameters.setdefault("freeze_backbone", True)
     parameters.setdefault("compute_natural_sort", True)
 
+    # --- 2) Register hyperparameters ---
     exp_name = parameters["experiment_name"]
+    wl.watch_or_edit(
+        parameters,
+        flag="hyperparameters",
+        name=exp_name,
+        defaults=parameters,
+        poll_interval=1.0,
+    )
+
     num_classes = int(parameters["num_classes"])
     image_size = int(parameters["image_size"])
     grid_size = int(parameters["grid_size"])
     conf_thresh = float(parameters["conf_thresh"])
 
-    # --- 2) Device selection ---
+    # --- 3) Device selection ---
     if parameters.get("device", "auto") == "auto":
         parameters["device"] = torch.device(
             "cuda" if torch.cuda.is_available() else "cpu"
         )
     device = parameters["device"]
 
-    # --- 3) Logging directory ---
+    # --- 4) Logging directory ---
     if not parameters.get("root_log_dir"):
         tmp_dir = tempfile.mkdtemp()
         parameters["root_log_dir"] = tmp_dir
         print(f"No root_log_dir specified, using temporary directory: {parameters['root_log_dir']}")
     os.makedirs(parameters["root_log_dir"], exist_ok=True)
-
     log_dir = parameters["root_log_dir"]
     max_steps = parameters["training_steps_to_do"]
     eval_full_to_train_steps_ratio = parameters["eval_full_to_train_steps_ratio"]
     verbose = parameters.get("verbose", True)
     tqdm_display = parameters.get("tqdm_display", True)
 
-    # --- 4) Register hyperparameters ---
-    wl.watch_or_edit(
-        parameters,
-        flag="hyperparameters",
-        name=exp_name,
-        defaults=parameters,
-        poll_interval=1.0,
-    )
 
     # --- 5) Data (Penn-Fudan pedestrians, downloaded on first run) ---
     default_data_root = os.path.abspath(
@@ -255,7 +255,7 @@ def compute_class_weights(dataset, num_classes, max_samples=200):
         class_counts = np.zeros(num_classes, dtype=np.float64)
         num_samples = min(len(dataset), max_samples)
 
-        for idx in tqdm.tqdm(range(num_samples), desc="📊 Analyzing Distribution"):
+        for idx in tqdm.tqdm(range(num_samples), desc=" Analyzing Distribution"):
             _, _, target, _ = dataset.get_items(idx, include_labels=True)
             if target is None or len(target) == 0:
                 continue
@@ -263,10 +263,10 @@ def compute_class_weights(dataset, num_classes, max_samples=200):
                 if 0 <= c < num_classes:
                     class_counts[c] += 1
 
-        class_counts = np.maximum(class_counts, 1)  # Avoid div by zero
+        class_counts = np.maximum(class_counts, 1) # Avoid div by zero
         total = class_counts.sum()
         class_weights = total / (num_classes * class_counts)
-        class_weights = class_weights / class_weights.mean()  # Normalize
+        class_weights = class_weights / class_weights.mean() # Normalize
 
         print("\nClass distribution and weights:", flush=True)
         for c in range(num_classes):
@@ -287,16 +287,16 @@ def compute_class_weights(dataset, num_classes, max_samples=200):
     )
 
     print("=" * 60)
-    print("🚀 STARTING PENN-FUDAN PEDESTRIAN DETECTION TRAINING")
-    print(f"📈 Total steps: {max_steps}")
-    print(f"🔄 Evaluation every {eval_full_to_train_steps_ratio} steps")
-    print(f"💾 Logs will be saved to: {log_dir}")
-    print(f"📂 Data root: {data_root}")
+    print(" STARTING PENN-FUDAN PEDESTRIAN DETECTION TRAINING")
+    print(f" Total steps: {max_steps}")
+    print(f" Evaluation every {eval_full_to_train_steps_ratio} steps")
+    print(f" Logs will be saved to: {log_dir}")
+    print(f" Data root: {data_root}")
     print("=" * 60 + "\n")
 
-    # ================
-    # Training Loop
-    wl.start_training(timeout=3)  # Blocks and keeps the main thread alive while background services run. Optionally set a timeout (seconds) to auto-stop.
+    # # ================
+    # # Training Loop
+    # wl.start_training(timeout=3) # Blocks and keeps the main thread alive while background services run. Optionally set a timeout (seconds) to auto-stop.
 
     # ================
     train_range = tqdm.tqdm(itertools.count(), desc="Training") if tqdm_display else itertools.count()
@@ -310,7 +310,7 @@ def compute_class_weights(dataset, num_classes, max_samples=200):
 
         # Test
         if age == 0 or age % eval_full_to_train_steps_ratio == 0:
-            test_loader_len = len(test_loader)  # Store length before wrapping with tqdm
+            test_loader_len = len(test_loader) # Store length before wrapping with tqdm
             test_loader_it = tqdm.tqdm(test_loader, desc="Evaluating") if tqdm_display else test_loader
             test_loss, test_metric = test(test_loader_it, model, test_sig, device, grid_size, conf_thresh, test_loader_len)
 
@@ -332,8 +332,8 @@ def compute_class_weights(dataset, num_classes, max_samples=200):
             )
 
     print("\n" + "=" * 60)
-    print(f"✅ Training completed in {time.time() - start_time:.2f} seconds")
-    print(f"💾 Logs saved to: {log_dir}")
+    print(f" Training completed in {time.time() - start_time:.2f} seconds")
+    print(f" Logs saved to: {log_dir}")
     print("=" * 60)
 
     # Keep the main thread alive to allow background serving threads to run
diff --git a/weightslab/examples/PyTorch/ws-segmentation/config.yaml b/weightslab/examples/PyTorch/ws-segmentation/config.yaml
index 5054332b..3fd60d3b 100644
--- a/weightslab/examples/PyTorch/ws-segmentation/config.yaml
+++ b/weightslab/examples/PyTorch/ws-segmentation/config.yaml
@@ -28,7 +28,8 @@ ledger_flush_interval: 60.0
 
 # Data
 num_classes: 6
-image_size: 180
+class_names: ["Background", "Ego Road", "Driveable Area", "Lane Line 1", "Lane Line 2", "Lane Line 3"]
+image_size: 128
 data_root: .\BDD_subset  # Bdd format
 data:
   train_loader:
diff --git a/weightslab/examples/PyTorch/ws-segmentation/main.py b/weightslab/examples/PyTorch/ws-segmentation/main.py
index f3a94d47..0df3795a 100644
--- a/weightslab/examples/PyTorch/ws-segmentation/main.py
+++ b/weightslab/examples/PyTorch/ws-segmentation/main.py
@@ -45,13 +45,13 @@ def _instance_batch_idx(labels):
 def _run_instance_signals(sig, outputs, labels, ids, preds, return_metric=False):
     """Compute + log/save the per-sample AND per-instance Dice (metric) and BCE (loss)."""
     bce_sample = sig["bce_sample"](outputs, labels, batch_ids=ids, preds=preds)
-    dice_sample = sig["dice_sample"](outputs, labels, batch_ids=ids)  # Register processed predictions one time only
+    dice_sample = sig["dice_sample"](outputs, labels, batch_ids=ids) # Register processed predictions one time only
 
-    sig["dice_instance"](outputs, labels, batch_ids=ids)  # Register processed predictions one time only
+    sig["dice_instance"](outputs, labels, batch_ids=ids) # Register processed predictions one time only
     sig["bce_instance"](outputs, labels, batch_ids=ids)
 
     avg_loss = 0.5 * dice_sample + 0.5 * bce_sample
-    wl.save_signals({"combined_bce_dice_per_sample": avg_loss}, ids)  # Save the per-sample aggregate loss for backward step
+    wl.save_signals({"combined_bce_dice_per_sample": avg_loss}, ids) # Save the per-sample aggregate loss for backward step
     if return_metric:
         return avg_loss, dice_sample
     return avg_loss
@@ -91,11 +91,11 @@ def train(loader, model, optimizer, sig, device):
     with guard_training_context:
         (inputs, ids, labels, _) = next(loader)
         inputs = inputs.to(device)
-        labels = [[m.to(device) for m in insts] for insts in labels]  # per-sample list of instances
+        labels = [[m.to(device) for m in insts] for insts in labels] # per-sample list of instances
 
         optimizer.zero_grad()
-        outputs = model(inputs)  # [B,C,H,W]
-        preds = outputs.argmax(dim=1)  # [B,H,W]
+        outputs = model(inputs) # [B,C,H,W]
+        preds = outputs.argmax(dim=1) # [B,H,W]
 
         # Per-instance + per-sample Dice/BCE (tracked & saved at annotation level).
         loss_per_sample = _run_instance_signals(sig, outputs, labels, ids, preds=preds)
@@ -110,7 +110,7 @@ def train(loader, model, optimizer, sig, device):
         wl.save_signals(
             _user_custom_signals(preds, labels),
             ids
-        )  # Save the per-sample predictions for visualization
+        ) # Save the per-sample predictions for visualization
 
     return float(loss.detach().cpu().item())
 
@@ -122,23 +122,23 @@ def test(loader, model, sig, device, test_loader_len):
     with guard_testing_context, torch.no_grad():
         for inputs, ids, labels, _ in loader:
             inputs = inputs.to(device)
-            labels = [[m.to(device) for m in insts] for insts in labels]  # per-sample list of instances
+            labels = [[m.to(device) for m in insts] for insts in labels] # per-sample list of instances
 
             outputs = model(inputs)
-            preds = outputs.argmax(dim=1)  # [B,H,W]
+            preds = outputs.argmax(dim=1) # [B,H,W]
 
             # Per-instance + per-sample Dice/BCE (tracked & saved at annotation level).
             loss_per_sample, dice_sample = _run_instance_signals(sig, outputs, labels, ids, preds=preds, return_metric=True)
 
-            losses += torch.mean(loss_per_sample)  # Average over the batch and accumulate
-            dices += torch.mean(dice_sample)  # Average over the batch and accumulate
+            losses += torch.mean(loss_per_sample) # Average over the batch and accumulate
+            dices += torch.mean(dice_sample) # Average over the batch and accumulate
 
             # I want to see in the UI the per-sample classes predicted by the model
-            wl.save_signals(_user_custom_signals(preds, labels), ids)  # Save the per-sample predictions for visualization
+            wl.save_signals(_user_custom_signals(preds, labels), ids) # Save the per-sample predictions for visualization
 
     loss = float((losses / test_loader_len).detach().cpu().item())
     dice = float((dices / test_loader_len).detach().cpu().item())
-    return loss, dice * 100.0  # Return average Dice as percentage
+    return loss, dice * 100.0 # Return average Dice as percentage
 
 
 # =============================================================================
@@ -159,13 +159,23 @@ def test(loader, model, sig, device, test_loader_len):
     parameters.setdefault("training_steps_to_do", 500)
     parameters.setdefault("eval_full_to_train_steps_ratio", 50)
     parameters.setdefault("number_of_workers", 4)
-    parameters.setdefault("num_classes", 6)      # adjust to your label set
-    parameters.setdefault("ignore_index", 255)   # if you have void pixels
+    parameters.setdefault("num_classes", 6) # adjust to your label set
+    parameters.setdefault("class_names", None) # adjust to your label set
+    parameters.setdefault("ignore_index", 255) # if you have void pixels
     parameters.setdefault("image_size", 256)
     parameters.setdefault("compute_natural_sort", True)
 
+    # --- 4) Register hyperparameters ---
     exp_name = parameters["experiment_name"]
+    wl.watch_or_edit(
+        parameters,
+        flag="hyperparameters",
+        name=exp_name,
+        defaults=parameters,
+        poll_interval=1.0,
+    )
     num_classes = int(parameters["num_classes"])
+    class_names = parameters["class_names"]
     ignore_index = int(parameters["ignore_index"])
     image_size = int(parameters["image_size"])
 
@@ -186,18 +196,10 @@ def test(loader, model, sig, device, test_loader_len):
     log_dir = parameters["root_log_dir"]
     max_steps = parameters["training_steps_to_do"]
     eval_full_to_train_steps_ratio = parameters["eval_full_to_train_steps_ratio"]
+    write_export_ratio = parameters.get("write_export_ratio", 100)
     verbose = parameters.get("verbose", True)
     tqdm_display = parameters.get("tqdm_display", True)
 
-    # --- 4) Register hyperparameters ---
-    wl.watch_or_edit(
-        parameters,
-        flag="hyperparameters",
-        name=exp_name,
-        defaults=parameters,
-        poll_interval=1.0,
-    )
-
     # --- 5) Data (BDD100k reduced) ---
     default_data_root = os.path.abspath(
         os.path.join(os.path.dirname(__file__), "..", "..", "data", "BDD100k_reduced")
@@ -211,17 +213,19 @@ def test(loader, model, sig, device, test_loader_len):
         root=data_root,
         split="train",
         num_classes=num_classes,
+        class_names=class_names,
         ignore_index=ignore_index,
         image_size=image_size,
-        max_samples=train_cfg.get("max_samples", None)  # Optionally limit number of samples for faster testing
+        max_samples=train_cfg.get("max_samples", None) # Optionally limit number of samples for faster testing
     )
     _val_dataset = BDD100kSegDataset(
         root=data_root,
         split="val",
         num_classes=num_classes,
+        class_names=class_names,
         ignore_index=ignore_index,
         image_size=image_size,
-        max_samples=test_cfg.get("max_samples", None)  # Optionally limit number of samples for faster testing
+        max_samples=test_cfg.get("max_samples", None) # Optionally limit number of samples for faster testing
     )
 
     train_loader = wl.watch_or_edit(
@@ -300,8 +304,8 @@ def compute_class_weights(dataset, num_classes, ignore_index=255, max_samples=10
         class_counts = np.zeros(num_classes, dtype=np.float64)
         num_samples = min(len(dataset), max_samples)
 
-        for idx in tqdm.tqdm(range(num_samples), desc="📊 Analyzing Distribution"):
-            _, _, label, _ = dataset.get_items(idx, include_labels=True)  # Get the label/mask for this sample
+        for idx in tqdm.tqdm(range(num_samples), desc=" Analyzing Distribution"):
+            _, _, label, _ = dataset.get_items(idx, include_labels=True) # Get the label/mask for this sample
             label_np = label.numpy() if hasattr(label, 'numpy') else np.array(label)
             for c in range(num_classes):
                 class_counts[c] += (label_np == c).sum()
@@ -330,33 +334,38 @@ def compute_class_weights(dataset, num_classes, ignore_index=255, max_samples=10
     )
 
     print("=" * 60)
-    print("🚀 STARTING BDD100k SEGMENTATION TRAINING")
-    print(f"📈 Total steps: {max_steps}")
-    print(f"🔄 Evaluation every {eval_full_to_train_steps_ratio} steps")
-    print(f"💾 Logs will be saved to: {log_dir}")
-    print(f"📂 Data root: {data_root}")
+    print(" STARTING BDD100k SEGMENTATION TRAINING")
+    print(f" Total steps: {max_steps}")
+    print(f" Evaluation every {eval_full_to_train_steps_ratio} steps")
+    print(f" Logs will be saved to: {log_dir}")
+    print(f" Data root: {data_root}")
     print("=" * 60 + "\n")
 
     # ================
     # Training Loop
-    wl.start_training(timeout=3)  # This will block and keep the main thread alive while background services run. You can optionally set a timeout (in seconds) to automatically stop after a certain duration.
+    # wl.start_training(timeout=3) # This will block and keep the main thread alive while background services run. You can optionally set a timeout (in seconds) to automatically stop after a certain duration.
 
     # ================
     train_range = tqdm.tqdm(itertools.count(), desc="Training") if tqdm_display else itertools.count()
     test_loss, test_metric = None, None
     start_time = time.time()
     for train_step in train_range:
-        age = model.get_age() if hasattr(model, "get_age") else train_step  # Get model age in steps (not necessarily equal to train_step if model was reloaded or has seen more data than training steps)
+        age = model.get_age() if hasattr(model, "get_age") else train_step # Get model age in steps (not necessarily equal to train_step if model was reloaded or has seen more data than training steps)
 
         # Train
         train_loss = train(train_loader, model, optimizer, train_sig, device)
 
         # Test
         if age == 0 or age % eval_full_to_train_steps_ratio == 0:
-            test_loader_len = len(test_loader)  # Store length before wrapping with tqdm
+            test_loader_len = len(test_loader) # Store length before wrapping with tqdm
             test_loader_it = tqdm.tqdm(test_loader, desc="Evaluating") if tqdm_display else test_loader
             test_loss, test_metric = test(test_loader_it, model, test_sig, device, test_loader_len)
 
+        # Periodic history + dataframe export (JSON/CSV snapshots to root_log_dir)
+        if age > 0 and age % write_export_ratio == 0:
+            wl.write_history()
+            wl.write_dataframe()
+
         # Verbose
         if verbose and not tqdm_display:
             print(
@@ -375,9 +384,13 @@ def compute_class_weights(dataset, num_classes, ignore_index=255, max_samples=10
             )
 
     print("\n" + "=" * 60)
-    print(f"✅ Training completed in {time.time() - start_time:.2f} seconds")
-    print(f"💾 Logs saved to: {log_dir}")
+    print(f" Training completed in {time.time() - start_time:.2f} seconds")
+    print(f" Logs saved to: {log_dir}")
     print("=" * 60)
 
+    # Final export of signal history and data grid to root_log_dir
+    wl.write_history()
+    wl.write_dataframe()
+
     # Keep the main thread alive to allow background serving threads to run
     wl.keep_serving()
diff --git a/weightslab/examples/PyTorch/ws-segmentation/utils/data.py b/weightslab/examples/PyTorch/ws-segmentation/utils/data.py
index 864c6853..fcc14cc5 100644
--- a/weightslab/examples/PyTorch/ws-segmentation/utils/data.py
+++ b/weightslab/examples/PyTorch/ws-segmentation/utils/data.py
@@ -32,6 +32,7 @@ def __init__(
         root,
         split="train",
         num_classes=6,
+        class_names=None,
         ignore_index=255,
         image_size=256,
         max_samples=None
@@ -40,6 +41,7 @@ def __init__(
         self.root = root
         self.split = split
         self.num_classes = num_classes
+        self.class_names = class_names
         self.ignore_index = ignore_index
         self.task_type = "segmentation"
 
@@ -51,7 +53,7 @@ def __init__(
             for f in os.listdir(img_dir)
             if f.lower().endswith((".jpg", ".jpeg", ".png"))
         ]
-        image_files = sorted(set(image_files))[:max_samples] if max_samples is not None else sorted(set(image_files))  # Optionally limit number of samples for faster testing
+        image_files = sorted(set(image_files))[:max_samples] if max_samples != None else sorted(set(image_files)) # Optionally limit number of samples for faster testing
 
         self.images = []
         self.masks = []
@@ -113,6 +115,16 @@ def get_items(self, idx, include_metadata=False, include_labels=False, include_i
             img = Image.open(img_path).convert("RGB")
             img_t = self.image_transform(img)
 
+        # Process labels/masks
+        # # # Sample wise segmentation
+        # mask_t = None
+        # if include_labels:
+        #     mask = Image.open(mask_path)
+        #     mask_r = self.mask_resize(mask)
+        #     mask_np = np.array(mask_r, dtype=np.int64)
+        #     mask_t = torch.from_numpy(mask_np) # [H, W] int64
+        # return img_t, uid, mask_t, metadata
+        # # Instance wise segmentaiton
         # Process labels/masks
         mask_t_instances = list()
         mask_t = None
@@ -120,17 +132,16 @@ def get_items(self, idx, include_metadata=False, include_labels=False, include_i
             mask = Image.open(mask_path)
             mask_r = self.mask_resize(mask)
             mask_np = np.array(mask_r, dtype=np.int64)
-            mask_t = torch.from_numpy(mask_np)  # [H, W] int64
+            mask_t = torch.from_numpy(mask_np) # [H, W] int64
 
             # Format labels to register multiple instance_ids
             lbl_max = mask_t.max().item()
             for i in range(1, lbl_max + 1):
                 m = torch.zeros_like(mask_t)
-                m[mask_t == i] = i  # Assign class ID as instance ID for simplicity; if set to 1, all instances of the same class would be merged...
+                m[mask_t == i] = i # Assign class ID as instance ID for simplicity; if set to 1, all instances of the same class would be merged...
                 mask_t_instances.append(m)
         return img_t, uid, mask_t_instances, metadata
 
-
 def seg_collate(batch):
     """Collate WL per-sample tuples for instance-segmentation.
 
@@ -141,10 +152,10 @@ def seg_collate(batch):
     background) are filtered out so every kept instance is a real annotation.
 
     Returns:
-        images:  FloatTensor [B, C, H, W]
-        ids:     list[str] of length B
-        labels:  list[B] where labels[s] is a list of instance mask tensors
-        metas:   list[B] of metadata dicts
+        images: FloatTensor [B, C, H, W]
+        ids: list[str] of length B
+        labels: list[B] where labels[s] is a list of instance mask tensors
+        metas: list[B] of metadata dicts
     """
     images = torch.stack([b[0] for b in batch], dim=0)
     ids = [b[1] for b in batch]
diff --git a/weightslab/examples/Ultralytics/ws-detection/config.yaml b/weightslab/examples/Ultralytics/ws-detection/config.yaml
index 2ba8184e..512f60a4 100644
--- a/weightslab/examples/Ultralytics/ws-detection/config.yaml
+++ b/weightslab/examples/Ultralytics/ws-detection/config.yaml
@@ -46,14 +46,14 @@ data:
     preload_labels: true
     preload_metadata: true
     drop_last: false
-    num_workers: 4
+    num_workers: 0  # Force to 0 for windows for perfs. opt
   val_loader:
     batch_size: 2
     shuffle: false
     preload_labels: true
     preload_metadata: true
     drop_last: false
-    num_workers: 4
+    num_workers: 0  # Force to 0 for windows for perfs. opt
 
 signals_cfg:
   train_nms:
diff --git a/weightslab/examples/Usecases/ws-2d-lidar-detection/config.yaml b/weightslab/examples/Usecases/ws-2d-lidar-detection/config.yaml
index 80b795c2..edde4c24 100644
--- a/weightslab/examples/Usecases/ws-2d-lidar-detection/config.yaml
+++ b/weightslab/examples/Usecases/ws-2d-lidar-detection/config.yaml
@@ -2,6 +2,7 @@
 device: auto
 experiment_name: lidar2d_detection_usecase
 training_steps_to_do: null  # null = infinite training until manually stopped
+# root_log_dir:  # Empty to write in tmp directory, or specify a path to store logs and checkpoints
 
 checkpoint_manager:
   load_config: false
diff --git a/weightslab/src.py b/weightslab/src.py
index a41d9939..e55be200 100644
--- a/weightslab/src.py
+++ b/weightslab/src.py
@@ -3012,22 +3012,9 @@ def _check_cancel_or_timeout(self) -> None:
         if self._controller.is_cancel_requested():
             raise _EvalCanceled(f"Evaluation on '{self._split_name}' canceled by user")
 
-        elapsed = time.monotonic() - self._start_time
-        if self._absolute_timeout > 0 and elapsed > self._absolute_timeout:
-            raise _EvalTimeout(
-                f"Evaluation timeout on '{self._split_name}' after {elapsed:.1f}s (configured {self._absolute_timeout:.1f}s)"
-            )
-
         if self._total_batches <= 0 or self._processed_batches <= 0 or self._avg_batch_seconds <= 0:
             return
 
-        projected = self._avg_batch_seconds * self._total_batches
-        timeout_seconds = max(self._min_seconds, projected * self._multiplier)
-        if elapsed > timeout_seconds:
-            raise _EvalTimeout(
-                f"Evaluation timeout on '{self._split_name}' after {elapsed:.1f}s "
-                f"(projected={projected:.1f}s, limit={timeout_seconds:.1f}s, multiplier={self._multiplier:.2f})"
-            )
     def __len__(self):
         return len(self._loader)
 
diff --git a/weightslab/trainer/services/data_service.py b/weightslab/trainer/services/data_service.py
index c2f97d2b..4860517f 100755
--- a/weightslab/trainer/services/data_service.py
+++ b/weightslab/trainer/services/data_service.py
@@ -1362,6 +1362,8 @@ def _process_sample_row(self, args):
                             except Exception:
                                 label_arr = np.array([])
 
+                        if label_arr.ndim == 1 and label_arr.size >= 4:
+                            label_arr = label_arr.reshape(1, -1)
                         if label_arr.size > 0 and label_arr.ndim == 2 and label_arr.shape[-1] >= 4:
                             # 6-col rows when class+score present: [x,y,x,y,class,score]
                             # (or [tl_x,tl_y,w,h,class,score] for the xywh-top-left flavor).
@@ -2771,7 +2773,7 @@ def _build_metadata_only_response(self, df_slice: pd.DataFrame, request):
         # detection) pred/target are large JSON arrays (~310 KB/record) that bloat the
         # response to 100s of MB and silently break the histogram fetch.
         if not requested_cols:
-            _HEAVY_BLOB_COLS = {"pred", "prediction", "prediction_raw", "target"}
+            _HEAVY_BLOB_COLS = {"prediction_raw"}
             requested_cols = [c for c in df_slice.columns if c not in _HEAVY_BLOB_COLS]
 
         metadata_cols = [
@@ -3808,6 +3810,8 @@ def EditDataSample(self, request, context):
                         )
 
                     tag_col = f"{SampleStatsEx.TAG.value}:{tag_name}"
+                    if tag_col not in self._all_datasets_df.columns:
+                        self._slowUpdateInternals()
                     df = safe_reset_index(self._all_datasets_df)
                     if tag_col not in df.columns:
                         return pb2.DataEditsResponse(