GrayboxTech · guillaume-byte · Jun 25, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 25, 2026
diff --git a/weightslab/data/data_utils.py b/weightslab/data/data_utils.py
@@ -5,6 +5,8 @@
 
 from PIL import Image
 
+from weightslab.data.array_proxy import ArrayH5Proxy
+
 
 __all__ = [
     "_detect_dataset_split",
@@ -21,9 +23,6 @@
 logger = logging.getLogger(__name__)
 
 
-
-
-
 def _to_uint8_image(img_float: np.ndarray) -> np.ndarray:
     """
     Convert float image to uint8 for visualization.
@@ -222,6 +221,9 @@ def _downsample_nn(arr: np.ndarray, max_hw: int = 96) -> np.ndarray:
 
 
 def to_numpy_safe(x):
+    if isinstance(x, ArrayH5Proxy):
+        return np.asanyarray(x)
+
     if isinstance(x, (int, float)):
         return np.array([x])
 

diff --git a/weightslab/data/dataframe_manager.py b/weightslab/data/dataframe_manager.py
@@ -694,7 +694,7 @@ def upsert_df(self, df_local: List | pd.DataFrame, origin: str = None, force_flu
                     # "Cannot setitem on a Categorical with a new category". The
                     # _optimize_dataframe_memory pass below re-applies categorical dtypes.
                     for col in all_cols:
-                        if col in self._df.columns and isinstance(self._df[col].dtype, pd.CategoricalDtype):
+                        if col in self._df.columns and col != SampleStatsEx.ORIGIN and isinstance(self._df[col].dtype, pd.CategoricalDtype):
                             self._df[col] = self._df[col].astype(object)
                     self._df.loc[existing_idx, all_cols] = df_norm.loc[existing_idx, all_cols]
 

diff --git a/weightslab/data/h5_dataframe_store.py b/weightslab/data/h5_dataframe_store.py
@@ -1,4 +1,5 @@
 import os
+import re
 import json
 import time
 import logging
@@ -365,10 +366,8 @@ def serialize_value(val):
             if not isinstance(val, (list, set, np.ndarray)) and pd.isna(val):
                 return np.nan
 
-            if isinstance(val, np.ndarray) and val.ndim <= 1:
-                if val.ndim == 0:
-                    val = val.reshape(-1)
-                val = val.tolist()
+            if isinstance(val, np.ndarray):
+                val = val.item() if val.ndim == 0 else val.tolist()
 
             if isinstance(val, (list, dict)):
                 try:
@@ -428,18 +427,30 @@ def _normalize_for_read(self, df: pd.DataFrame, origin: str) -> pd.DataFrame:
         # Handle deserialization of nested objects (lists, dicts) stored as JSON strings
         cols_to_deserialize = [col for col in SampleStats.MODEL_INOUT_LIST if col in df.columns]
         if cols_to_deserialize:
+            _MISSING = {"nan", "none", "<na>", ""}
+
             def deserialize_value(val):
-                if not isinstance(val, str) or not (val.startswith('[') or val.startswith('{')):
+                if not isinstance(val, str):
                     return val
-                try:
-                    obj = json.loads(val)
-                except Exception:
+                stripped = val.strip()
+                if stripped.lower() in _MISSING:
+                    return np.nan
+                if not (stripped.startswith('[') or stripped.startswith('{')):
                     return val
-
-                # Unwrap single-element lists to scalars for consistency with active training data
-                if isinstance(obj, list) and len(obj) == 1:
-                    return obj[0]
-                return obj
+                try:
+                    return json.loads(stripped)
+                except json.JSONDecodeError:
+                    # Fallback for numpy-style reprs, which separate items (and rows) with
+                    # whitespace, not commas, e.g. "[0.1 0.2]" or "[[0.1 0.2]\n [0.3 0.4]]".
+                    try:
+                        normalized = re.sub(
+                            r'(?<=[0-9.\]])\s+(?=[-0-9.\[])',
+                            ', ',
+                            stripped.replace('\n', ' '),
+                        )
+                        return json.loads(normalized)
+                    except Exception:
+                        return val
 
             for col in cols_to_deserialize:
                 df[col] = df[col].apply(deserialize_value)

diff --git a/weightslab/examples/PyTorch/ws-detection/main.py b/weightslab/examples/PyTorch/ws-detection/main.py
@@ -120,6 +120,7 @@ def test(loader, model, sig, device, grid_size, conf_thresh, test_loader_len):
     parameters.setdefault("compute_natural_sort", True)
 
     # --- 2) Register hyperparameters ---
+    exp_name = parameters["experiment_name"]
     wl.watch_or_edit(
         parameters,
         flag="hyperparameters",
@@ -128,7 +129,6 @@ def test(loader, model, sig, device, grid_size, conf_thresh, test_loader_len):
         poll_interval=1.0,
     )
 
-    exp_name = parameters["experiment_name"]
     num_classes = int(parameters["num_classes"])
     image_size = int(parameters["image_size"])
     grid_size = int(parameters["grid_size"])

diff --git a/weightslab/examples/PyTorch/ws-segmentation/config.yaml b/weightslab/examples/PyTorch/ws-segmentation/config.yaml
@@ -28,8 +28,9 @@ ledger_flush_interval: 60.0
 
 # Data
 num_classes: 6
-image_size: 180
-data_root: C:\Users\GuillaumePELLUET\Documents\Codes\weightslab\weightslab\examples\PyTorch\ws-segmentation\BDD_subset  # Bdd format
+class_names: ["Background", "Ego Road", "Driveable Area", "Lane Line 1", "Lane Line 2", "Lane Line 3"]
+image_size: 128
+data_root: .\BDD_subset  # Bdd format
 data:
   train_loader:
     batch_size: 2

diff --git a/weightslab/examples/PyTorch/ws-segmentation/main.py b/weightslab/examples/PyTorch/ws-segmentation/main.py
@@ -160,6 +160,7 @@ def test(loader, model, sig, device, test_loader_len):
     parameters.setdefault("eval_full_to_train_steps_ratio", 50)
     parameters.setdefault("number_of_workers", 4)
     parameters.setdefault("num_classes", 6) # adjust to your label set
+    parameters.setdefault("class_names", None) # adjust to your label set
     parameters.setdefault("ignore_index", 255) # if you have void pixels
     parameters.setdefault("image_size", 256)
     parameters.setdefault("compute_natural_sort", True)
@@ -174,6 +175,7 @@ def test(loader, model, sig, device, test_loader_len):
         poll_interval=1.0,
     )
     num_classes = int(parameters["num_classes"])
+    class_names = parameters["class_names"]
     ignore_index = int(parameters["ignore_index"])
     image_size = int(parameters["image_size"])
 
@@ -211,6 +213,7 @@ def test(loader, model, sig, device, test_loader_len):
         root=data_root,
         split="train",
         num_classes=num_classes,
+        class_names=class_names,
         ignore_index=ignore_index,
         image_size=image_size,
         max_samples=train_cfg.get("max_samples", None) # Optionally limit number of samples for faster testing
@@ -219,6 +222,7 @@ def test(loader, model, sig, device, test_loader_len):
         root=data_root,
         split="val",
         num_classes=num_classes,
+        class_names=class_names,
         ignore_index=ignore_index,
         image_size=image_size,
         max_samples=test_cfg.get("max_samples", None) # Optionally limit number of samples for faster testing
@@ -337,9 +341,9 @@ def compute_class_weights(dataset, num_classes, ignore_index=255, max_samples=10
     print(f" Data root: {data_root}")
     print("=" * 60 + "\n")
 
-    # # ================
-    # # Training Loop
-    # wl.start_training(timeout=3) # This will block and keep the main thread alive while background services run. You can optionally set a timeout (in seconds) to automatically stop after a certain duration.
+    # ================
+    # Training Loop
+    wl.start_training(timeout=3) # This will block and keep the main thread alive while background services run. You can optionally set a timeout (in seconds) to automatically stop after a certain duration.
 
     # ================
     train_range = tqdm.tqdm(itertools.count(), desc="Training") if tqdm_display else itertools.count()

diff --git a/weightslab/examples/PyTorch/ws-segmentation/utils/data.py b/weightslab/examples/PyTorch/ws-segmentation/utils/data.py
@@ -32,6 +32,7 @@ def __init__(
         root,
         split="train",
         num_classes=6,
+        class_names=None,
         ignore_index=255,
         image_size=256,
         max_samples=None
@@ -40,6 +41,7 @@ def __init__(
         self.root = root
         self.split = split
         self.num_classes = num_classes
+        self.class_names = class_names
         self.ignore_index = ignore_index
         self.task_type = "segmentation"
 
@@ -114,31 +116,31 @@ def get_items(self, idx, include_metadata=False, include_labels=False, include_i
             img_t = self.image_transform(img)
 
         # Process labels/masks
-        # # Sample wise segmentation
+        # # # Sample wise segmentation
+        # mask_t = None
+        # if include_labels:
+        #     mask = Image.open(mask_path)
+        #     mask_r = self.mask_resize(mask)
+        #     mask_np = np.array(mask_r, dtype=np.int64)
+        #     mask_t = torch.from_numpy(mask_np) # [H, W] int64
+        # return img_t, uid, mask_t, metadata
+        # # Instance wise segmentation
+        # Process labels/masks
+        mask_t_instances = list()
         mask_t = None
         if include_labels:
             mask = Image.open(mask_path)
             mask_r = self.mask_resize(mask)
             mask_np = np.array(mask_r, dtype=np.int64)
-            mask_t = torch.from_numpy(mask_np)[None] # [H, W] int64
-        return img_t, uid, mask_t, metadata
-        # # # Instance wise segmentaiton
-        # # Process labels/masks
-        # mask_t_instances = list()
-        # mask_t = None
-        # if include_labels:
-        # mask = Image.open(mask_path)
-        # mask_r = self.mask_resize(mask)
-        # mask_np = np.array(mask_r, dtype=np.int64)
-        # mask_t = torch.from_numpy(mask_np)[None] # [H, W] int64
-
-        # # Format labels to register multiple instance_ids
-        # lbl_max = mask_t.max().item()
-        # for i in range(1, lbl_max + 1):
-        # m = torch.zeros_like(mask_t)
-        # m[mask_t == i] = i # Assign class ID as instance ID for simplicity; if set to 1, all instances of the same class would be merged...
-        # mask_t_instances.append(m)
-        # return img_t, uid, mask_t_instances, metadata
+            mask_t = torch.from_numpy(mask_np) # [H, W] int64
+
+            # Format labels to register multiple instance_ids
+            lbl_max = mask_t.max().item()
+            for i in range(1, lbl_max + 1):
+                m = torch.zeros_like(mask_t)
+                m[mask_t == i] = i # Assign class ID as instance ID for simplicity; if set to 1, all instances of the same class would be merged...
+                mask_t_instances.append(m)
+        return img_t, uid, mask_t_instances, metadata
 
 def seg_collate(batch):
     """Collate WL per-sample tuples for instance-segmentation.

diff --git a/weightslab/examples/Ultralytics/ws-detection/config.yaml b/weightslab/examples/Ultralytics/ws-detection/config.yaml
@@ -46,14 +46,14 @@ data:
     preload_labels: true
     preload_metadata: true
     drop_last: false
-    num_workers: 4
+    num_workers: 0  # Set to 0 on Windows as a performance optimization
   val_loader:
     batch_size: 2
     shuffle: false
     preload_labels: true
     preload_metadata: true
     drop_last: false
-    num_workers: 4
+    num_workers: 0  # Set to 0 on Windows as a performance optimization
 
 signals_cfg:
   train_nms:

diff --git a/weightslab/examples/Usecases/ws-2d-lidar-detection/config.yaml b/weightslab/examples/Usecases/ws-2d-lidar-detection/config.yaml
@@ -2,6 +2,7 @@
 device: auto
 experiment_name: lidar2d_detection_usecase
 training_steps_to_do: null  # null = infinite training until manually stopped
+# root_log_dir:  # Leave empty to write to a temporary directory, or set a path where logs and checkpoints are stored
 
 checkpoint_manager:
   load_config: false

diff --git a/weightslab/src.py b/weightslab/src.py
@@ -3006,22 +3006,9 @@ def _check_cancel_or_timeout(self) -> None:
         if self._controller.is_cancel_requested():
             raise _EvalCanceled(f"Evaluation on '{self._split_name}' canceled by user")
 
-        elapsed = time.monotonic() - self._start_time
-        if self._absolute_timeout > 0 and elapsed > self._absolute_timeout:
-            raise _EvalTimeout(
-                f"Evaluation timeout on '{self._split_name}' after {elapsed:.1f}s (configured {self._absolute_timeout:.1f}s)"
-            )
-
         if self._total_batches <= 0 or self._processed_batches <= 0 or self._avg_batch_seconds <= 0:
             return
 
-        projected = self._avg_batch_seconds * self._total_batches
-        timeout_seconds = max(self._min_seconds, projected * self._multiplier)
-        if elapsed > timeout_seconds:
-            raise _EvalTimeout(
-                f"Evaluation timeout on '{self._split_name}' after {elapsed:.1f}s "
-                f"(projected={projected:.1f}s, limit={timeout_seconds:.1f}s, multiplier={self._multiplier:.2f})"
-            )
     def __len__(self):
         return len(self._loader)