diff --git a/weightslab/data/data_utils.py b/weightslab/data/data_utils.py index 9c0b256a..db8e7f2d 100644 --- a/weightslab/data/data_utils.py +++ b/weightslab/data/data_utils.py @@ -5,6 +5,8 @@ from PIL import Image +from weightslab.data.array_proxy import ArrayH5Proxy + __all__ = [ "_detect_dataset_split", @@ -21,9 +23,6 @@ logger = logging.getLogger(__name__) - - - def _to_uint8_image(img_float: np.ndarray) -> np.ndarray: """ Convert float image to uint8 for visualization. @@ -222,6 +221,9 @@ def _downsample_nn(arr: np.ndarray, max_hw: int = 96) -> np.ndarray: def to_numpy_safe(x): + if isinstance(x, ArrayH5Proxy): + return np.asanyarray(x) + if isinstance(x, (int, float)): return np.array([x]) diff --git a/weightslab/data/dataframe_manager.py b/weightslab/data/dataframe_manager.py index 3185d785..6dad1e7b 100644 --- a/weightslab/data/dataframe_manager.py +++ b/weightslab/data/dataframe_manager.py @@ -694,7 +694,7 @@ def upsert_df(self, df_local: List | pd.DataFrame, origin: str = None, force_flu # "Cannot setitem on a Categorical with a new category". The # _optimize_dataframe_memory pass below re-applies categorical dtypes. for col in all_cols: - if col in self._df.columns and isinstance(self._df[col].dtype, pd.CategoricalDtype): + if col in self._df.columns and col != SampleStatsEx.ORIGIN and isinstance(self._df[col].dtype, pd.CategoricalDtype): self._df[col] = self._df[col].astype(object) self._df.loc[existing_idx, all_cols] = df_norm.loc[existing_idx, all_cols] diff --git a/weightslab/data/h5_dataframe_store.py b/weightslab/data/h5_dataframe_store.py index 6a797a96..882468ec 100644 --- a/weightslab/data/h5_dataframe_store.py +++ b/weightslab/data/h5_dataframe_store.py @@ -1,4 +1,5 @@ import os +import re import json import time import logging @@ -365,10 +366,8 @@ def serialize_value(val): if not isinstance(val, (list, set, np.ndarray)) and pd.isna(val): return np.nan - if isinstance(val, np.ndarray) and val.ndim <= 1: - if val.ndim == 0: - val = val.reshape(-1) - val = val.tolist() + if isinstance(val, np.ndarray): + val = val.item() if val.ndim == 0 else val.tolist() if isinstance(val, (list, dict)): try: @@ -428,18 +427,30 @@ def _normalize_for_read(self, df: pd.DataFrame, origin: str) -> pd.DataFrame: # Handle deserialization of nested objects (lists, dicts) stored as JSON strings cols_to_deserialize = [col for col in SampleStats.MODEL_INOUT_LIST if col in df.columns] if cols_to_deserialize: + _MISSING = {"nan", "none", "", ""} + def deserialize_value(val): - if not isinstance(val, str) or not (val.startswith('[') or val.startswith('{')): + if not isinstance(val, str): return val - try: - obj = json.loads(val) - except Exception: + stripped = val.strip() + if stripped.lower() in _MISSING: + return np.nan + if not (stripped.startswith('[') or stripped.startswith('{')): return val - - # Unwrap single-element lists to scalars for consistency with active training data - if isinstance(obj, list) and len(obj) == 1: - return obj[0] - return obj + try: + return json.loads(stripped) + except json.JSONDecodeError: + # Fallback: numpy repr uses spaces as delimiters without commas. + # E.g. "[0.1 0.2]" or "[[0.1 0.2]\n [0.3 0.4]]" + try: + normalized = re.sub( + r'(?<=[0-9.])\s+(?=[-0-9.\[])', + ', ', + stripped.replace('\n', ' '), + ) + return json.loads(normalized) + except Exception: + return val for col in cols_to_deserialize: df[col] = df[col].apply(deserialize_value) diff --git a/weightslab/examples/PyTorch/ws-detection/main.py b/weightslab/examples/PyTorch/ws-detection/main.py index 9ff5357e..ff4b2030 100644 --- a/weightslab/examples/PyTorch/ws-detection/main.py +++ b/weightslab/examples/PyTorch/ws-detection/main.py @@ -120,6 +120,7 @@ def test(loader, model, sig, device, grid_size, conf_thresh, test_loader_len): parameters.setdefault("compute_natural_sort", True) # --- 2) Register hyperparameters --- + exp_name = parameters["experiment_name"] wl.watch_or_edit( parameters, flag="hyperparameters", @@ -128,7 +129,6 @@ def test(loader, model, sig, device, grid_size, conf_thresh, test_loader_len): poll_interval=1.0, ) - exp_name = parameters["experiment_name"] num_classes = int(parameters["num_classes"]) image_size = int(parameters["image_size"]) grid_size = int(parameters["grid_size"]) diff --git a/weightslab/examples/PyTorch/ws-segmentation/config.yaml b/weightslab/examples/PyTorch/ws-segmentation/config.yaml index 9aceb6c8..3fd60d3b 100644 --- a/weightslab/examples/PyTorch/ws-segmentation/config.yaml +++ b/weightslab/examples/PyTorch/ws-segmentation/config.yaml @@ -28,8 +28,9 @@ ledger_flush_interval: 60.0 # Data num_classes: 6 -image_size: 180 -data_root: C:\Users\GuillaumePELLUET\Documents\Codes\weightslab\weightslab\examples\PyTorch\ws-segmentation\BDD_subset # Bdd format +class_names: ["Background", "Ego Road", "Driveable Area", "Lane Line 1", "Lane Line 2", "Lane Line 3"] +image_size: 128 +data_root: .\BDD_subset # Bdd format data: train_loader: batch_size: 2 diff --git a/weightslab/examples/PyTorch/ws-segmentation/main.py b/weightslab/examples/PyTorch/ws-segmentation/main.py index 21eb0e0c..1a56c59e 100644 --- a/weightslab/examples/PyTorch/ws-segmentation/main.py +++ b/weightslab/examples/PyTorch/ws-segmentation/main.py @@ -160,6 +160,7 @@ def test(loader, model, sig, device, test_loader_len): parameters.setdefault("eval_full_to_train_steps_ratio", 50) parameters.setdefault("number_of_workers", 4) parameters.setdefault("num_classes", 6) # adjust to your label set + parameters.setdefault("class_names", None) # adjust to your label set parameters.setdefault("ignore_index", 255) # if you have void pixels parameters.setdefault("image_size", 256) parameters.setdefault("compute_natural_sort", True) @@ -174,6 +175,7 @@ def test(loader, model, sig, device, test_loader_len): poll_interval=1.0, ) num_classes = int(parameters["num_classes"]) + class_names = parameters["class_names"] ignore_index = int(parameters["ignore_index"]) image_size = int(parameters["image_size"]) @@ -211,6 +213,7 @@ def test(loader, model, sig, device, test_loader_len): root=data_root, split="train", num_classes=num_classes, + class_names=class_names, ignore_index=ignore_index, image_size=image_size, max_samples=train_cfg.get("max_samples", None) # Optionally limit number of samples for faster testing @@ -219,6 +222,7 @@ def test(loader, model, sig, device, test_loader_len): root=data_root, split="val", num_classes=num_classes, + class_names=class_names, ignore_index=ignore_index, image_size=image_size, max_samples=test_cfg.get("max_samples", None) # Optionally limit number of samples for faster testing @@ -337,9 +341,9 @@ def compute_class_weights(dataset, num_classes, ignore_index=255, max_samples=10 print(f" Data root: {data_root}") print("=" * 60 + "\n") - # # ================ - # # Training Loop - # wl.start_training(timeout=3) # This will block and keep the main thread alive while background services run. You can optionally set a timeout (in seconds) to automatically stop after a certain duration. + # ================ + # Training Loop + wl.start_training(timeout=3) # This will block and keep the main thread alive while background services run. You can optionally set a timeout (in seconds) to automatically stop after a certain duration. # ================ train_range = tqdm.tqdm(itertools.count(), desc="Training") if tqdm_display else itertools.count() diff --git a/weightslab/examples/PyTorch/ws-segmentation/utils/data.py b/weightslab/examples/PyTorch/ws-segmentation/utils/data.py index 656c0180..fcc14cc5 100644 --- a/weightslab/examples/PyTorch/ws-segmentation/utils/data.py +++ b/weightslab/examples/PyTorch/ws-segmentation/utils/data.py @@ -32,6 +32,7 @@ def __init__( root, split="train", num_classes=6, + class_names=None, ignore_index=255, image_size=256, max_samples=None @@ -40,6 +41,7 @@ def __init__( self.root = root self.split = split self.num_classes = num_classes + self.class_names = class_names self.ignore_index = ignore_index self.task_type = "segmentation" @@ -114,31 +116,31 @@ def get_items(self, idx, include_metadata=False, include_labels=False, include_i img_t = self.image_transform(img) # Process labels/masks - # # Sample wise segmentation + # # # Sample wise segmentation + # mask_t = None + # if include_labels: + # mask = Image.open(mask_path) + # mask_r = self.mask_resize(mask) + # mask_np = np.array(mask_r, dtype=np.int64) + # mask_t = torch.from_numpy(mask_np) # [H, W] int64 + # return img_t, uid, mask_t, metadata + # # Instance wise segmentaiton + # Process labels/masks + mask_t_instances = list() mask_t = None if include_labels: mask = Image.open(mask_path) mask_r = self.mask_resize(mask) mask_np = np.array(mask_r, dtype=np.int64) - mask_t = torch.from_numpy(mask_np)[None] # [H, W] int64 - return img_t, uid, mask_t, metadata - # # # Instance wise segmentaiton - # # Process labels/masks - # mask_t_instances = list() - # mask_t = None - # if include_labels: - # mask = Image.open(mask_path) - # mask_r = self.mask_resize(mask) - # mask_np = np.array(mask_r, dtype=np.int64) - # mask_t = torch.from_numpy(mask_np)[None] # [H, W] int64 - - # # Format labels to register multiple instance_ids - # lbl_max = mask_t.max().item() - # for i in range(1, lbl_max + 1): - # m = torch.zeros_like(mask_t) - # m[mask_t == i] = i # Assign class ID as instance ID for simplicity; if set to 1, all instances of the same class would be merged... - # mask_t_instances.append(m) - # return img_t, uid, mask_t_instances, metadata + mask_t = torch.from_numpy(mask_np) # [H, W] int64 + + # Format labels to register multiple instance_ids + lbl_max = mask_t.max().item() + for i in range(1, lbl_max + 1): + m = torch.zeros_like(mask_t) + m[mask_t == i] = i # Assign class ID as instance ID for simplicity; if set to 1, all instances of the same class would be merged... + mask_t_instances.append(m) + return img_t, uid, mask_t_instances, metadata def seg_collate(batch): """Collate WL per-sample tuples for instance-segmentation. diff --git a/weightslab/examples/Ultralytics/ws-detection/config.yaml b/weightslab/examples/Ultralytics/ws-detection/config.yaml index 9c07dfb9..50a4033e 100644 --- a/weightslab/examples/Ultralytics/ws-detection/config.yaml +++ b/weightslab/examples/Ultralytics/ws-detection/config.yaml @@ -46,14 +46,14 @@ data: preload_labels: true preload_metadata: true drop_last: false - num_workers: 4 + num_workers: 0 # Force to 0 for windows for perfs. opt val_loader: batch_size: 2 shuffle: false preload_labels: true preload_metadata: true drop_last: false - num_workers: 4 + num_workers: 0 # Force to 0 for windows for perfs. opt signals_cfg: train_nms: diff --git a/weightslab/examples/Usecases/ws-2d-lidar-detection/config.yaml b/weightslab/examples/Usecases/ws-2d-lidar-detection/config.yaml index 80b795c2..edde4c24 100644 --- a/weightslab/examples/Usecases/ws-2d-lidar-detection/config.yaml +++ b/weightslab/examples/Usecases/ws-2d-lidar-detection/config.yaml @@ -2,6 +2,7 @@ device: auto experiment_name: lidar2d_detection_usecase training_steps_to_do: null # null = infinite training until manually stopped +# root_log_dir: # Empty to write in tmp directory, or specify a path to store logs and checkpoints checkpoint_manager: load_config: false diff --git a/weightslab/src.py b/weightslab/src.py index 29f9065c..a4e93cb4 100644 --- a/weightslab/src.py +++ b/weightslab/src.py @@ -3006,22 +3006,9 @@ def _check_cancel_or_timeout(self) -> None: if self._controller.is_cancel_requested(): raise _EvalCanceled(f"Evaluation on '{self._split_name}' canceled by user") - elapsed = time.monotonic() - self._start_time - if self._absolute_timeout > 0 and elapsed > self._absolute_timeout: - raise _EvalTimeout( - f"Evaluation timeout on '{self._split_name}' after {elapsed:.1f}s (configured {self._absolute_timeout:.1f}s)" - ) - if self._total_batches <= 0 or self._processed_batches <= 0 or self._avg_batch_seconds <= 0: return - projected = self._avg_batch_seconds * self._total_batches - timeout_seconds = max(self._min_seconds, projected * self._multiplier) - if elapsed > timeout_seconds: - raise _EvalTimeout( - f"Evaluation timeout on '{self._split_name}' after {elapsed:.1f}s " - f"(projected={projected:.1f}s, limit={timeout_seconds:.1f}s, multiplier={self._multiplier:.2f})" - ) def __len__(self): return len(self._loader) diff --git a/weightslab/trainer/services/data_service.py b/weightslab/trainer/services/data_service.py index f30f8831..cb9b12a5 100755 --- a/weightslab/trainer/services/data_service.py +++ b/weightslab/trainer/services/data_service.py @@ -100,19 +100,39 @@ def normalize_metadata_copy_source_name(source_name: str, experiment_hash: str = return name -def build_metadata_copy_column_names(existing_columns, experiment_hash: str, source_name: str): - """Build backend/ui copied metadata names with incrementing suffix _1, _2, ...""" - exp_hash = str(experiment_hash or "current_experiment_hash").strip() or "current_experiment_hash" - normalized_source = normalize_metadata_copy_source_name(source_name, exp_hash) +def build_metadata_copy_column_names(existing_columns, experiment_hash: str, source_name: str) -> str: + """Build backend/ui copied metadata column names. + + Naming rules: + - Cloning an original field (no '@'): first copy is ``{source}@{hash}``; + subsequent copies of the same source get ``{source}@{hash}_1``, ``_2``, ... + - Cloning an already-cloned field (contains '@'): strip any trailing ``_N`` + suffix to find the base name, then append ``_1``, ``_2``, ... — no new hash. + """ existing_iterable = [] if existing_columns is None else existing_columns existing = {str(col) for col in existing_iterable} - index = 1 - while True: - copy_name = f"{normalized_source}_{index}@{exp_hash}" - if copy_name not in existing: - return copy_name - index += 1 + if "@" in source_name: + # Cloning a clone: reuse the same @hash suffix, just increment _{n} + base = re.sub(r"_\d+$", "", source_name) + n = 1 + while True: + candidate = f"{base}_{n}" + if candidate not in existing: + return candidate + n += 1 + else: + # Cloning an original field: first copy has no index, then _1, _2, ... + exp_hash = str(experiment_hash or "current_experiment_hash").strip() or "current_experiment_hash" + base = f"{source_name}@{exp_hash}" + if base not in existing: + return base + n = 1 + while True: + candidate = f"{base}_{n}" + if candidate not in existing: + return candidate + n += 1 def duplicate_metadata_column_in_dataframe(df: pd.DataFrame, source_column: str, experiment_hash: str): @@ -127,7 +147,9 @@ def duplicate_metadata_column_in_dataframe(df: pd.DataFrame, source_column: str, def is_copy_metadata_column_name(column_name: str) -> bool: name = str(column_name or "").strip() - return bool(re.match(r".+_\d+@.+$", name)) + # Matches any column that was produced by a clone operation: contains '@' + # with at least one character on each side (e.g. "loss@abc123" or "loss@abc123_2"). + return bool(re.match(r".+@.+", name)) def detect_bbox_format(bboxes: np.ndarray) -> str: @@ -1337,6 +1359,8 @@ def _process_sample_row(self, args): except Exception: label_arr = np.array([]) + if label_arr.ndim == 1 and label_arr.size >= 4: + label_arr = label_arr.reshape(1, -1) if label_arr.size > 0 and label_arr.ndim == 2 and label_arr.shape[-1] >= 4: # 6-col rows when class+score present: [x,y,x,y,class,score] # (or [tl_x,tl_y,w,h,class,score] for the xywh-top-left flavor). @@ -2775,7 +2799,7 @@ def _build_metadata_only_response(self, df_slice: pd.DataFrame, requested_cols=N # detection) pred/target are large JSON arrays (~310 KB/record) that bloat the # response to 100s of MB and silently break the histogram fetch. if not requested_cols: - _HEAVY_BLOB_COLS = {"pred", "prediction", "prediction_raw", "target"} + _HEAVY_BLOB_COLS = {"prediction_raw"} requested_cols = [c for c in df_slice.columns if c not in _HEAVY_BLOB_COLS] metadata_cols = [ @@ -3982,7 +4006,10 @@ def EditDataSample(self, request, context): if target_column in self._all_datasets_df.columns: self._all_datasets_df = self._all_datasets_df.drop(columns=[target_column]) - self._slowUpdateInternals(force=True) + # Kick a background view-refresh (non-blocking) — the in-memory view + # is already consistent after the drop above, so blocking inline rebuild + # is unnecessary and causes the gRPC response to stall for 5-10 s. + self._slowUpdateInternals() return pb2.DataEditsResponse( success=True, @@ -3995,10 +4022,66 @@ def EditDataSample(self, request, context): message=f"Failed to delete metadata column: {str(e)}", ) + if request.stat_name == "__discard_by_tag__": + tag_name = str(request.string_value or "").strip() + if not tag_name: + return pb2.DataEditsResponse( + success=False, + message="Missing tag name for discard-by-tag operation.", + ) + + with self._watched_lock("_lock[EditDataSample/__discard_by_tag__]"): + try: + self._slowUpdateInternals() + if self._all_datasets_df is None or self._all_datasets_df.empty: + return pb2.DataEditsResponse( + success=False, + message="No dataframe available.", + ) + + tag_col = f"{SampleStatsEx.TAG.value}:{tag_name}" + if tag_col not in self._all_datasets_df.columns: + self._slowUpdateInternals() + df = safe_reset_index(self._all_datasets_df) + if tag_col not in df.columns: + return pb2.DataEditsResponse( + success=True, + message=f"No samples found with tag '{tag_name}'.", + ) + + tagged = df[df[tag_col] == 1] + if tagged.empty: + return pb2.DataEditsResponse( + success=True, + message=f"No samples found with tag '{tag_name}'.", + ) + + for origin, origin_df in tagged.groupby(SampleStatsEx.ORIGIN.value, sort=False): + sample_ids = origin_df.index.astype(str).tolist() + rows = [ + {"sample_id": sid, SampleStatsEx.ORIGIN.value: origin, SampleStatsEx.DISCARDED.value: True} + for sid in sample_ids + ] + df_update = pd.DataFrame(rows).set_index("sample_id") + self._df_manager.upsert_df(df_update, origin=origin, force_flush=True) + + count = len(tagged) + self._slowUpdateInternals(force=True) + return pb2.DataEditsResponse( + success=True, + message=f"Discarded {count} samples with tag '{tag_name}'.", + ) + except Exception as e: + logger.error(f"[EditDataSample] discard_by_tag failed: {e}", exc_info=True) + return pb2.DataEditsResponse( + success=False, + message=f"Failed to discard by tag: {str(e)}", + ) + if not request.stat_name or not request.stat_name.startswith(SampleStatsEx.TAG.value) and request.stat_name not in [SampleStatsEx.DISCARDED.value]: return pb2.DataEditsResponse( success=False, - message="Only 'tags', 'discarded', '__copy_metadata__', '__delete_metadata__', '__save_data_state__', and '__force_h5_write_and_save__' edits are supported.", + message="Only 'tags', 'discarded', '__copy_metadata__', '__delete_metadata__', '__save_data_state__', '__force_h5_write_and_save__', and '__discard_by_tag__' edits are supported.", ) # ===================================================================== diff --git a/weightslab/utils/tools.py b/weightslab/utils/tools.py index 8ae77b8f..b57ff83a 100644 --- a/weightslab/utils/tools.py +++ b/weightslab/utils/tools.py @@ -34,10 +34,12 @@ def safe_reset_index(df: "pd.DataFrame") -> "pd.DataFrame": """ import pandas as _pd if not isinstance(df, _pd.DataFrame) or not isinstance(df.index, _pd.MultiIndex): - # Single-level index: only reset if the name isn't already a column. - if df.index.name and df.index.name in df.columns: - return df - return df.reset_index() + # Single-level index: only promote if the name is meaningful and missing. + # Unnamed index (None) has nothing to promote; drop it to avoid inserting + # a spurious 'index' / 'level_0' column when one already exists. + if df.index.name and df.index.name not in df.columns: + return df.reset_index() + return df.reset_index(drop=True) missing = [n for n in df.index.names if n and n not in df.columns] if not missing: # All levels already present as columns — nothing to promote.