Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions weightslab/data/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

from PIL import Image

from weightslab.data.array_proxy import ArrayH5Proxy


__all__ = [
"_detect_dataset_split",
Expand All @@ -21,9 +23,6 @@
logger = logging.getLogger(__name__)





def _to_uint8_image(img_float: np.ndarray) -> np.ndarray:
"""
Convert float image to uint8 for visualization.
Expand Down Expand Up @@ -222,6 +221,9 @@ def _downsample_nn(arr: np.ndarray, max_hw: int = 96) -> np.ndarray:


def to_numpy_safe(x):
if isinstance(x, ArrayH5Proxy):
return np.asanyarray(x)

if isinstance(x, (int, float)):
return np.array([x])

Expand Down
2 changes: 1 addition & 1 deletion weightslab/data/dataframe_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ def upsert_df(self, df_local: List | pd.DataFrame, origin: str = None, force_flu
# "Cannot setitem on a Categorical with a new category". The
# _optimize_dataframe_memory pass below re-applies categorical dtypes.
for col in all_cols:
if col in self._df.columns and isinstance(self._df[col].dtype, pd.CategoricalDtype):
if col in self._df.columns and col != SampleStatsEx.ORIGIN and isinstance(self._df[col].dtype, pd.CategoricalDtype):
self._df[col] = self._df[col].astype(object)
self._df.loc[existing_idx, all_cols] = df_norm.loc[existing_idx, all_cols]

Expand Down
37 changes: 24 additions & 13 deletions weightslab/data/h5_dataframe_store.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import re
import json
import time
import logging
Expand Down Expand Up @@ -365,10 +366,8 @@ def serialize_value(val):
if not isinstance(val, (list, set, np.ndarray)) and pd.isna(val):
return np.nan

if isinstance(val, np.ndarray) and val.ndim <= 1:
if val.ndim == 0:
val = val.reshape(-1)
val = val.tolist()
if isinstance(val, np.ndarray):
val = val.item() if val.ndim == 0 else val.tolist()

if isinstance(val, (list, dict)):
try:
Expand Down Expand Up @@ -428,18 +427,30 @@ def _normalize_for_read(self, df: pd.DataFrame, origin: str) -> pd.DataFrame:
# Handle deserialization of nested objects (lists, dicts) stored as JSON strings
cols_to_deserialize = [col for col in SampleStats.MODEL_INOUT_LIST if col in df.columns]
if cols_to_deserialize:
_MISSING = {"nan", "none", "<na>", ""}

def deserialize_value(val):
if not isinstance(val, str) or not (val.startswith('[') or val.startswith('{')):
if not isinstance(val, str):
return val
try:
obj = json.loads(val)
except Exception:
stripped = val.strip()
if stripped.lower() in _MISSING:
return np.nan
if not (stripped.startswith('[') or stripped.startswith('{')):
return val

# Unwrap single-element lists to scalars for consistency with active training data
if isinstance(obj, list) and len(obj) == 1:
return obj[0]
return obj
try:
return json.loads(stripped)
except json.JSONDecodeError:
# Fallback: numpy repr uses spaces as delimiters without commas.
# E.g. "[0.1 0.2]" or "[[0.1 0.2]\n [0.3 0.4]]"
try:
normalized = re.sub(
r'(?<=[0-9.])\s+(?=[-0-9.\[])',
', ',
stripped.replace('\n', ' '),
)
return json.loads(normalized)
except Exception:
return val

for col in cols_to_deserialize:
df[col] = df[col].apply(deserialize_value)
Expand Down
2 changes: 1 addition & 1 deletion weightslab/examples/PyTorch/ws-detection/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def test(loader, model, sig, device, grid_size, conf_thresh, test_loader_len):
parameters.setdefault("compute_natural_sort", True)

# --- 2) Register hyperparameters ---
exp_name = parameters["experiment_name"]
wl.watch_or_edit(
parameters,
flag="hyperparameters",
Expand All @@ -128,7 +129,6 @@ def test(loader, model, sig, device, grid_size, conf_thresh, test_loader_len):
poll_interval=1.0,
)

exp_name = parameters["experiment_name"]
num_classes = int(parameters["num_classes"])
image_size = int(parameters["image_size"])
grid_size = int(parameters["grid_size"])
Expand Down
5 changes: 3 additions & 2 deletions weightslab/examples/PyTorch/ws-segmentation/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ ledger_flush_interval: 60.0

# Data
num_classes: 6
image_size: 180
data_root: C:\Users\GuillaumePELLUET\Documents\Codes\weightslab\weightslab\examples\PyTorch\ws-segmentation\BDD_subset # Bdd format
class_names: ["Background", "Ego Road", "Driveable Area", "Lane Line 1", "Lane Line 2", "Lane Line 3"]
image_size: 128
data_root: .\BDD_subset # Bdd format
data:
train_loader:
batch_size: 2
Expand Down
10 changes: 7 additions & 3 deletions weightslab/examples/PyTorch/ws-segmentation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def test(loader, model, sig, device, test_loader_len):
parameters.setdefault("eval_full_to_train_steps_ratio", 50)
parameters.setdefault("number_of_workers", 4)
parameters.setdefault("num_classes", 6) # adjust to your label set
parameters.setdefault("class_names", None) # adjust to your label set
parameters.setdefault("ignore_index", 255) # if you have void pixels
parameters.setdefault("image_size", 256)
parameters.setdefault("compute_natural_sort", True)
Expand All @@ -174,6 +175,7 @@ def test(loader, model, sig, device, test_loader_len):
poll_interval=1.0,
)
num_classes = int(parameters["num_classes"])
class_names = parameters["class_names"]
ignore_index = int(parameters["ignore_index"])
image_size = int(parameters["image_size"])

Expand Down Expand Up @@ -211,6 +213,7 @@ def test(loader, model, sig, device, test_loader_len):
root=data_root,
split="train",
num_classes=num_classes,
class_names=class_names,
ignore_index=ignore_index,
image_size=image_size,
max_samples=train_cfg.get("max_samples", None) # Optionally limit number of samples for faster testing
Expand All @@ -219,6 +222,7 @@ def test(loader, model, sig, device, test_loader_len):
root=data_root,
split="val",
num_classes=num_classes,
class_names=class_names,
ignore_index=ignore_index,
image_size=image_size,
max_samples=test_cfg.get("max_samples", None) # Optionally limit number of samples for faster testing
Expand Down Expand Up @@ -337,9 +341,9 @@ def compute_class_weights(dataset, num_classes, ignore_index=255, max_samples=10
print(f" Data root: {data_root}")
print("=" * 60 + "\n")

# # ================
# # Training Loop
# wl.start_training(timeout=3) # This will block and keep the main thread alive while background services run. You can optionally set a timeout (in seconds) to automatically stop after a certain duration.
# ================
# Training Loop
wl.start_training(timeout=3) # This will block and keep the main thread alive while background services run. You can optionally set a timeout (in seconds) to automatically stop after a certain duration.

# ================
train_range = tqdm.tqdm(itertools.count(), desc="Training") if tqdm_display else itertools.count()
Expand Down
42 changes: 22 additions & 20 deletions weightslab/examples/PyTorch/ws-segmentation/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(
root,
split="train",
num_classes=6,
class_names=None,
ignore_index=255,
image_size=256,
max_samples=None
Expand All @@ -40,6 +41,7 @@ def __init__(
self.root = root
self.split = split
self.num_classes = num_classes
self.class_names = class_names
self.ignore_index = ignore_index
self.task_type = "segmentation"

Expand Down Expand Up @@ -114,31 +116,31 @@ def get_items(self, idx, include_metadata=False, include_labels=False, include_i
img_t = self.image_transform(img)

# Process labels/masks
# # Sample wise segmentation
# # # Sample wise segmentation
# mask_t = None
# if include_labels:
# mask = Image.open(mask_path)
# mask_r = self.mask_resize(mask)
# mask_np = np.array(mask_r, dtype=np.int64)
# mask_t = torch.from_numpy(mask_np) # [H, W] int64
# return img_t, uid, mask_t, metadata
# # Instance-wise segmentation
# Process labels/masks
mask_t_instances = list()
mask_t = None
if include_labels:
mask = Image.open(mask_path)
mask_r = self.mask_resize(mask)
mask_np = np.array(mask_r, dtype=np.int64)
mask_t = torch.from_numpy(mask_np)[None] # [H, W] int64
return img_t, uid, mask_t, metadata
# # # Instance-wise segmentation
# # Process labels/masks
# mask_t_instances = list()
# mask_t = None
# if include_labels:
# mask = Image.open(mask_path)
# mask_r = self.mask_resize(mask)
# mask_np = np.array(mask_r, dtype=np.int64)
# mask_t = torch.from_numpy(mask_np)[None] # [H, W] int64

# # Format labels to register multiple instance_ids
# lbl_max = mask_t.max().item()
# for i in range(1, lbl_max + 1):
# m = torch.zeros_like(mask_t)
# m[mask_t == i] = i # Assign class ID as instance ID for simplicity; if set to 1, all instances of the same class would be merged...
# mask_t_instances.append(m)
# return img_t, uid, mask_t_instances, metadata
mask_t = torch.from_numpy(mask_np) # [H, W] int64

# Format labels to register multiple instance_ids
lbl_max = mask_t.max().item()
for i in range(1, lbl_max + 1):
m = torch.zeros_like(mask_t)
m[mask_t == i] = i # Assign class ID as instance ID for simplicity; if set to 1, all instances of the same class would be merged...
mask_t_instances.append(m)
return img_t, uid, mask_t_instances, metadata

def seg_collate(batch):
"""Collate WL per-sample tuples for instance-segmentation.
Expand Down
4 changes: 2 additions & 2 deletions weightslab/examples/Ultralytics/ws-detection/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ data:
preload_labels: true
preload_metadata: true
drop_last: false
num_workers: 4
num_workers: 0 # Forced to 0 on Windows as a performance optimization
val_loader:
batch_size: 2
shuffle: false
preload_labels: true
preload_metadata: true
drop_last: false
num_workers: 4
num_workers: 0 # Forced to 0 on Windows as a performance optimization

signals_cfg:
train_nms:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
device: auto
experiment_name: lidar2d_detection_usecase
training_steps_to_do: null # null = infinite training until manually stopped
# root_log_dir: # Leave empty to write to a temporary directory, or specify a path in which to store logs and checkpoints

checkpoint_manager:
load_config: false
Expand Down
13 changes: 0 additions & 13 deletions weightslab/src.py
Original file line number Diff line number Diff line change
Expand Up @@ -3006,22 +3006,9 @@ def _check_cancel_or_timeout(self) -> None:
if self._controller.is_cancel_requested():
raise _EvalCanceled(f"Evaluation on '{self._split_name}' canceled by user")

elapsed = time.monotonic() - self._start_time
if self._absolute_timeout > 0 and elapsed > self._absolute_timeout:
raise _EvalTimeout(
f"Evaluation timeout on '{self._split_name}' after {elapsed:.1f}s (configured {self._absolute_timeout:.1f}s)"
)

if self._total_batches <= 0 or self._processed_batches <= 0 or self._avg_batch_seconds <= 0:
return

projected = self._avg_batch_seconds * self._total_batches
timeout_seconds = max(self._min_seconds, projected * self._multiplier)
if elapsed > timeout_seconds:
raise _EvalTimeout(
f"Evaluation timeout on '{self._split_name}' after {elapsed:.1f}s "
f"(projected={projected:.1f}s, limit={timeout_seconds:.1f}s, multiplier={self._multiplier:.2f})"
)
def __len__(self):
    """Return the length of the wrapped loader held in ``self._loader``."""
    wrapped_loader = self._loader
    return len(wrapped_loader)

Expand Down
Loading