Skip to content

Commit 35025b8

Browse files
committed
- Analyzer: Properly detects optional fields when None values present
- Analyzer: Filters None/unknown before type conflict detection
- Schema: Smart conflict resolution (None + single type → optional field)
1 parent 11b61c2 commit 35025b8

2 files changed

Lines changed: 91 additions & 16 deletions

File tree

json_explorer/analyzer.py

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55

66
def detect_timestamp(value):
7-
if not isinstance(value, str):
7+
if not isinstance(value, str) or len(value) < 4:
88
return False
99
parsed = dateparser.parse(value)
1010
return parsed is not None
@@ -27,6 +27,7 @@ def analyze_node(node):
2727
progress.update(task, advance=1)
2828
children[key] = analyze_node(val)
2929
return {"type": "object", "children": children}
30+
3031
elif isinstance(node, list):
3132
# Skip empty or null-only lists
3233
non_empty_items = [
@@ -67,6 +68,11 @@ def analyze_node(node):
6768
}
6869

6970
return {"type": "list", "child_type": "mixed"}
71+
72+
elif node is None:
73+
# Explicitly handle None - mark as unknown but with a flag
74+
return {"type": "unknown", "is_none": True}
75+
7076
else:
7177
if isinstance(node, str):
7278
if detect_timestamp(node):
@@ -79,11 +85,20 @@ def analyze_node(node):
7985
def merge_object_summaries(summaries):
8086
key_structures = {}
8187
key_counts = Counter()
88+
key_none_counts = Counter()
8289
total = len(summaries)
8390

8491
for summary in summaries:
92+
seen_keys = set()
93+
8594
for key, val in summary.get("children", {}).items():
8695
key_counts[key] += 1
96+
seen_keys.add(key)
97+
98+
# Track if this value is None/unknown
99+
if val.get("type") == "unknown":
100+
key_none_counts[key] += 1
101+
87102
if key not in key_structures:
88103
key_structures[key] = []
89104
key_structures[key].append(val)
@@ -93,19 +108,34 @@ def merge_object_summaries(summaries):
93108

94109
for key, structures in key_structures.items():
95110
count = key_counts[key]
96-
optional = count < total
111+
none_count = key_none_counts[key]
112+
113+
# Field is optional if:
114+
# 1. Missing from some objects (count < total)
115+
# 2. Has None in some objects (none_count > 0)
116+
optional = (count < total) or (none_count > 0)
97117

98-
# Get unique types for this key
99-
types = {s["type"] for s in structures}
118+
# Filter out None/unknown types to find concrete types
119+
concrete_structures = [
120+
s for s in structures if s.get("type") != "unknown"
121+
]
122+
123+
# If we have concrete types, use those; otherwise use all structures
124+
working_structures = (
125+
concrete_structures if concrete_structures else structures
126+
)
127+
128+
# Get unique types from working structures
129+
types = {s["type"] for s in working_structures}
100130

101131
if len(types) == 1:
102-
# All structures have the same type
132+
# Single type (possibly with None values)
103133
structure_type = list(types)[0]
104134

105135
if structure_type == "object":
106136
# Recursively merge object structures
107137
merged_children, child_conflicts = merge_object_summaries(
108-
structures
138+
working_structures
109139
)
110140
merged[key] = {
111141
"type": "object",
@@ -117,21 +147,26 @@ def merge_object_summaries(summaries):
117147

118148
elif structure_type == "list":
119149
# Merge list structures
120-
merged_list = merge_list_summaries(structures)
150+
merged_list = merge_list_summaries(working_structures)
121151
merged[key] = {
122152
"type": "list",
123153
"optional": optional,
124154
**{k: v for k, v in merged_list.items() if k != "type"},
125155
}
126156

127157
else:
128-
# Primitive type
158+
# Primitive type (possibly with None)
129159
merged[key] = {"type": structure_type, "optional": optional}
130-
else:
131-
# Type conflict
160+
161+
elif len(types) > 1:
162+
# Multiple different types = real conflict
132163
merged[key] = {"type": "conflict", "optional": optional}
133164
conflicts[key] = list(types)
134165

166+
else:
167+
# Should not happen, but handle gracefully
168+
merged[key] = {"type": "unknown", "optional": optional}
169+
135170
return merged, conflicts
136171

137172
def merge_list_summaries(summaries):

json_explorer/codegen/core/schema.py

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -349,13 +349,53 @@ def _create_field_from_node(
349349
optional=optional,
350350
)
351351

352-
# Handle type conflicts
352+
# Handle type conflicts with None/unknown handling
353353
if field_name in conflicts:
354-
field_obj.type = FieldType.CONFLICT
355-
field_obj.conflicting_types = [
356-
map_analyzer_type(t) for t in conflicts[field_name]
357-
]
358-
logger.warning(f"Type conflict in {parent_schema_name}.{field_name}")
354+
conflict_types = conflicts[field_name]
355+
356+
if "unknown" in conflict_types:
357+
# Filter out "unknown" to see what concrete types remain
358+
concrete_types = [t for t in conflict_types if t != "unknown"]
359+
360+
if len(concrete_types) == 1:
361+
# Single concrete type + None → Optional[ConcreteType]
362+
# This is the most common case and should NOT be treated as a conflict
363+
field_obj.type = map_analyzer_type(concrete_types[0])
364+
field_obj.optional = True
365+
logger.debug(
366+
f"Resolved None conflict in {parent_schema_name}.{field_name}: "
367+
f"using {concrete_types[0]} as optional"
368+
)
369+
370+
elif len(concrete_types) > 1:
371+
# Multiple concrete types + None → Real conflict
372+
# Example: [{"value": None}, {"value": 1}, {"value": "text"}]
373+
field_obj.type = FieldType.CONFLICT
374+
field_obj.conflicting_types = [
375+
map_analyzer_type(t) for t in concrete_types
376+
]
377+
field_obj.optional = True # Can also be None
378+
logger.warning(
379+
f"Type conflict in {parent_schema_name}.{field_name}: "
380+
f"{', '.join(concrete_types)} (plus None)"
381+
)
382+
383+
else:
384+
# Only "unknown" types → Keep as unknown but mark optional
385+
field_obj.type = FieldType.UNKNOWN
386+
field_obj.optional = True
387+
logger.debug(
388+
f"Unknown type in {parent_schema_name}.{field_name} "
389+
f"(only None values found)"
390+
)
391+
else:
392+
# Real conflict without None involved
393+
field_obj.type = FieldType.CONFLICT
394+
field_obj.conflicting_types = [map_analyzer_type(t) for t in conflict_types]
395+
logger.warning(
396+
f"Type conflict in {parent_schema_name}.{field_name}: "
397+
f"{', '.join(conflict_types)}"
398+
)
359399

360400
# Handle specific field types
361401
elif field_type == FieldType.OBJECT:

0 commit comments

Comments
 (0)