Skip to content

Commit cf68672

Browse files
committed
codegen interactive mode, output formatting, analyzer progress
1 parent ef573fa commit cf68672

5 files changed

Lines changed: 528 additions & 198 deletions

File tree

json_explorer/analyzer.py

Lines changed: 165 additions & 150 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from collections import Counter
22
import dateparser
3+
from rich.progress import Progress, SpinnerColumn, TextColumn
34

45

56
def detect_timestamp(value):
@@ -10,167 +11,181 @@ def detect_timestamp(value):
1011

1112

1213
def analyze_json(data):
13-
def analyze_node(node):
14-
if isinstance(node, dict):
15-
children = {}
16-
for key, val in node.items():
17-
children[key] = analyze_node(val)
18-
return {"type": "object", "children": children}
19-
elif isinstance(node, list):
20-
# Skip empty or null-only lists
21-
non_empty_items = [item for item in node if item not in (None, {}, [], "")]
22-
if not non_empty_items:
23-
return {"type": "list", "child_type": "unknown"}
24-
25-
sample = non_empty_items[:20]
26-
element_summaries = [analyze_node(item) for item in sample]
27-
types = {e["type"] for e in element_summaries}
28-
29-
# List of primitives
30-
if len(types) == 1 and all(
31-
e["type"] not in {"object", "list"} for e in element_summaries
32-
):
33-
return {"type": "list", "child_type": types.pop()}
34-
35-
# List of objects
36-
if all(e["type"] == "object" for e in element_summaries):
37-
merged, conflicts = merge_object_summaries(element_summaries)
38-
return {
39-
"type": "list",
40-
"child": {
41-
"type": "object",
42-
"children": merged,
43-
"conflicts": conflicts,
44-
},
45-
}
46-
47-
# List of lists
48-
if all(e["type"] == "list" for e in element_summaries):
49-
# Merge list structures recursively
50-
merged_list = merge_list_summaries(element_summaries)
51-
return {
52-
"type": "list",
53-
"child": merged_list,
54-
}
55-
56-
return {"type": "list", "child_type": "mixed"}
57-
else:
58-
if isinstance(node, str):
59-
if detect_timestamp(node):
60-
return {"type": "timestamp"}
61-
else:
62-
return {"type": "str"}
63-
else:
64-
return {"type": type(node).__name__}
65-
66-
def merge_object_summaries(summaries):
67-
key_structures = {}
68-
key_counts = Counter()
69-
total = len(summaries)
70-
71-
for summary in summaries:
72-
for key, val in summary.get("children", {}).items():
73-
key_counts[key] += 1
74-
if key not in key_structures:
75-
key_structures[key] = []
76-
key_structures[key].append(val)
77-
78-
merged = {}
79-
conflicts = {}
80-
81-
for key, structures in key_structures.items():
82-
count = key_counts[key]
83-
optional = count < total
84-
85-
# Get unique types for this key
86-
types = {s["type"] for s in structures}
87-
88-
if len(types) == 1:
89-
# All structures have the same type
90-
structure_type = list(types)[0]
91-
92-
if structure_type == "object":
93-
# Recursively merge object structures
94-
merged_children, child_conflicts = merge_object_summaries(
95-
structures
96-
)
97-
merged[key] = {
98-
"type": "object",
99-
"children": merged_children,
100-
"optional": optional,
101-
}
102-
if child_conflicts:
103-
merged[key]["conflicts"] = child_conflicts
104-
105-
elif structure_type == "list":
106-
# Merge list structures
107-
merged_list = merge_list_summaries(structures)
108-
merged[key] = {
109-
"type": "list",
110-
"optional": optional,
111-
**{k: v for k, v in merged_list.items() if k != "type"},
112-
}
113-
114-
else:
115-
# Primitive type
116-
merged[key] = {"type": structure_type, "optional": optional}
117-
else:
118-
# Type conflict
119-
merged[key] = {"type": "conflict", "optional": optional}
120-
conflicts[key] = list(types)
121-
122-
return merged, conflicts
123-
124-
def merge_list_summaries(summaries):
125-
child_types = set()
126-
child_structures = []
127-
128-
for summary in summaries:
129-
if "child_type" in summary:
130-
child_types.add(summary["child_type"])
131-
elif "child" in summary:
132-
child_structures.append(summary["child"])
133-
134-
if child_structures:
135-
# All lists contain complex structures
136-
structure_types = {s["type"] for s in child_structures}
137-
138-
if len(structure_types) == 1:
139-
structure_type = list(structure_types)[0]
140-
141-
if structure_type == "object":
142-
# Merge object structures within lists
143-
merged_children, child_conflicts = merge_object_summaries(
144-
child_structures
145-
)
14+
with Progress(
15+
SpinnerColumn(),
16+
TextColumn("[progress.description]{task.description}"),
17+
console=None,
18+
transient=True,
19+
) as progress:
20+
21+
task = progress.add_task("[cyan]Analyzing JSON...", total=None)
22+
23+
def analyze_node(node):
24+
if isinstance(node, dict):
25+
children = {}
26+
for key, val in node.items():
27+
progress.update(task, advance=1)
28+
children[key] = analyze_node(val)
29+
return {"type": "object", "children": children}
30+
elif isinstance(node, list):
31+
# Skip empty or null-only lists
32+
non_empty_items = [
33+
item for item in node if item not in (None, {}, [], "")
34+
]
35+
if not non_empty_items:
36+
return {"type": "list", "child_type": "unknown"}
37+
38+
sample = non_empty_items[:20]
39+
element_summaries = [analyze_node(item) for item in sample]
40+
types = {e["type"] for e in element_summaries}
41+
42+
# List of primitives
43+
if len(types) == 1 and all(
44+
e["type"] not in {"object", "list"} for e in element_summaries
45+
):
46+
return {"type": "list", "child_type": types.pop()}
47+
48+
# List of objects
49+
if all(e["type"] == "object" for e in element_summaries):
50+
merged, conflicts = merge_object_summaries(element_summaries)
14651
return {
14752
"type": "list",
14853
"child": {
14954
"type": "object",
150-
"children": merged_children,
151-
"conflicts": child_conflicts,
55+
"children": merged,
56+
"conflicts": conflicts,
15257
},
15358
}
154-
elif structure_type == "list":
155-
# Nested lists
156-
merged_nested = merge_list_summaries(child_structures)
157-
return {"type": "list", "child": merged_nested}
15859

159-
return {"type": "list", "child_type": "mixed_complex"}
60+
# List of lists
61+
if all(e["type"] == "list" for e in element_summaries):
62+
# Merge list structures recursively
63+
merged_list = merge_list_summaries(element_summaries)
64+
return {
65+
"type": "list",
66+
"child": merged_list,
67+
}
16068

161-
elif child_types:
162-
# Simple child types
163-
if len(child_types) == 1:
164-
return {"type": "list", "child_type": list(child_types)[0]}
69+
return {"type": "list", "child_type": "mixed"}
16570
else:
166-
return {
167-
"type": "list",
168-
"child_type": f"mixed: {', '.join(sorted(child_types))}",
169-
}
71+
if isinstance(node, str):
72+
if detect_timestamp(node):
73+
return {"type": "timestamp"}
74+
else:
75+
return {"type": "str"}
76+
else:
77+
return {"type": type(node).__name__}
78+
79+
def merge_object_summaries(summaries):
80+
key_structures = {}
81+
key_counts = Counter()
82+
total = len(summaries)
83+
84+
for summary in summaries:
85+
for key, val in summary.get("children", {}).items():
86+
key_counts[key] += 1
87+
if key not in key_structures:
88+
key_structures[key] = []
89+
key_structures[key].append(val)
90+
91+
merged = {}
92+
conflicts = {}
93+
94+
for key, structures in key_structures.items():
95+
count = key_counts[key]
96+
optional = count < total
97+
98+
# Get unique types for this key
99+
types = {s["type"] for s in structures}
100+
101+
if len(types) == 1:
102+
# All structures have the same type
103+
structure_type = list(types)[0]
104+
105+
if structure_type == "object":
106+
# Recursively merge object structures
107+
merged_children, child_conflicts = merge_object_summaries(
108+
structures
109+
)
110+
merged[key] = {
111+
"type": "object",
112+
"children": merged_children,
113+
"optional": optional,
114+
}
115+
if child_conflicts:
116+
merged[key]["conflicts"] = child_conflicts
117+
118+
elif structure_type == "list":
119+
# Merge list structures
120+
merged_list = merge_list_summaries(structures)
121+
merged[key] = {
122+
"type": "list",
123+
"optional": optional,
124+
**{k: v for k, v in merged_list.items() if k != "type"},
125+
}
126+
127+
else:
128+
# Primitive type
129+
merged[key] = {"type": structure_type, "optional": optional}
130+
else:
131+
# Type conflict
132+
merged[key] = {"type": "conflict", "optional": optional}
133+
conflicts[key] = list(types)
134+
135+
return merged, conflicts
136+
137+
def merge_list_summaries(summaries):
138+
child_types = set()
139+
child_structures = []
140+
141+
for summary in summaries:
142+
if "child_type" in summary:
143+
child_types.add(summary["child_type"])
144+
elif "child" in summary:
145+
child_structures.append(summary["child"])
146+
147+
if child_structures:
148+
# All lists contain complex structures
149+
structure_types = {s["type"] for s in child_structures}
150+
151+
if len(structure_types) == 1:
152+
structure_type = list(structure_types)[0]
153+
154+
if structure_type == "object":
155+
# Merge object structures within lists
156+
merged_children, child_conflicts = merge_object_summaries(
157+
child_structures
158+
)
159+
return {
160+
"type": "list",
161+
"child": {
162+
"type": "object",
163+
"children": merged_children,
164+
"conflicts": child_conflicts,
165+
},
166+
}
167+
elif structure_type == "list":
168+
# Nested lists
169+
merged_nested = merge_list_summaries(child_structures)
170+
return {"type": "list", "child": merged_nested}
171+
172+
return {"type": "list", "child_type": "mixed_complex"}
173+
174+
elif child_types:
175+
# Simple child types
176+
if len(child_types) == 1:
177+
return {"type": "list", "child_type": list(child_types)[0]}
178+
else:
179+
return {
180+
"type": "list",
181+
"child_type": f"mixed: {', '.join(sorted(child_types))}",
182+
}
170183

171-
return {"type": "list", "child_type": "unknown"}
184+
return {"type": "list", "child_type": "unknown"}
172185

173-
return analyze_node(data)
186+
# Start the analysis
187+
result = analyze_node(data)
188+
return result
174189

175190

176191
if __name__ == "__main__":

json_explorer/cli.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,6 @@ def _handle_stats(self, args):
118118

119119
def _handle_visualization(self, args):
120120
"""Handle visualization generation."""
121-
self.console.print("\n📈 Generating visualizations...")
122121

123122
plot_format = getattr(args, "plot_format", "matplotlib")
124123
save_path = getattr(args, "save_path", None)

0 commit comments

Comments
 (0)