Skip to content

Commit ef473f6

Browse files
committed
python 3.11+ and centralized logging
1 parent f9b9414 commit ef473f6

12 files changed

Lines changed: 1410 additions & 1292 deletions

File tree

json_explorer/__init__.py

Lines changed: 31 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1,31 @@
1-
__version__ = "0.3.0"
1+
"""JSON Explorer - Comprehensive JSON analysis and code generation tool."""
2+
3+
from .logging_config import configure_logging, get_logger
4+
from .analyzer import analyze_json
5+
from .search import JsonSearcher, SearchMode, SearchResult
6+
from .stats import DataStatsAnalyzer, generate_stats
7+
from .visualizer import JSONVisualizer, visualize_json
8+
from .utils import load_json, JSONLoaderError
9+
10+
__version__ = "0.4.0"
11+
12+
__all__ = [
13+
# Logging
14+
"configure_logging",
15+
"get_logger",
16+
# Core
17+
"analyze_json",
18+
"load_json",
19+
# Search
20+
"JsonSearcher",
21+
"SearchMode",
22+
"SearchResult",
23+
# Statistics
24+
"DataStatsAnalyzer",
25+
"generate_stats",
26+
# Visualization
27+
"JSONVisualizer",
28+
"visualize_json",
29+
# Exceptions
30+
"JSONLoaderError",
31+
]

json_explorer/analyzer.py

Lines changed: 65 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,72 @@
1+
"""JSON structure analyzer with type detection and schema inference.
2+
3+
This module analyzes JSON data structures to detect types, optional fields,
4+
conflicts, and generates comprehensive structural summaries.
5+
"""
6+
17
from collections import Counter
8+
from typing import Any
9+
210
import dateparser
311
from rich.progress import Progress, SpinnerColumn, TextColumn
412

13+
from .logging_config import get_logger
14+
15+
logger = get_logger(__name__)
16+
517

def detect_timestamp(value: Any) -> bool:
    """Detect whether a value is a string that parses as a timestamp.

    Args:
        value: Value to check. Anything that is not a ``str`` of at least
            four characters is rejected without being handed to the parser.

    Returns:
        True if ``dateparser.parse`` returns a result for the string,
        False otherwise (including when the parser itself raises).
    """
    # Cheap rejection first: non-strings and very short strings never reach
    # the parser.
    if not isinstance(value, str) or len(value) < 4:
        return False

    # Keep the try body to the single call that can raise.
    try:
        parsed = dateparser.parse(value)
    except Exception:
        # Detection is best-effort: a parser failure means "not a timestamp".
        # Log it at debug level so unexpected errors are not lost silently.
        logger.debug("dateparser failed on %r", value, exc_info=True)
        return False

    return parsed is not None
35+
36+
37+
def analyze_json(data: Any) -> dict[str, Any]:
38+
"""Analyze JSON structure and return detailed metadata.
39+
40+
This function performs deep structural analysis of JSON data, identifying:
41+
- Data types and their distribution
42+
- Optional and required fields
43+
- Type conflicts across similar structures
44+
- Nested object and array patterns
45+
46+
Args:
47+
data: JSON data to analyze (dict, list, or primitive type).
48+
49+
Returns:
50+
Dictionary containing analysis summary with structure, types, and conflicts.
51+
52+
Example:
53+
>>> data = {"users": [{"id": 1, "name": "Alice"}]}
54+
>>> analysis = analyze_json(data)
55+
>>> print(analysis['type'])
56+
object
57+
"""
58+
logger.info("Starting JSON analysis")
1259

13-
def analyze_json(data):
1460
with Progress(
1561
SpinnerColumn(),
1662
TextColumn("[progress.description]{task.description}"),
1763
console=None,
1864
transient=True,
1965
) as progress:
20-
2166
task = progress.add_task("[cyan]Analyzing JSON...", total=None)
2267

23-
def analyze_node(node):
68+
def analyze_node(node: Any) -> dict[str, Any]:
69+
"""Recursively analyze a node in the JSON structure."""
2470
if isinstance(node, dict):
2571
children = {}
2672
for key, val in node.items():
@@ -60,17 +106,12 @@ def analyze_node(node):
60106

61107
# List of lists
62108
if all(e["type"] == "list" for e in element_summaries):
63-
# Merge list structures recursively
64109
merged_list = merge_list_summaries(element_summaries)
65-
return {
66-
"type": "list",
67-
"child": merged_list,
68-
}
110+
return {"type": "list", "child": merged_list}
69111

70112
return {"type": "list", "child_type": "mixed"}
71113

72114
elif node is None:
73-
# Explicitly handle None - mark as unknown but with a flag
74115
return {"type": "unknown", "is_none": True}
75116

76117
else:
@@ -82,8 +123,11 @@ def analyze_node(node):
82123
else:
83124
return {"type": type(node).__name__}
84125

85-
def merge_object_summaries(summaries):
86-
key_structures = {}
126+
def merge_object_summaries(
127+
summaries: list[dict[str, Any]],
128+
) -> tuple[dict[str, Any], dict[str, list[str]]]:
129+
"""Merge multiple object summaries, detecting optional fields and conflicts."""
130+
key_structures: dict[str, list] = {}
87131
key_counts = Counter()
88132
key_none_counts = Counter()
89133
total = len(summaries)
@@ -95,45 +139,38 @@ def merge_object_summaries(summaries):
95139
key_counts[key] += 1
96140
seen_keys.add(key)
97141

98-
# Track if this value is None/unknown
99142
if val.get("type") == "unknown":
100143
key_none_counts[key] += 1
101144

102145
if key not in key_structures:
103146
key_structures[key] = []
104147
key_structures[key].append(val)
105148

106-
merged = {}
107-
conflicts = {}
149+
merged: dict[str, Any] = {}
150+
conflicts: dict[str, list[str]] = {}
108151

109152
for key, structures in key_structures.items():
110153
count = key_counts[key]
111154
none_count = key_none_counts[key]
112155

113-
# Field is optional if:
114-
# 1. Missing from some objects (count < total)
115-
# 2. Has None in some objects (none_count > 0)
156+
# Field is optional if missing or has None values
116157
optional = (count < total) or (none_count > 0)
117158

118-
# Filter out None/unknown types to find concrete types
159+
# Filter out None/unknown types
119160
concrete_structures = [
120161
s for s in structures if s.get("type") != "unknown"
121162
]
122163

123-
# If we have concrete types, use those; otherwise use all structures
124164
working_structures = (
125165
concrete_structures if concrete_structures else structures
126166
)
127167

128-
# Get unique types from working structures
129168
types = {s["type"] for s in working_structures}
130169

131170
if len(types) == 1:
132-
# Single type (possibly with None values)
133171
structure_type = list(types)[0]
134172

135173
if structure_type == "object":
136-
# Recursively merge object structures
137174
merged_children, child_conflicts = merge_object_summaries(
138175
working_structures
139176
)
@@ -146,7 +183,6 @@ def merge_object_summaries(summaries):
146183
merged[key]["conflicts"] = child_conflicts
147184

148185
elif structure_type == "list":
149-
# Merge list structures
150186
merged_list = merge_list_summaries(working_structures)
151187
merged[key] = {
152188
"type": "list",
@@ -155,21 +191,19 @@ def merge_object_summaries(summaries):
155191
}
156192

157193
else:
158-
# Primitive type (possibly with None)
159194
merged[key] = {"type": structure_type, "optional": optional}
160195

161196
elif len(types) > 1:
162-
# Multiple different types = real conflict
163197
merged[key] = {"type": "conflict", "optional": optional}
164198
conflicts[key] = list(types)
165199

166200
else:
167-
# Should not happen, but handle gracefully
168201
merged[key] = {"type": "unknown", "optional": optional}
169202

170203
return merged, conflicts
171204

172-
def merge_list_summaries(summaries):
205+
def merge_list_summaries(summaries: list[dict[str, Any]]) -> dict[str, Any]:
206+
"""Merge multiple list summaries."""
173207
child_types = set()
174208
child_structures = []
175209

@@ -180,14 +214,12 @@ def merge_list_summaries(summaries):
180214
child_structures.append(summary["child"])
181215

182216
if child_structures:
183-
# All lists contain complex structures
184217
structure_types = {s["type"] for s in child_structures}
185218

186219
if len(structure_types) == 1:
187220
structure_type = list(structure_types)[0]
188221

189222
if structure_type == "object":
190-
# Merge object structures within lists
191223
merged_children, child_conflicts = merge_object_summaries(
192224
child_structures
193225
)
@@ -200,14 +232,12 @@ def merge_list_summaries(summaries):
200232
},
201233
}
202234
elif structure_type == "list":
203-
# Nested lists
204235
merged_nested = merge_list_summaries(child_structures)
205236
return {"type": "list", "child": merged_nested}
206237

207238
return {"type": "list", "child_type": "mixed_complex"}
208239

209240
elif child_types:
210-
# Simple child types
211241
if len(child_types) == 1:
212242
return {"type": "list", "child_type": list(child_types)[0]}
213243
else:
@@ -218,45 +248,6 @@ def merge_list_summaries(summaries):
218248

219249
return {"type": "list", "child_type": "unknown"}
220250

221-
# Start the analysis
222251
result = analyze_node(data)
252+
logger.info("JSON analysis completed successfully")
223253
return result
224-
225-
226-
if __name__ == "__main__":
227-
from rich import print as rprint
228-
from rich.pretty import pretty_repr
229-
230-
test_data = {
231-
"users": [
232-
{
233-
"id": 1,
234-
"name": "Alice",
235-
"profile": {
236-
"age": 30,
237-
"settings": {"theme": "dark", "notifications": True},
238-
},
239-
"tags": ["admin", "user"],
240-
"last_login": "2024-07-15T12:30:00Z",
241-
},
242-
{
243-
"id": 2,
244-
"name": "Bob",
245-
"profile": {
246-
"age": 25,
247-
"settings": {
248-
"theme": "light",
249-
"notifications": False,
250-
"language": "en",
251-
},
252-
},
253-
"tags": ["user"],
254-
"email": "[email protected]",
255-
"last_login": "not a date",
256-
},
257-
],
258-
"metadata": {"total": 2, "created": "2024-01-01"},
259-
}
260-
261-
summary = analyze_json(test_data)
262-
rprint(pretty_repr(summary))

0 commit comments

Comments
 (0)