66"""
77
88import re
9- from typing import Set , Dict , Optional
9+ from typing import Set , Dict
1010from enum import Enum
1111
1212
class NamingCase(Enum):
    """Case conventions a name can be converted to.

    Each member's value is a short string tag identifying the convention.
    """

    # Lowercase words joined by underscores, e.g. user_name
    SNAKE_CASE = "snake"
    # First word lowercase, following words capitalized, e.g. userName
    CAMEL_CASE = "camel"
    # Every word capitalized, e.g. UserName
    PASCAL_CASE = "pascal"
    # Lowercase words joined by hyphens, e.g. user-name
    KEBAB_CASE = "kebab"
    # Uppercase words joined by underscores, e.g. USER_NAME
    SCREAMING_SNAKE = "screaming_snake"
2021
2122
2223class NameSanitizer :
2324 """Handles name sanitization and case conversion."""
24-
25+
2526 def __init__ (self , reserved_words : Set [str ] = None , builtin_types : Set [str ] = None ):
2627 """
2728 Initialize name sanitizer.
28-
29+
2930 Args:
3031 reserved_words: Set of language reserved words
3132 builtin_types: Set of builtin type names that might conflict
@@ -34,58 +35,62 @@ def __init__(self, reserved_words: Set[str] = None, builtin_types: Set[str] = No
3435 self .builtin_types = builtin_types or set ()
3536 self ._name_cache : Dict [str , str ] = {}
3637 self ._used_names : Set [str ] = set ()
37-
38- def sanitize_name (self , name : str , target_case : NamingCase = NamingCase .SNAKE_CASE ,
39- suffix_on_conflict : str = "_" ) -> str :
38+
39+ def sanitize_name (
40+ self ,
41+ name : str ,
42+ target_case : NamingCase = NamingCase .SNAKE_CASE ,
43+ suffix_on_conflict : str = "_" ,
44+ ) -> str :
4045 """
4146 Sanitize a name for safe use in target language.
42-
47+
4348 Args:
4449 name: Original name to sanitize
4550 target_case: Desired case style
4651 suffix_on_conflict: Suffix to add for conflicts
47-
52+
4853 Returns:
4954 Sanitized name safe for use
5055 """
5156 # Use cache if available
5257 cache_key = f"{ name } _{ target_case .value } _{ suffix_on_conflict } "
5358 if cache_key in self ._name_cache :
5459 return self ._name_cache [cache_key ]
55-
60+
5661 # Step 1: Basic cleanup
5762 cleaned = self ._clean_basic (name )
58-
63+
5964 # Step 2: Convert to target case
6065 converted = self ._convert_case (cleaned , target_case )
61-
66+
6267 # Step 3: Handle conflicts
6368 final_name = self ._resolve_conflicts (converted , suffix_on_conflict )
64-
69+
6570 # Cache and track
6671 self ._name_cache [cache_key ] = final_name
6772 self ._used_names .add (final_name )
68-
73+
6974 return final_name
70-
75+
7176 def _clean_basic (self , name : str ) -> str :
7277 """Basic name cleanup - remove invalid characters."""
7378 # Remove non-alphanumeric chars except underscore and hyphen
74- cleaned = re .sub (r' [^a-zA-Z0-9_-]' , '_' , name )
75-
79+ cleaned = re .sub (r" [^a-zA-Z0-9_-]" , "_" , name )
80+
7681 # Remove leading/trailing underscores and hyphens
77- cleaned = cleaned .strip ('_-' )
78-
82+ cleaned = cleaned .strip ("_-" )
83+
7984 # Ensure doesn't start with number
8085 if cleaned and cleaned [0 ].isdigit ():
8186 cleaned = f"_{ cleaned } "
82-
87+
8388 # Ensure not empty
8489 if not cleaned :
8590 cleaned = "field"
86-
91+
8792 return cleaned
88-
93+
8994 def _convert_case (self , name : str , target_case : NamingCase ) -> str :
9095 """Convert name to target case style."""
9196 if target_case == NamingCase .SNAKE_CASE :
@@ -100,56 +105,56 @@ def _convert_case(self, name: str, target_case: NamingCase) -> str:
100105 return self ._to_snake_case (name ).upper ()
101106 else :
102107 return name
103-
108+
104109 def _to_snake_case (self , name : str ) -> str :
105110 """Convert to snake_case."""
106111 # Replace hyphens with underscores
107- name = name .replace ('-' , '_' )
108-
112+ name = name .replace ("-" , "_" )
113+
109114 # Insert underscore before uppercase letters
110- name = re .sub (r' ([a-z0-9])([A-Z])' , r' \1_\2' , name )
111-
115+ name = re .sub (r" ([a-z0-9])([A-Z])" , r" \1_\2" , name )
116+
112117 # Convert to lowercase and clean up multiple underscores
113118 name = name .lower ()
114- name = re .sub (r'_+' , '_' , name )
115-
116- return name .strip ('_' )
117-
119+ name = re .sub (r"_+" , "_" , name )
120+
121+ return name .strip ("_" )
122+
118123 def _to_camel_case (self , name : str ) -> str :
119124 """Convert to camelCase."""
120125 # First convert to snake case, then to camel
121126 snake = self ._to_snake_case (name )
122- parts = snake .split ('_' )
123-
127+ parts = snake .split ("_" )
128+
124129 if not parts :
125130 return name
126-
131+
127132 # First part lowercase, rest title case
128- return parts [0 ].lower () + '' .join (part .capitalize () for part in parts [1 :])
129-
133+ return parts [0 ].lower () + "" .join (part .capitalize () for part in parts [1 :])
134+
130135 def _to_pascal_case (self , name : str ) -> str :
131136 """Convert to PascalCase."""
132137 # First convert to snake case, then to pascal
133138 snake = self ._to_snake_case (name )
134- parts = snake .split ('_' )
135-
139+ parts = snake .split ("_" )
140+
136141 # All parts title case
137- return '' .join (part .capitalize () for part in parts if part )
138-
142+ return "" .join (part .capitalize () for part in parts if part )
143+
139144 def _to_kebab_case (self , name : str ) -> str :
140145 """Convert to kebab-case."""
141146 # Convert to snake case, then replace underscores with hyphens
142147 snake = self ._to_snake_case (name )
143- return snake .replace ('_' , '-' )
144-
148+ return snake .replace ("_" , "-" )
149+
145150 def _resolve_conflicts (self , name : str , suffix : str ) -> str :
146151 """Resolve naming conflicts with reserved words and existing names."""
147152 original_name = name
148-
153+
149154 # Check reserved words and builtin types
150155 if name .lower () in self .reserved_words or name .lower () in self .builtin_types :
151156 name = f"{ name } { suffix } "
152-
157+
153158 # Check for duplicates
154159 counter = 1
155160 while name in self ._used_names :
@@ -158,84 +163,13 @@ def _resolve_conflicts(self, name: str, suffix: str) -> str:
158163 else :
159164 name = f"{ original_name } { counter } "
160165 counter += 1
161-
166+
162167 return name
163-
168+
164169 def reset_used_names (self ):
165170 """Reset the tracking of used names."""
166171 self ._used_names .clear ()
167-
172+
168173 def add_used_name (self , name : str ):
169174 """Manually add a name to the used names set."""
170175 self ._used_names .add (name )
171-
172-
173- # Predefined sanitizers for common languages
def create_go_sanitizer() -> NameSanitizer:
    """Create a name sanitizer configured for Go.

    Returns:
        A new NameSanitizer whose reserved words are the Go keywords and
        whose builtin types are Go's predeclared type and function names
        listed below.
    """
    keywords = set(
        "break case chan const continue default defer else fallthrough for "
        "func go goto if import interface map package range return select "
        "struct switch type var".split()
    )

    predeclared_types = set(
        "bool byte complex64 complex128 error float32 float64 "
        "int int8 int16 int32 int64 rune string "
        "uint uint8 uint16 uint32 uint64 uintptr".split()
    )
    builtin_functions = set(
        "append cap close complex copy delete imag len "
        "make new panic print println real recover".split()
    )

    return NameSanitizer(keywords, predeclared_types | builtin_functions)
192-
193-
def create_python_sanitizer() -> NameSanitizer:
    """Create a name sanitizer configured for Python.

    Returns:
        A new NameSanitizer whose reserved words are Python keywords
        (including the legacy 'exec' and 'print') and whose builtin types
        are the common builtin names listed below.
    """
    python_reserved = {
        "and", "as", "assert", "break", "class", "continue", "def", "del",
        "elif", "else", "except", "exec", "finally", "for", "from", "global",
        "if", "import", "in", "is", "lambda", "not", "or", "pass", "print",
        "raise", "return", "try", "while", "with", "yield", "True", "False",
        "None", "async", "await", "nonlocal",
    }
    # NameSanitizer checks `name.lower()` against this set, so mixed-case
    # keywords such as True/False/None would never match as written. Add
    # their lowercase forms; the original spellings are kept as well.
    python_reserved |= {word.lower() for word in python_reserved}

    python_builtins = {
        "abs", "all", "any", "bin", "bool", "bytearray", "bytes", "callable",
        "chr", "classmethod", "compile", "complex", "delattr", "dict", "dir",
        "divmod", "enumerate", "eval", "exec", "filter", "float", "format",
        "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
        "id", "input", "int", "isinstance", "issubclass", "iter", "len",
        "list", "locals", "map", "max", "memoryview", "min", "next", "object",
        "oct", "open", "ord", "pow", "property", "range", "repr", "reversed",
        "round", "set", "setattr", "slice", "sorted", "staticmethod", "str",
        "sum", "super", "tuple", "type", "vars", "zip",
    }

    return NameSanitizer(python_reserved, python_builtins)
217-
218-
219- # Convenience functions
def sanitize_go_struct_name(name: str) -> str:
    """Sanitize name for Go struct (PascalCase, exported).

    A fresh Go sanitizer is created on every call, so no used-name tracking
    is shared between calls.

    Args:
        name: Original struct name.

    Returns:
        The sanitized PascalCase name.
    """
    return create_go_sanitizer().sanitize_name(
        name, target_case=NamingCase.PASCAL_CASE
    )
224-
225-
def sanitize_go_field_name(name: str) -> str:
    """Sanitize name for Go struct field (PascalCase, exported).

    A fresh Go sanitizer is created on every call, so no used-name tracking
    is shared between calls.

    Args:
        name: Original field name.

    Returns:
        The sanitized PascalCase name.
    """
    return create_go_sanitizer().sanitize_name(
        name, target_case=NamingCase.PASCAL_CASE
    )
230-
231-
def sanitize_python_class_name(name: str) -> str:
    """Sanitize name for Python class (PascalCase).

    A fresh Python sanitizer is created on every call, so no used-name
    tracking is shared between calls.

    Args:
        name: Original class name.

    Returns:
        The sanitized PascalCase name.
    """
    return create_python_sanitizer().sanitize_name(
        name, target_case=NamingCase.PASCAL_CASE
    )
236-
237-
def sanitize_python_field_name(name: str) -> str:
    """Sanitize name for Python field (snake_case).

    A fresh Python sanitizer is created on every call, so no used-name
    tracking is shared between calls.

    Args:
        name: Original field name.

    Returns:
        The sanitized snake_case name.
    """
    return create_python_sanitizer().sanitize_name(
        name, target_case=NamingCase.SNAKE_CASE
    )
0 commit comments