Skip to content

Commit e1ff91f

Browse files
fix(cddl2ts): infer template literal types from regexp strings (#48)
* Add custom regexp channel type support
* Fix operator guard regression
* Generalize regexp template inference
* Add cddl coverage tests
1 parent 23c638b commit e1ff91f

9 files changed

Lines changed: 401 additions & 2 deletions

File tree

packages/cddl/src/utils.ts

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,32 @@ export function isPropertyReference (t: any): t is PropertyReference {
8181
}
8282

8383
/**
 * Type guard for AST nodes that pair a type with a control operator.
 *
 * This hunk shows both versions of the body. The removed (`-`) line only
 * accepted nodes whose `Type` was itself an object and that had an
 * `Operator` key. The added (`+`) lines accept any truthy object that has a
 * `Type` key, has no `Value` key, and whose `Operator` is a truthy object;
 * the result is coerced with `Boolean(...)` so the guard always yields a
 * real boolean.
 *
 * @param t - value to test
 * @returns `true` when `t` satisfies the checks above
 */
export function isNativeTypeWithOperator (t: any): t is NativeTypeWithOperator {
84-
return t && typeof t.Type === 'object' && 'Operator' in t
84+
return Boolean(
85+
t &&
86+
typeof t === 'object' &&
87+
'Type' in t &&
88+
!('Value' in t) &&
89+
t.Operator &&
90+
typeof t.Operator === 'object'
91+
)
92+
}
93+
94+
/**
 * Extracts the pattern string of a `regexp` operator attached to a text type.
 *
 * @param t - candidate AST node
 * @returns the pattern when `t` passes `isNativeTypeWithOperator`, its `Type`
 * is the string `'str'`, `'text'` or `'tstr'`, its operator has
 * `Type === 'regexp'`, and the operator value passes `isLiteralWithValue`
 * and wraps a string; `undefined` in every other case
 */
export function getRegexpPattern (t: any): string | undefined {
  if (!isNativeTypeWithOperator(t)) {
    return undefined
  }

  const isTextType = typeof t.Type === 'string' && ['str', 'text', 'tstr'].includes(t.Type)
  if (!isTextType) {
    return undefined
  }

  const operator = t.Operator
  if (operator?.Type !== 'regexp' || !isLiteralWithValue(operator.Value)) {
    return undefined
  }

  // The literal may wrap a non-string value (e.g. a number); only strings are patterns.
  const pattern = operator.Value.Value
  return typeof pattern === 'string' ? pattern : undefined
}
86111

87112
export function isRange (t: any): boolean {

packages/cddl/tests/cli.test.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import { afterEach, describe, expect, it, vi } from 'vitest'
2+
3+
import { CLI_EPILOGUE } from '../src/cli/constants.js'
4+
5+
describe('cli entrypoint', () => {
  // Clear the module registry and restore mocks after every test.
  afterEach(() => {
    vi.resetModules()
    vi.restoreAllMocks()
  })

  it('wires yargs commands and returns argv', async () => {
    const argvValue = { _: ['repl'] }

    // Each builder method is a mock that returns the same chain object, so
    // calls can be chained fluently and still be counted individually.
    const builders = {
      command: vi.fn(),
      example: vi.fn(),
      epilogue: vi.fn(),
      demandCommand: vi.fn(),
      help: vi.fn()
    }
    const chain = { ...builders, argv: argvValue }
    Object.values(builders).forEach((builder) => builder.mockReturnValue(chain))

    const yargsMock = vi.fn().mockReturnValue(chain)
    const hideBinMock = vi.fn().mockReturnValue(['repl'])

    // The mocks are registered before the dynamic import of the entrypoint below.
    vi.doMock('yargs/yargs', () => ({ default: yargsMock }))
    vi.doMock('yargs/helpers', () => ({ hideBin: hideBinMock }))

    const cliModule = await import('../src/cli/index.js')
    const result = await cliModule.default()

    expect(hideBinMock).toHaveBeenCalledWith(process.argv)
    expect(yargsMock).toHaveBeenCalledWith(['repl'])
    expect(builders.command).toHaveBeenCalledTimes(2)
    expect(builders.example).toHaveBeenCalledWith('$0 repl', 'Start CDDL repl')
    expect(builders.epilogue).toHaveBeenCalledWith(CLI_EPILOGUE)
    expect(builders.demandCommand).toHaveBeenCalledTimes(1)
    expect(builders.help).toHaveBeenCalledTimes(1)
    expect(result).toEqual(argvValue)
  })
})

packages/cddl/tests/lexer.test.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,12 @@ describe('lexer', () => {
6060
expect(locEnd.line).toBe(0)
6161
expect(locEnd.position).toBe(0)
6262
})
63+
64+
it('should render caret location info for the current line', () => {
65+
const l = new Lexer('foo\nbar')
66+
l.nextToken()
67+
68+
expect(l.getLine(1)).toBe('bar')
69+
expect(l.getLocationInfo()).toBe('foo\n ^\n |\n')
70+
})
6371
})

packages/cddl/tests/parser.test.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,29 @@ describe('parser', () => {
6060

6161
vi.restoreAllMocks()
6262
})
63+
64+
it('parses RFC 9165 regexp operators on text strings', () => {
65+
vi.spyOn(fs, 'readFileSync').mockReturnValue('channel = tstr .regexp "custom:.+"\n')
66+
const p = new Parser('foo.cddl')
67+
68+
expect(p.parse()).toEqual([{
69+
Type: 'variable',
70+
Name: 'channel',
71+
IsChoiceAddition: false,
72+
PropertyType: [{
73+
Type: 'tstr',
74+
Operator: {
75+
Type: 'regexp',
76+
Value: {
77+
Type: 'literal',
78+
Value: 'custom:.+',
79+
Unwrapped: false
80+
}
81+
}
82+
}],
83+
Comments: []
84+
}])
85+
86+
vi.restoreAllMocks()
87+
})
6388
})

packages/cddl/tests/utils.test.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@ import type {
1010
PropertyReference,
1111
Variable
1212
} from '../src/ast.js'
13+
import { Type } from '../src/ast.js'
1314
import { Tokens, type Token } from '../src/tokens.js'
1415
import {
16+
getRegexpPattern,
1517
hasSpecialNumberCharacter,
1618
isAlphabeticCharacter,
1719
isCDDLArray,
@@ -183,6 +185,17 @@ describe('utils', () => {
183185
Value: 'tstr'
184186
}
185187
}
188+
const nativeStringTypeWithRegexp: NativeTypeWithOperator = {
189+
Type: Type.TSTR,
190+
Operator: {
191+
Type: 'regexp',
192+
Value: {
193+
Type: 'literal',
194+
Value: 'custom:.+',
195+
Unwrapped: false
196+
}
197+
}
198+
}
186199
const rangeReference: PropertyReference = {
187200
Type: 'range',
188201
Value: {
@@ -194,7 +207,25 @@ describe('utils', () => {
194207
}
195208

196209
expect(isNativeTypeWithOperator(nativeTypeWithOperator)).toBe(true)
210+
expect(isNativeTypeWithOperator(nativeStringTypeWithRegexp)).toBe(true)
197211
expect(isNativeTypeWithOperator({ Type: 'tstr' })).toBe(false)
212+
expect(isNativeTypeWithOperator({
213+
Type: Type.TSTR,
214+
Operator: 'regexp'
215+
})).toBe(false)
216+
expect(getRegexpPattern(nativeStringTypeWithRegexp)).toBe('custom:.+')
217+
expect(getRegexpPattern(nativeTypeWithOperator)).toBeUndefined()
218+
expect(getRegexpPattern({
219+
Type: Type.TSTR,
220+
Operator: {
221+
Type: 'regexp',
222+
Value: {
223+
Type: 'literal',
224+
Value: 42,
225+
Unwrapped: false
226+
}
227+
}
228+
})).toBeUndefined()
198229

199230
expect(isRange({ Type: rangeReference })).toBe(true)
200231
expect(isRange({ Type: 'range' })).toBe(false)

packages/cddl2py/src/index.ts

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import {
2+
getRegexpPattern,
23
isCDDLArray, isGroup, isNamedGroupReference, isLiteralWithValue,
34
isNativeTypeWithOperator, isUnNamedProperty, isPropertyReference,
45
isRange, isVariable, pascalCase,
5-
type Assignment, type PropertyType, type PropertyReference,
6+
type Assignment, type NativeTypeWithOperator, type PropertyType, type PropertyReference,
67
type Property, type Array as CDDLArray, type Operator, type Group,
78
type Variable, type Comment, type Tag
89
} from 'cddl'
@@ -516,6 +517,72 @@ function getExtraItemsType (props: Property[], ctx: Context): string | undefined
516517
return `Union[${uniqueTypes.join(', ')}]`
517518
}
518519

520+
/**
 * Renders `value` as a double-quoted, JSON-escaped string literal for
 * embedding in the generated Python source.
 */
function stringifyPythonLiteral (value: string) {
  return JSON.stringify(value)
}

/**
 * Converts a simple "literal text with `.+` slots" regexp into an
 * `Annotated[str, ...]` template expression, e.g. `custom:.+` becomes
 * `Annotated[str, "custom:" + str]`.
 *
 * Only patterns made of plain literal text separated by `.+` wildcards are
 * supported. If any text outside the wildcards still contains regexp syntax
 * the pattern cannot be represented as a template and `undefined` is
 * returned. That includes a stray `.` or `+`: `v1.0-.+` is rejected because
 * its `.` matches any character and is not a literal dot.
 *
 * @param regexpPattern - pattern taken from a `.regexp` control
 * @returns the annotated template expression, or `undefined` when the
 * pattern has no `.+` wildcard or is not purely literal around its wildcards
 */
function getTemplateAnnotatedPattern (regexpPattern: string): string | undefined {
  const wildcard = '.+'
  if (!regexpPattern.includes(wildcard)) {
    return undefined
  }

  // Characters that carry regexp meaning; a segment containing any of them
  // is not literal text. `.` and `+` are included so leftovers such as
  // `a.b` or `a+` are not mistaken for literals.
  const regexpSyntax = /[\\()[\]{}|?*^$.+]/

  const segments = regexpPattern.split(wildcard)
  if (segments.some((segment) => regexpSyntax.test(segment))) {
    return undefined
  }

  // Interleave quoted literal segments with `str` slots. Splitting a string
  // that contains the wildcard always yields at least two segments, so at
  // least one `str` slot is emitted.
  const parts: string[] = []
  segments.forEach((segment, index) => {
    if (segment.length > 0) {
      parts.push(stringifyPythonLiteral(segment))
    }
    if (index < segments.length - 1) {
      parts.push('str')
    }
  })

  return `Annotated[str, ${parts.join(' + ')}]`
}
550+
551+
/**
 * Resolves a native type that carries an operator to its output type string.
 *
 * The base type is looked up in `NATIVE_TYPE_MAP`. When the node carries a
 * regexp pattern that `getTemplateAnnotatedPattern` can express, the result
 * is an `Annotated[...]` form: a pydantic `StringConstraints(pattern=...)`
 * annotation when `ctx.pydantic` is set, otherwise the template expression.
 * In every other case the plain mapped type is returned. The imports needed
 * by the returned expression are registered on `ctx` as a side effect.
 *
 * @param t - native type node with an operator
 * @param ctx - generation context collecting required imports
 * @returns the resolved type string, or `undefined` when `t.Type` is not a
 * string or has no entry in `NATIVE_TYPE_MAP`
 */
function resolveNativeTypeWithOperator (t: NativeTypeWithOperator, ctx: Context): string | undefined {
  const mapped = typeof t.Type === 'string' ? NATIVE_TYPE_MAP[t.Type] : undefined
  if (!mapped) {
    return undefined
  }

  const regexpPattern = getRegexpPattern(t)
  const templateAnnotatedPattern = regexpPattern
    ? getTemplateAnnotatedPattern(regexpPattern)
    : undefined

  // No usable regexp template: fall back to the plain mapped type.
  if (!regexpPattern || !templateAnnotatedPattern) {
    if (mapped === 'Any') {
      ctx.typingImports.add('Any')
    }
    return mapped
  }

  ctx.typingImports.add('Annotated')
  if (!ctx.pydantic) {
    return templateAnnotatedPattern
  }

  ctx.pydanticImports.add('StringConstraints')
  return `Annotated[${mapped}, StringConstraints(pattern=${JSON.stringify(regexpPattern)})]`
}
585+
519586
// ---------------------------------------------------------------------------
520587
// Type resolution
521588
// ---------------------------------------------------------------------------
@@ -532,6 +599,14 @@ function resolveType (t: PropertyType, ctx: Context, options: ResolveTypeOptions
532599
throw new Error(`Unknown native type: "${t}"`)
533600
}
534601

602+
if (isNativeTypeWithOperator(t) && typeof t.Type === 'string') {
603+
const resolved = resolveNativeTypeWithOperator(t, ctx)
604+
if (resolved) {
605+
return resolved
606+
}
607+
throw new Error(`Unknown native type with operator: ${JSON.stringify(t)}`)
608+
}
609+
535610
if ((t as any).Type && typeof (t as any).Type === 'string' && NATIVE_TYPE_MAP[(t as any).Type]) {
536611
const mapped = NATIVE_TYPE_MAP[(t as any).Type]
537612
if (mapped === 'Any') {

packages/cddl2py/tests/transform_edge_cases.test.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,74 @@ describe('transform edge cases', () => {
151151
expect(output).toContain('Combined = Union[_CombinedVariant0, _CombinedVariant1]')
152152
})
153153

154+
it('should preserve template-like regexp strings in python-friendly types', () => {
155+
const typedDictOutput = transform([
156+
variable('channel', {
157+
Type: 'tstr',
158+
Operator: {
159+
Type: 'regexp',
160+
Value: literal('custom:.+')
161+
}
162+
} as any),
163+
variable('prefixed-name', {
164+
Type: 'tstr',
165+
Operator: {
166+
Type: 'regexp',
167+
Value: literal('foo_.+')
168+
}
169+
} as any),
170+
variable('sandwiched-name', {
171+
Type: 'tstr',
172+
Operator: {
173+
Type: 'regexp',
174+
Value: literal('some_.+_name')
175+
}
176+
} as any),
177+
variable('multi-slot-name', {
178+
Type: 'tstr',
179+
Operator: {
180+
Type: 'regexp',
181+
Value: literal('pre_.+_mid_.+_post')
182+
}
183+
} as any),
184+
group('event-envelope', [
185+
property('channel', {
186+
Type: 'tstr',
187+
Operator: {
188+
Type: 'regexp',
189+
Value: literal('custom:.+')
190+
}
191+
} as any)
192+
]),
193+
variable('email-address', {
194+
Type: 'tstr',
195+
Operator: {
196+
Type: 'regexp',
197+
Value: literal('[^@]+@[^@]+')
198+
}
199+
} as any)
200+
])
201+
const pydanticOutput = transform([
202+
variable('channel', {
203+
Type: 'tstr',
204+
Operator: {
205+
Type: 'regexp',
206+
Value: literal('custom:.+')
207+
}
208+
} as any)
209+
], { pydantic: true })
210+
211+
expect(typedDictOutput).toContain('from typing import Annotated')
212+
expect(typedDictOutput).toContain('Channel = Annotated[str, "custom:" + str]')
213+
expect(typedDictOutput).toContain('PrefixedName = Annotated[str, "foo_" + str]')
214+
expect(typedDictOutput).toContain('SandwichedName = Annotated[str, "some_" + str + "_name"]')
215+
expect(typedDictOutput).toContain('MultiSlotName = Annotated[str, "pre_" + str + "_mid_" + str + "_post"]')
216+
expect(typedDictOutput).toContain('channel: Annotated[str, "custom:" + str]')
217+
expect(typedDictOutput).toContain('EmailAddress = str')
218+
expect(pydanticOutput).toContain('from pydantic import StringConstraints')
219+
expect(pydanticOutput).toContain('Channel = Annotated[str, StringConstraints(pattern="custom:.+")]')
220+
})
221+
154222
it('should collapse multiple union mixin groups into a single alias', () => {
155223
const output = transform([
156224
group('combined', [

0 commit comments

Comments
 (0)