From 3bc42a71b864ece9f58c1ce52f66ec79d4b01d47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Tue, 14 Apr 2026 15:17:26 +0000 Subject: [PATCH 1/9] cpp_demangle: tolerate clang SUBSTPACK placeholders --- src/ast.rs | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/src/ast.rs b/src/ast.rs index c64d698..4524f3d 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -3664,6 +3664,13 @@ pub enum Type { /// A pack expansion. PackExpansion(TypeHandle), + /// Clang-only placeholder emitted for unmangleable substitution-pack data. + /// + /// This is not part of the Itanium ABI grammar; Clang emits it verbatim in + /// a few fallback `FIXME` code paths. We keep parsing by treating it as + /// ignorable noise so surrounding structure can still demangle. + ClangSubstPackNoise, + /// Builtin type eligible for substitutions, e.g. vendor extended type or _BitInt(N). /// Note: most builtin types are excluded from substitutions, and we store them directly /// in TypeHandle without creating a Type. @@ -3742,6 +3749,23 @@ impl Parse for TypeHandle { return Ok((handle, tail)); } + // Non-standard Clang extensions seen in the wild: + // _SUBSTPACK_ + // _SUBSTBUILTINPACK_ + // + // These are emitted as fallback placeholders for pack substitutions in + // Clang's Itanium mangler (with FIXME comments in Clang source). They + // are not part of the Itanium grammar, so we parse them as ignorable + // noise to preserve demangling progress for real-world symbols. + if let Ok(tail) = consume(b"_SUBSTPACK_", input) { + let ty = Type::ClangSubstPackNoise; + return insert_and_return_handle(ty, subs, tail); + } + if let Ok(tail) = consume(b"_SUBSTBUILTINPACK_", input) { + let ty = Type::ClangSubstPackNoise; + return insert_and_return_handle(ty, subs, tail); + } + // ::= <qualified-type> // We don't have a separate type for the <qualified-type> production. 
// Process these all up front, so that any ambiguity that might exist @@ -3981,6 +4005,7 @@ where } Ok(()) } + Type::ClangSubstPackNoise => Ok(()), Type::Builtin(ref builtin) => builtin.demangle(ctx, scope), } } @@ -6063,6 +6088,13 @@ pub enum Expression { /// `throw` with no operand Rethrow, + /// Clang-only placeholder emitted for unmangleable substitution-pack data. + /// + /// This marker is non-standard and may appear where an expression is + /// expected (for example, `X_SUBSTPACK_E`). Treat it as ignorable noise so + /// parsing can continue. + ClangSubstPackNoise, + /// `f(p)`, `N::f(p)`, `::f(p)`, freestanding dependent name (e.g., `T::x`), /// objectless nonstatic member reference. UnresolvedName(UnresolvedName), @@ -6079,6 +6111,16 @@ impl Parse for Expression { ) -> Result<(Expression, IndexStr<'b>)> { try_begin_parse!("Expression", ctx, input); + // Non-standard Clang extension markers for unmangleable + // substitution-pack expressions. Keep parsing by accepting them as + // ignorable noise. 
+ if let Ok(tail) = consume(b"_SUBSTPACK_", input) { + return Ok((Expression::ClangSubstPackNoise, tail)); + } + if let Ok(tail) = consume(b"_SUBSTBUILTINPACK_", input) { + return Ok((Expression::ClangSubstPackNoise, tail)); + } + if let Ok(tail) = consume(b"pp_", input) { let (expr, tail) = Expression::parse(ctx, subs, tail)?; let expr = Expression::PrefixInc(Box::new(expr)); @@ -6726,6 +6768,7 @@ where write!(ctx, "throw")?; Ok(()) } + Expression::ClangSubstPackNoise => Ok(()), Expression::UnresolvedName(ref name) => name.demangle(ctx, scope), Expression::Primary(ref expr) => expr.demangle(ctx, scope), } @@ -8601,6 +8644,7 @@ mod tests { use crate::error::Error; use crate::index_str::IndexStr; use crate::subs::{Substitutable, SubstitutionTable}; + use crate::Symbol; use alloc::boxed::Box; use alloc::string::String; use core::fmt::Debug; @@ -10288,6 +10332,65 @@ mod tests { }); } + #[test] + fn parse_realworld_substpack_full_mangled_name_probe() { + let mut subs = SubstitutionTable::new(); + let ctx = ParseContext::new(Default::default()); + let input = IndexStr::new( + b"_ZN2UE4Core7Private5Tuple10TTupleBaseI16TIntegerSequenceIjJLj0ELj1EEEJ7FString19FUProjectDictionaryEEC2IJRKS6_S7_ETnPDTcl21ConceptCheckingHelperspcvNS2_17TTupleBaseElementI_SUBSTPACK_X_SUBSTPACK_ELj2EEE_LNS2_22EForwardingConstructorE0Ecl7DeclValIOT_EEEEELPv0EEESF_DpSH_", + ); + + match MangledName::parse(&ctx, &mut subs, input) { + Ok((_name, tail)) => assert!( + tail.is_empty(), + "substpack full parse left tail: {:?}", + String::from_utf8_lossy(tail.as_ref()) + ), + Err(err) => panic!("failed substpack full mangled name: {:?}", err), + } + } + + #[test] + fn demangle_realworld_substpack_probe() { + let mangled = b"_ZN2UE4Core7Private5Tuple10TTupleBaseI16TIntegerSequenceIjJLj0ELj1EEEJ7FString19FUProjectDictionaryEEC2IJRKS6_S7_ETnPDTcl21ConceptCheckingHelperspcvNS2_17TTupleBaseElementI_SUBSTPACK_X_SUBSTPACK_ELj2EEE_LNS2_22EForwardingConstructorE0Ecl7DeclValIOT_EEEEELPv0EEESF_DpSH_"; + let sym = 
Symbol::new(&mangled[..]).expect("symbol parse"); + match sym.demangle() { + Ok(_) => {} + Err(err) => panic!("failed substpack demangle: {:?}", err), + } + } + + #[test] + fn parse_clang_substpack_noise_tokens_directly() { + let ctx = ParseContext::new(Default::default()); + + let mut subs = SubstitutionTable::new(); + let (ty, tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTPACK_...")) + .expect("type _SUBSTPACK_ should parse"); + assert!(matches!(subs.get_type(&ty), Some(Type::ClangSubstPackNoise))); + assert_eq!(tail.as_ref(), b"..."); + + let mut subs = SubstitutionTable::new(); + let (ty, tail) = + TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTBUILTINPACK_...")) + .expect("type _SUBSTBUILTINPACK_ should parse"); + assert!(matches!(subs.get_type(&ty), Some(Type::ClangSubstPackNoise))); + assert_eq!(tail.as_ref(), b"..."); + + let mut subs = SubstitutionTable::new(); + let (expr, tail) = Expression::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTPACK_...")) + .expect("expression _SUBSTPACK_ should parse"); + assert!(matches!(expr, Expression::ClangSubstPackNoise)); + assert_eq!(tail.as_ref(), b"..."); + + let mut subs = SubstitutionTable::new(); + let (expr, tail) = + Expression::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTBUILTINPACK_...")) + .expect("expression _SUBSTBUILTINPACK_ should parse"); + assert!(matches!(expr, Expression::ClangSubstPackNoise)); + assert_eq!(tail.as_ref(), b"..."); + } + #[test] fn parse_expression() { assert_parse!(Expression { From 273de285352807cb2617873046198804f67f21bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Tue, 14 Apr 2026 17:17:32 +0000 Subject: [PATCH 2/9] cpp_demangle: render visible placeholder for clang substpack noise --- src/ast.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 4524f3d..f05b4c1 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -4005,7 +4005,7 @@ where } Ok(()) } - 
Type::ClangSubstPackNoise => Ok(()), + Type::ClangSubstPackNoise => write!(ctx, "{{clang-subst-pack-noise}}"), Type::Builtin(ref builtin) => builtin.demangle(ctx, scope), } } @@ -6768,7 +6768,7 @@ where write!(ctx, "throw")?; Ok(()) } - Expression::ClangSubstPackNoise => Ok(()), + Expression::ClangSubstPackNoise => write!(ctx, "{{clang-subst-pack-noise}}"), Expression::UnresolvedName(ref name) => name.demangle(ctx, scope), Expression::Primary(ref expr) => expr.demangle(ctx, scope), } From 7efe4ecfa34bdf8066be277ab320250aedd61def Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Wed, 15 Apr 2026 09:26:33 +0000 Subject: [PATCH 3/9] cpp_demangle: avoid semver-breaking AST variants for substpack --- src/ast.rs | 75 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index f05b4c1..f21f6b2 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -3664,13 +3664,6 @@ pub enum Type { /// A pack expansion. PackExpansion(TypeHandle), - /// Clang-only placeholder emitted for unmangleable substitution-pack data. - /// - /// This is not part of the Itanium ABI grammar; Clang emits it verbatim in - /// a few fallback `FIXME` code paths. We keep parsing by treating it as - /// ignorable noise so surrounding structure can still demangle. - ClangSubstPackNoise, - /// Builtin type eligible for substitutions, e.g. vendor extended type or _BitInt(N). /// Note: most builtin types are excluded from substitutions, and we store them directly /// in TypeHandle without creating a Type. @@ -3755,14 +3748,23 @@ impl Parse for TypeHandle { // // These are emitted as fallback placeholders for pack substitutions in // Clang's Itanium mangler (with FIXME comments in Clang source). They - // are not part of the Itanium grammar, so we parse them as ignorable - // noise to preserve demangling progress for real-world symbols. 
+ // are not part of the Itanium grammar, so we parse them using existing + // extension nodes to preserve demangling progress without changing the + // public AST enum surface. if let Ok(tail) = consume(b"_SUBSTPACK_", input) { - let ty = Type::ClangSubstPackNoise; + let name = SourceName(Identifier { + start: input.index(), + end: input.index() + b"_SUBSTPACK_".len(), + }); + let ty = Type::Builtin(BuiltinType::Extension(name)); return insert_and_return_handle(ty, subs, tail); } if let Ok(tail) = consume(b"_SUBSTBUILTINPACK_", input) { - let ty = Type::ClangSubstPackNoise; + let name = SourceName(Identifier { + start: input.index(), + end: input.index() + b"_SUBSTBUILTINPACK_".len(), + }); + let ty = Type::Builtin(BuiltinType::Extension(name)); return insert_and_return_handle(ty, subs, tail); } @@ -4005,7 +4007,6 @@ where } Ok(()) } - Type::ClangSubstPackNoise => write!(ctx, "{{clang-subst-pack-noise}}"), Type::Builtin(ref builtin) => builtin.demangle(ctx, scope), } } @@ -6088,13 +6089,6 @@ pub enum Expression { /// `throw` with no operand Rethrow, - /// Clang-only placeholder emitted for unmangleable substitution-pack data. - /// - /// This marker is non-standard and may appear where an expression is - /// expected (for example, `X_SUBSTPACK_E`). Treat it as ignorable noise so - /// parsing can continue. - ClangSubstPackNoise, - /// `f(p)`, `N::f(p)`, `::f(p)`, freestanding dependent name (e.g., `T::x`), /// objectless nonstatic member reference. UnresolvedName(UnresolvedName), @@ -6112,13 +6106,27 @@ impl Parse for Expression { try_begin_parse!("Expression", ctx, input); // Non-standard Clang extension markers for unmangleable - // substitution-pack expressions. Keep parsing by accepting them as - // ignorable noise. + // substitution-pack expressions. Keep parsing by mapping these to an + // unresolved source name placeholder using existing AST nodes. 
if let Ok(tail) = consume(b"_SUBSTPACK_", input) { - return Ok((Expression::ClangSubstPackNoise, tail)); + let name = SourceName(Identifier { + start: input.index(), + end: input.index() + b"_SUBSTPACK_".len(), + }); + let expr = Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name( + SimpleId(name, None), + ))); + return Ok((expr, tail)); } if let Ok(tail) = consume(b"_SUBSTBUILTINPACK_", input) { - return Ok((Expression::ClangSubstPackNoise, tail)); + let name = SourceName(Identifier { + start: input.index(), + end: input.index() + b"_SUBSTBUILTINPACK_".len(), + }); + let expr = Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name( + SimpleId(name, None), + ))); + return Ok((expr, tail)); } if let Ok(tail) = consume(b"pp_", input) { @@ -6768,7 +6776,6 @@ where write!(ctx, "throw")?; Ok(()) } - Expression::ClangSubstPackNoise => write!(ctx, "{{clang-subst-pack-noise}}"), Expression::UnresolvedName(ref name) => name.demangle(ctx, scope), Expression::Primary(ref expr) => expr.demangle(ctx, scope), } @@ -10367,27 +10374,39 @@ mod tests { let mut subs = SubstitutionTable::new(); let (ty, tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTPACK_...")) .expect("type _SUBSTPACK_ should parse"); - assert!(matches!(subs.get_type(&ty), Some(Type::ClangSubstPackNoise))); + assert!(matches!( + subs.get_type(&ty), + Some(Type::Builtin(BuiltinType::Extension(_))) + )); assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); let (ty, tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTBUILTINPACK_...")) .expect("type _SUBSTBUILTINPACK_ should parse"); - assert!(matches!(subs.get_type(&ty), Some(Type::ClangSubstPackNoise))); + assert!(matches!( + subs.get_type(&ty), + Some(Type::Builtin(BuiltinType::Extension(_))) + )); assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); let (expr, tail) = Expression::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTPACK_...")) 
.expect("expression _SUBSTPACK_ should parse"); - assert!(matches!(expr, Expression::ClangSubstPackNoise)); + assert!(matches!( + expr, + Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name(_))) + )); assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); let (expr, tail) = Expression::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTBUILTINPACK_...")) .expect("expression _SUBSTBUILTINPACK_ should parse"); - assert!(matches!(expr, Expression::ClangSubstPackNoise)); + assert!(matches!( + expr, + Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name(_))) + )); assert_eq!(tail.as_ref(), b"..."); } From b458d83f58e4b68b22b4bbb0531683d5e2b0bfad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Wed, 15 Apr 2026 10:03:29 +0000 Subject: [PATCH 4/9] cpp_demangle: render substpack placeholders without AST API changes --- src/ast.rs | 52 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index f21f6b2..35b8ec3 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2775,6 +2775,13 @@ impl SourceName { fn starts_with(byte: u8) -> bool { byte == b'0' || (b'0' <= byte && byte <= b'9') } + + #[inline] + fn is_clang_substpack_placeholder(&self, input: &[u8]) -> bool { + let SourceName(Identifier { start, end }) = self; + let ident = &input[*start..*end]; + ident == b"_SUBSTPACK_" || ident == b"_SUBSTBUILTINPACK_" + } } impl<'subs, W> Demangle<'subs, W> for SourceName @@ -2789,7 +2796,11 @@ where ) -> fmt::Result { let ctx = try_begin_demangle!(self, ctx, scope); - self.0.demangle(ctx, scope) + if self.is_clang_substpack_placeholder(ctx.input) { + write!(ctx, "{{clang-subst-pack-noise}}") + } else { + self.0.demangle(ctx, scope) + } } } @@ -8636,12 +8647,13 @@ mod tests { AbiTag, AbiTags, ArrayType, BareFunctionType, BaseUnresolvedName, BuiltinType, CallOffset, ClassEnumType, ClosureTypeName, ConstraintExpression, 
CtorDtorName, CvQualifiers, DataMemberPrefix, Decltype, DestructorName, Discriminator, Encoding, ExceptionSpec, - ExprPrimary, Expression, FoldExpr, FunctionParam, FunctionType, GlobalCtorDtor, Identifier, - Initializer, LambdaSig, LocalName, MangledName, MemberName, Name, NestedName, - NonSubstitution, Number, NvOffset, OperatorName, ParametricBuiltinType, Parse, - ParseContext, PointerToMemberType, Prefix, PrefixHandle, RefQualifier, ResourceName, SeqId, - SimpleId, SimpleOperatorName, SourceName, SpecialName, StandardBuiltinType, SubobjectExpr, - Substitution, TemplateArg, TemplateArgs, TemplateParam, TemplateParamDecl, + Demangle, DemangleContext, DemangleOptions, ExprPrimary, Expression, FoldExpr, + FunctionParam, FunctionType, GlobalCtorDtor, Identifier, Initializer, LambdaSig, + LocalName, MangledName, MemberName, Name, NestedName, NonSubstitution, Number, NvOffset, + OperatorName, ParametricBuiltinType, Parse, ParseContext, PointerToMemberType, Prefix, + PrefixHandle, RefQualifier, ResourceName, SeqId, SimpleId, SimpleOperatorName, + SourceName, SpecialName, StandardBuiltinType, SubobjectExpr, Substitution, TemplateArg, + TemplateArgs, TemplateParam, TemplateParamDecl, TemplateTemplateParam, TemplateTemplateParamHandle, Type, TypeHandle, UnnamedTypeName, UnqualifiedName, UnresolvedName, UnresolvedQualifierLevel, UnresolvedType, UnresolvedTypeHandle, UnscopedName, UnscopedTemplateName, UnscopedTemplateNameHandle, @@ -10378,6 +10390,13 @@ mod tests { subs.get_type(&ty), Some(Type::Builtin(BuiltinType::Extension(_))) )); + let mut out = String::new(); + let mut demangle_ctx = DemangleContext::new(&subs, b"_SUBSTPACK_", DemangleOptions::default(), &mut out); + subs.get_type(&ty) + .expect("parsed type backreference") + .demangle(&mut demangle_ctx, None) + .expect("type demangle"); + assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); @@ -10388,6 +10407,14 @@ mod tests { subs.get_type(&ty), 
Some(Type::Builtin(BuiltinType::Extension(_))) )); + let mut out = String::new(); + let mut demangle_ctx = + DemangleContext::new(&subs, b"_SUBSTBUILTINPACK_", DemangleOptions::default(), &mut out); + subs.get_type(&ty) + .expect("parsed type backreference") + .demangle(&mut demangle_ctx, None) + .expect("type demangle"); + assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); @@ -10397,6 +10424,11 @@ mod tests { expr, Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name(_))) )); + let mut out = String::new(); + let mut demangle_ctx = DemangleContext::new(&subs, b"_SUBSTPACK_", DemangleOptions::default(), &mut out); + expr.demangle(&mut demangle_ctx, None) + .expect("expression demangle"); + assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); @@ -10407,6 +10439,12 @@ mod tests { expr, Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name(_))) )); + let mut out = String::new(); + let mut demangle_ctx = + DemangleContext::new(&subs, b"_SUBSTBUILTINPACK_", DemangleOptions::default(), &mut out); + expr.demangle(&mut demangle_ctx, None) + .expect("expression demangle"); + assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); } From 3720d2a24efb1f808cff9746d0672f9d7b80f236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Wed, 15 Apr 2026 11:36:43 +0000 Subject: [PATCH 5/9] cpp_demangle: scope substpack noise and preserve substitutions --- src/ast.rs | 78 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 21 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 35b8ec3..5ca8138 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2776,11 +2776,42 @@ impl SourceName { byte == b'0' || (b'0' <= byte && byte <= b'9') } + #[inline] + fn has_length_prefix_in_input(&self, input: &[u8]) -> bool { + let start = 
self.0.start; + let end = self.0.end; + if start == 0 || end < start { + return false; + } + + let mut i = start; + while i > 0 && input[i - 1].is_ascii_digit() { + i -= 1; + } + if i == start { + return false; + } + + let mut parsed_len = 0usize; + for &digit in &input[i..start] { + parsed_len = match parsed_len + .checked_mul(10) + .and_then(|v| v.checked_add((digit - b'0') as usize)) + { + Some(v) => v, + None => return false, + }; + } + + parsed_len == (end - start) + } + #[inline] fn is_clang_substpack_placeholder(&self, input: &[u8]) -> bool { let SourceName(Identifier { start, end }) = self; let ident = &input[*start..*end]; - ident == b"_SUBSTPACK_" || ident == b"_SUBSTBUILTINPACK_" + (ident == b"_SUBSTPACK_" || ident == b"_SUBSTBUILTINPACK_") + && !self.has_length_prefix_in_input(input) } } @@ -3767,16 +3798,16 @@ impl Parse for TypeHandle { start: input.index(), end: input.index() + b"_SUBSTPACK_".len(), }); - let ty = Type::Builtin(BuiltinType::Extension(name)); - return insert_and_return_handle(ty, subs, tail); + let handle = TypeHandle::Builtin(BuiltinType::Extension(name)); + return Ok((handle, tail)); } if let Ok(tail) = consume(b"_SUBSTBUILTINPACK_", input) { let name = SourceName(Identifier { start: input.index(), end: input.index() + b"_SUBSTBUILTINPACK_".len(), }); - let ty = Type::Builtin(BuiltinType::Extension(name)); - return insert_and_return_handle(ty, subs, tail); + let handle = TypeHandle::Builtin(BuiltinType::Extension(name)); + return Ok((handle, tail)); } // ::= @@ -10386,16 +10417,10 @@ mod tests { let mut subs = SubstitutionTable::new(); let (ty, tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTPACK_...")) .expect("type _SUBSTPACK_ should parse"); - assert!(matches!( - subs.get_type(&ty), - Some(Type::Builtin(BuiltinType::Extension(_))) - )); + assert!(matches!(ty, TypeHandle::Builtin(BuiltinType::Extension(_)))); let mut out = String::new(); let mut demangle_ctx = DemangleContext::new(&subs, b"_SUBSTPACK_", 
DemangleOptions::default(), &mut out); - subs.get_type(&ty) - .expect("parsed type backreference") - .demangle(&mut demangle_ctx, None) - .expect("type demangle"); + ty.demangle(&mut demangle_ctx, None).expect("type demangle"); assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); @@ -10403,17 +10428,11 @@ mod tests { let (ty, tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTBUILTINPACK_...")) .expect("type _SUBSTBUILTINPACK_ should parse"); - assert!(matches!( - subs.get_type(&ty), - Some(Type::Builtin(BuiltinType::Extension(_))) - )); + assert!(matches!(ty, TypeHandle::Builtin(BuiltinType::Extension(_)))); let mut out = String::new(); let mut demangle_ctx = DemangleContext::new(&subs, b"_SUBSTBUILTINPACK_", DemangleOptions::default(), &mut out); - subs.get_type(&ty) - .expect("parsed type backreference") - .demangle(&mut demangle_ctx, None) - .expect("type demangle"); + ty.demangle(&mut demangle_ctx, None).expect("type demangle"); assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); @@ -10448,6 +10467,23 @@ mod tests { assert_eq!(tail.as_ref(), b"..."); } + #[test] + fn demangle_length_prefixed_substpack_identifier_is_not_noise() { + let mangled = b"_Z11_SUBSTPACK_v"; + let sym = Symbol::new(&mangled[..]).expect("symbol parse"); + let demangled = sym.demangle().expect("demangle"); + assert!( + demangled.contains("_SUBSTPACK_"), + "expected regular source name in demangled output: {}", + demangled + ); + assert!( + !demangled.contains("{clang-subst-pack-noise}"), + "did not expect substpack noise placeholder for length-prefixed source name: {}", + demangled + ); + } + #[test] fn parse_expression() { assert_parse!(Expression { From c449c078a9484b0ecb4397ee11b036479f754e27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Wed, 15 Apr 2026 13:19:53 +0000 Subject: [PATCH 6/9] cpp_demangle: dedupe substpack placeholders and tighten tests --- src/ast.rs | 77 
+++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 5ca8138..a620085 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -17,6 +17,17 @@ use core::ops; use core::ptr; use core::str; +const CLANG_SUBSTPACK_PLACEHOLDER: &[u8] = b"_SUBSTPACK_"; +const CLANG_SUBSTBUILTINPACK_PLACEHOLDER: &[u8] = b"_SUBSTBUILTINPACK_"; + +#[inline] +fn clang_placeholder_source_name(start: usize, token: &[u8]) -> SourceName { + SourceName(Identifier { + start, + end: start + token.len(), + }) +} + macro_rules! r#try_recurse { ($expr:expr $(,)?) => { match $expr { @@ -2810,7 +2821,7 @@ impl SourceName { fn is_clang_substpack_placeholder(&self, input: &[u8]) -> bool { let SourceName(Identifier { start, end }) = self; let ident = &input[*start..*end]; - (ident == b"_SUBSTPACK_" || ident == b"_SUBSTBUILTINPACK_") + (ident == CLANG_SUBSTPACK_PLACEHOLDER || ident == CLANG_SUBSTBUILTINPACK_PLACEHOLDER) && !self.has_length_prefix_in_input(input) } } @@ -3793,19 +3804,14 @@ impl Parse for TypeHandle { // are not part of the Itanium grammar, so we parse them using existing // extension nodes to preserve demangling progress without changing the // public AST enum surface. 
- if let Ok(tail) = consume(b"_SUBSTPACK_", input) { - let name = SourceName(Identifier { - start: input.index(), - end: input.index() + b"_SUBSTPACK_".len(), - }); + if let Ok(tail) = consume(CLANG_SUBSTPACK_PLACEHOLDER, input) { + let name = clang_placeholder_source_name(input.index(), CLANG_SUBSTPACK_PLACEHOLDER); let handle = TypeHandle::Builtin(BuiltinType::Extension(name)); return Ok((handle, tail)); } - if let Ok(tail) = consume(b"_SUBSTBUILTINPACK_", input) { - let name = SourceName(Identifier { - start: input.index(), - end: input.index() + b"_SUBSTBUILTINPACK_".len(), - }); + if let Ok(tail) = consume(CLANG_SUBSTBUILTINPACK_PLACEHOLDER, input) { + let name = + clang_placeholder_source_name(input.index(), CLANG_SUBSTBUILTINPACK_PLACEHOLDER); let handle = TypeHandle::Builtin(BuiltinType::Extension(name)); return Ok((handle, tail)); } @@ -6150,21 +6156,16 @@ impl Parse for Expression { // Non-standard Clang extension markers for unmangleable // substitution-pack expressions. Keep parsing by mapping these to an // unresolved source name placeholder using existing AST nodes. 
- if let Ok(tail) = consume(b"_SUBSTPACK_", input) { - let name = SourceName(Identifier { - start: input.index(), - end: input.index() + b"_SUBSTPACK_".len(), - }); + if let Ok(tail) = consume(CLANG_SUBSTPACK_PLACEHOLDER, input) { + let name = clang_placeholder_source_name(input.index(), CLANG_SUBSTPACK_PLACEHOLDER); let expr = Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name( SimpleId(name, None), ))); return Ok((expr, tail)); } - if let Ok(tail) = consume(b"_SUBSTBUILTINPACK_", input) { - let name = SourceName(Identifier { - start: input.index(), - end: input.index() + b"_SUBSTBUILTINPACK_".len(), - }); + if let Ok(tail) = consume(CLANG_SUBSTBUILTINPACK_PLACEHOLDER, input) { + let name = + clang_placeholder_source_name(input.index(), CLANG_SUBSTBUILTINPACK_PLACEHOLDER); let expr = Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name( SimpleId(name, None), ))); @@ -10414,53 +10415,65 @@ mod tests { fn parse_clang_substpack_noise_tokens_directly() { let ctx = ParseContext::new(Default::default()); + let substpack_input = b"_SUBSTPACK_..."; + let substbuiltinpack_input = b"_SUBSTBUILTINPACK_..."; + let mut subs = SubstitutionTable::new(); - let (ty, tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTPACK_...")) + let (ty, tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(substpack_input)) .expect("type _SUBSTPACK_ should parse"); assert!(matches!(ty, TypeHandle::Builtin(BuiltinType::Extension(_)))); let mut out = String::new(); - let mut demangle_ctx = DemangleContext::new(&subs, b"_SUBSTPACK_", DemangleOptions::default(), &mut out); + let mut demangle_ctx = + DemangleContext::new(&subs, substpack_input, DemangleOptions::default(), &mut out); ty.demangle(&mut demangle_ctx, None).expect("type demangle"); assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); let (ty, tail) = - TypeHandle::parse(&ctx, &mut subs, 
IndexStr::new(b"_SUBSTBUILTINPACK_...")) + TypeHandle::parse(&ctx, &mut subs, IndexStr::new(substbuiltinpack_input)) .expect("type _SUBSTBUILTINPACK_ should parse"); assert!(matches!(ty, TypeHandle::Builtin(BuiltinType::Extension(_)))); let mut out = String::new(); - let mut demangle_ctx = - DemangleContext::new(&subs, b"_SUBSTBUILTINPACK_", DemangleOptions::default(), &mut out); + let mut demangle_ctx = DemangleContext::new( + &subs, + substbuiltinpack_input, + DemangleOptions::default(), + &mut out, + ); ty.demangle(&mut demangle_ctx, None).expect("type demangle"); assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); - let (expr, tail) = Expression::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTPACK_...")) + let (expr, tail) = Expression::parse(&ctx, &mut subs, IndexStr::new(substpack_input)) .expect("expression _SUBSTPACK_ should parse"); assert!(matches!( expr, Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name(_))) )); let mut out = String::new(); - let mut demangle_ctx = DemangleContext::new(&subs, b"_SUBSTPACK_", DemangleOptions::default(), &mut out); + let mut demangle_ctx = + DemangleContext::new(&subs, substpack_input, DemangleOptions::default(), &mut out); expr.demangle(&mut demangle_ctx, None) .expect("expression demangle"); assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); - let (expr, tail) = - Expression::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTBUILTINPACK_...")) + let (expr, tail) = Expression::parse(&ctx, &mut subs, IndexStr::new(substbuiltinpack_input)) .expect("expression _SUBSTBUILTINPACK_ should parse"); assert!(matches!( expr, Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name(_))) )); let mut out = String::new(); - let mut demangle_ctx = - DemangleContext::new(&subs, b"_SUBSTBUILTINPACK_", DemangleOptions::default(), &mut out); + let mut demangle_ctx = 
DemangleContext::new( + &subs, + substbuiltinpack_input, + DemangleOptions::default(), + &mut out, + ); expr.demangle(&mut demangle_ctx, None) .expect("expression demangle"); assert_eq!(out, "{clang-subst-pack-noise}"); From 59959f99a7cfd2a2d827f7b965385bbe0fd88891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Wed, 15 Apr 2026 20:20:25 +0000 Subject: [PATCH 7/9] preserve substpack prefix EOF errors --- src/ast.rs | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/ast.rs b/src/ast.rs index a620085..2871a91 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -3803,12 +3803,23 @@ impl Parse for TypeHandle { // Clang's Itanium mangler (with FIXME comments in Clang source). They // are not part of the Itanium grammar, so we parse them using existing // extension nodes to preserve demangling progress without changing the - // public AST enum surface. + // public AST enum surface. They are intentionally non-substitutable so + // that synthetic recovery tokens do not perturb real `S...` indices. + if input.len() < CLANG_SUBSTPACK_PLACEHOLDER.len() + && CLANG_SUBSTPACK_PLACEHOLDER.starts_with(input.as_ref()) + { + return Err(error::Error::UnexpectedEnd); + } if let Ok(tail) = consume(CLANG_SUBSTPACK_PLACEHOLDER, input) { let name = clang_placeholder_source_name(input.index(), CLANG_SUBSTPACK_PLACEHOLDER); let handle = TypeHandle::Builtin(BuiltinType::Extension(name)); return Ok((handle, tail)); } + if input.len() < CLANG_SUBSTBUILTINPACK_PLACEHOLDER.len() + && CLANG_SUBSTBUILTINPACK_PLACEHOLDER.starts_with(input.as_ref()) + { + return Err(error::Error::UnexpectedEnd); + } if let Ok(tail) = consume(CLANG_SUBSTBUILTINPACK_PLACEHOLDER, input) { let name = clang_placeholder_source_name(input.index(), CLANG_SUBSTBUILTINPACK_PLACEHOLDER); @@ -6156,6 +6167,11 @@ impl Parse for Expression { // Non-standard Clang extension markers for unmangleable // substitution-pack expressions. 
Keep parsing by mapping these to an // unresolved source name placeholder using existing AST nodes. + if input.len() < CLANG_SUBSTPACK_PLACEHOLDER.len() + && CLANG_SUBSTPACK_PLACEHOLDER.starts_with(input.as_ref()) + { + return Err(error::Error::UnexpectedEnd); + } if let Ok(tail) = consume(CLANG_SUBSTPACK_PLACEHOLDER, input) { let name = clang_placeholder_source_name(input.index(), CLANG_SUBSTPACK_PLACEHOLDER); let expr = Expression::UnresolvedName(UnresolvedName::Name(BaseUnresolvedName::Name( @@ -6163,6 +6179,11 @@ impl Parse for Expression { ))); return Ok((expr, tail)); } + if input.len() < CLANG_SUBSTBUILTINPACK_PLACEHOLDER.len() + && CLANG_SUBSTBUILTINPACK_PLACEHOLDER.starts_with(input.as_ref()) + { + return Err(error::Error::UnexpectedEnd); + } if let Ok(tail) = consume(CLANG_SUBSTBUILTINPACK_PLACEHOLDER, input) { let name = clang_placeholder_source_name(input.index(), CLANG_SUBSTBUILTINPACK_PLACEHOLDER); @@ -10478,6 +10499,26 @@ mod tests { .expect("expression demangle"); assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); + + let mut subs = SubstitutionTable::new(); + let err = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTPACK")) + .expect_err("truncated _SUBSTPACK should return UnexpectedEnd"); + assert_eq!(err, Error::UnexpectedEnd); + + let mut subs = SubstitutionTable::new(); + let err = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTBUILTINP")) + .expect_err("truncated _SUBSTBUILTINPACK should return UnexpectedEnd"); + assert_eq!(err, Error::UnexpectedEnd); + + let mut subs = SubstitutionTable::new(); + let err = Expression::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTPACK")) + .expect_err("truncated expression _SUBSTPACK should return UnexpectedEnd"); + assert_eq!(err, Error::UnexpectedEnd); + + let mut subs = SubstitutionTable::new(); + let err = Expression::parse(&ctx, &mut subs, IndexStr::new(b"_SUBSTBUILTINP")) + .expect_err("truncated expression _SUBSTBUILTINPACK should return UnexpectedEnd"); + 
assert_eq!(err, Error::UnexpectedEnd); } #[test] From 248c3668f6b38a5ff74d617f730e3ca54caffd16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Fri, 17 Apr 2026 08:54:33 +0000 Subject: [PATCH 8/9] Apply rustfmt for PR 313 --- src/ast.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 2871a91..0315883 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -8699,16 +8699,15 @@ mod tests { use super::{ AbiTag, AbiTags, ArrayType, BareFunctionType, BaseUnresolvedName, BuiltinType, CallOffset, ClassEnumType, ClosureTypeName, ConstraintExpression, CtorDtorName, CvQualifiers, - DataMemberPrefix, Decltype, DestructorName, Discriminator, Encoding, ExceptionSpec, - Demangle, DemangleContext, DemangleOptions, ExprPrimary, Expression, FoldExpr, - FunctionParam, FunctionType, GlobalCtorDtor, Identifier, Initializer, LambdaSig, - LocalName, MangledName, MemberName, Name, NestedName, NonSubstitution, Number, NvOffset, - OperatorName, ParametricBuiltinType, Parse, ParseContext, PointerToMemberType, Prefix, - PrefixHandle, RefQualifier, ResourceName, SeqId, SimpleId, SimpleOperatorName, - SourceName, SpecialName, StandardBuiltinType, SubobjectExpr, Substitution, TemplateArg, - TemplateArgs, TemplateParam, TemplateParamDecl, - TemplateTemplateParam, TemplateTemplateParamHandle, Type, TypeHandle, UnnamedTypeName, - UnqualifiedName, UnresolvedName, UnresolvedQualifierLevel, UnresolvedType, + DataMemberPrefix, Decltype, Demangle, DemangleContext, DemangleOptions, DestructorName, + Discriminator, Encoding, ExceptionSpec, ExprPrimary, Expression, FoldExpr, FunctionParam, + FunctionType, GlobalCtorDtor, Identifier, Initializer, LambdaSig, LocalName, MangledName, + MemberName, Name, NestedName, NonSubstitution, Number, NvOffset, OperatorName, + ParametricBuiltinType, Parse, ParseContext, PointerToMemberType, Prefix, PrefixHandle, + RefQualifier, ResourceName, SeqId, SimpleId, 
SimpleOperatorName, SourceName, SpecialName, + StandardBuiltinType, SubobjectExpr, Substitution, TemplateArg, TemplateArgs, TemplateParam, + TemplateParamDecl, TemplateTemplateParam, TemplateTemplateParamHandle, Type, TypeHandle, + UnnamedTypeName, UnqualifiedName, UnresolvedName, UnresolvedQualifierLevel, UnresolvedType, UnresolvedTypeHandle, UnscopedName, UnscopedTemplateName, UnscopedTemplateNameHandle, VOffset, VectorType, WellKnownComponent, }; @@ -10451,9 +10450,8 @@ mod tests { assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); - let (ty, tail) = - TypeHandle::parse(&ctx, &mut subs, IndexStr::new(substbuiltinpack_input)) - .expect("type _SUBSTBUILTINPACK_ should parse"); + let (ty, tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(substbuiltinpack_input)) + .expect("type _SUBSTBUILTINPACK_ should parse"); assert!(matches!(ty, TypeHandle::Builtin(BuiltinType::Extension(_)))); let mut out = String::new(); let mut demangle_ctx = DemangleContext::new( @@ -10482,7 +10480,8 @@ mod tests { assert_eq!(tail.as_ref(), b"..."); let mut subs = SubstitutionTable::new(); - let (expr, tail) = Expression::parse(&ctx, &mut subs, IndexStr::new(substbuiltinpack_input)) + let (expr, tail) = + Expression::parse(&ctx, &mut subs, IndexStr::new(substbuiltinpack_input)) .expect("expression _SUBSTBUILTINPACK_ should parse"); assert!(matches!( expr, From e6ca20f4507c49a24a57a5e320312d53f0f188db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristj=C3=A1n=20Valur=20J=C3=B3nsson?= Date: Fri, 17 Apr 2026 09:02:00 +0000 Subject: [PATCH 9/9] Handle SUBSTPACK placeholders as substitutable --- src/ast.rs | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 0315883..eb6b871 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -3803,8 +3803,8 @@ impl Parse for TypeHandle { // Clang's Itanium mangler (with FIXME comments in Clang source). 
They // are not part of the Itanium grammar, so we parse them using existing // extension nodes to preserve demangling progress without changing the - // public AST enum surface. They are intentionally non-substitutable so - // that synthetic recovery tokens do not perturb real `S...` indices. + // public AST enum surface. We still insert them into substitutions to + // match clang's emitted substitution references for these placeholders. if input.len() < CLANG_SUBSTPACK_PLACEHOLDER.len() && CLANG_SUBSTPACK_PLACEHOLDER.starts_with(input.as_ref()) { @@ -3812,8 +3812,8 @@ impl Parse for TypeHandle { } if let Ok(tail) = consume(CLANG_SUBSTPACK_PLACEHOLDER, input) { let name = clang_placeholder_source_name(input.index(), CLANG_SUBSTPACK_PLACEHOLDER); - let handle = TypeHandle::Builtin(BuiltinType::Extension(name)); - return Ok((handle, tail)); + let ty = Type::Builtin(BuiltinType::Extension(name)); + return insert_and_return_handle(ty, subs, tail); } if input.len() < CLANG_SUBSTBUILTINPACK_PLACEHOLDER.len() && CLANG_SUBSTBUILTINPACK_PLACEHOLDER.starts_with(input.as_ref()) @@ -3823,8 +3823,8 @@ impl Parse for TypeHandle { if let Ok(tail) = consume(CLANG_SUBSTBUILTINPACK_PLACEHOLDER, input) { let name = clang_placeholder_source_name(input.index(), CLANG_SUBSTBUILTINPACK_PLACEHOLDER); - let handle = TypeHandle::Builtin(BuiltinType::Extension(name)); - return Ok((handle, tail)); + let ty = Type::Builtin(BuiltinType::Extension(name)); + return insert_and_return_handle(ty, subs, tail); } // ::= @@ -10441,7 +10441,7 @@ mod tests { let mut subs = SubstitutionTable::new(); let (ty, tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(substpack_input)) .expect("type _SUBSTPACK_ should parse"); - assert!(matches!(ty, TypeHandle::Builtin(BuiltinType::Extension(_)))); + assert!(matches!(ty, TypeHandle::BackReference(0))); let mut out = String::new(); let mut demangle_ctx = DemangleContext::new(&subs, substpack_input, DemangleOptions::default(), &mut out); @@ -10449,10 +10449,15 
@@ mod tests { assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); + let (subst_ty, subst_tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"S_...")) + .expect("substitution of _SUBSTPACK_ should parse"); + assert!(matches!(subst_ty, TypeHandle::BackReference(0))); + assert_eq!(subst_tail.as_ref(), b"..."); + let mut subs = SubstitutionTable::new(); let (ty, tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(substbuiltinpack_input)) .expect("type _SUBSTBUILTINPACK_ should parse"); - assert!(matches!(ty, TypeHandle::Builtin(BuiltinType::Extension(_)))); + assert!(matches!(ty, TypeHandle::BackReference(0))); let mut out = String::new(); let mut demangle_ctx = DemangleContext::new( &subs, @@ -10464,6 +10469,11 @@ mod tests { assert_eq!(out, "{clang-subst-pack-noise}"); assert_eq!(tail.as_ref(), b"..."); + let (subst_ty, subst_tail) = TypeHandle::parse(&ctx, &mut subs, IndexStr::new(b"S_...")) + .expect("substitution of _SUBSTBUILTINPACK_ should parse"); + assert!(matches!(subst_ty, TypeHandle::BackReference(0))); + assert_eq!(subst_tail.as_ref(), b"..."); + let mut subs = SubstitutionTable::new(); let (expr, tail) = Expression::parse(&ctx, &mut subs, IndexStr::new(substpack_input)) .expect("expression _SUBSTPACK_ should parse");