Skip to content

Commit 5c9f551

Browse files
committed
fix: Bytecode serialization and InternedString serde support
- Use a fixed 44-byte raw header format for bytecode serialization.
- Match the serialization and deserialization header formats to prevent checksum mismatches.
- Fix InternedString to use the serde Visitor pattern instead of serde_json::Value.
- This enables support for the postcard and bincode formats (not just JSON).

All 6 bytecode tests now pass: postcard, json, bincode, checksum, stats, roundtrip.
1 parent 0be0f46 commit 5c9f551

2 files changed

Lines changed: 80 additions & 48 deletions

File tree

crates/compiler/src/bytecode.rs

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,34 @@ impl AsUuid for crate::hir::HirId {
148148
}
149149
}
150150

151+
/// Serialize header to raw 44-byte format (matches deserialize_raw_header)
///
/// Fields are appended in a fixed order, integers in little-endian byte order.
/// Resulting layout (byte offsets derived from the field widths written below):
///
/// | offset | size | field         |
/// |--------|------|---------------|
/// | 0      | 4    | magic         |
/// | 4      | 2    | major_version |
/// | 6      | 2    | minor_version |
/// | 8      | 1    | format        |
/// | 9      | 3    | zero padding  |
/// | 12     | 4    | flags         |
/// | 16     | 16   | module_id     |
/// | 32     | 8    | payload_size  |
/// | 40     | 4    | checksum      |
///
/// Returns a freshly allocated `Vec<u8>` holding the encoded header.
152+
fn serialize_raw_header(header: &BytecodeHeader) -> Vec<u8> {
153+
const HEADER_SIZE: usize = 44;
154+
let mut bytes = Vec::with_capacity(HEADER_SIZE);
155+
156+
// magic (u32 little-endian)
157+
bytes.extend_from_slice(&header.magic.to_le_bytes());
158+
// major_version (u16 little-endian)
159+
bytes.extend_from_slice(&header.major_version.to_le_bytes());
160+
// minor_version (u16 little-endian)
161+
bytes.extend_from_slice(&header.minor_version.to_le_bytes());
162+
// format (u8)
163+
bytes.push(header.format);
164+
// padding (3 bytes)
// Always written as zeros; brings the running length from 9 to 12 bytes.
165+
bytes.extend_from_slice(&[0u8; 3]);
166+
// flags (u32 little-endian)
167+
bytes.extend_from_slice(&header.flags.to_le_bytes());
168+
// module_id (16 bytes UUID)
169+
bytes.extend_from_slice(header.module_id.as_bytes());
170+
// payload_size (u64 little-endian)
171+
bytes.extend_from_slice(&header.payload_size.to_le_bytes());
172+
// checksum (u32 little-endian)
173+
bytes.extend_from_slice(&header.checksum.to_le_bytes());
174+
175+
// Sanity check on the total length; `debug_assert_eq!` is only evaluated in
// builds with debug assertions enabled.
debug_assert_eq!(bytes.len(), HEADER_SIZE);
176+
bytes
177+
}
178+
151179
/// Serialize a HIR module to bytecode
152180
pub fn serialize_module(module: &HirModule, format: Format) -> Result<Vec<u8>> {
153181
// Serialize the module payload
@@ -174,9 +202,8 @@ pub fn serialize_module(module: &HirModule, format: Format) -> Result<Vec<u8>> {
174202
header.payload_size = payload.len() as u64;
175203
header.checksum = checksum;
176204

177-
// Serialize header (always use bincode for header for consistency)
178-
let header_bytes = bincode::serialize(&header)
179-
.map_err(|e| BytecodeError::SerializationError(format!("Failed to serialize header: {}", e)))?;
205+
// Serialize header using raw 44-byte format (matches deserialize_raw_header)
206+
let header_bytes = serialize_raw_header(&header);
180207

181208
// Combine header and payload
182209
let mut result = Vec::with_capacity(header_bytes.len() + payload.len());
@@ -222,6 +249,11 @@ fn deserialize_raw_header(bytes: &[u8]) -> Result<(BytecodeHeader, usize)> {
222249
cursor.read_exact(&mut buf4)?;
223250
let magic = u32::from_le_bytes(buf4);
224251

252+
// Early validation: if magic doesn't match, this isn't raw format
253+
if magic != BytecodeHeader::MAGIC {
254+
return Err(BytecodeError::InvalidFormat);
255+
}
256+
225257
// Read major_version (u16 little-endian)
226258
let mut buf2 = [0u8; 2];
227259
cursor.read_exact(&mut buf2)?;
@@ -274,22 +306,13 @@ fn deserialize_raw_header(bytes: &[u8]) -> Result<(BytecodeHeader, usize)> {
274306

275307
/// Deserialize a HIR module from bytecode
276308
pub fn deserialize_module(bytes: &[u8]) -> Result<HirModule> {
277-
if bytes.len() < 64 {
309+
const HEADER_SIZE: usize = 44;
310+
if bytes.len() < HEADER_SIZE {
278311
return Err(BytecodeError::InvalidFormat);
279312
}
280313

281-
// Try raw header first (for Haxe compatibility), then fall back to bincode
282-
let (header, header_size) = match deserialize_raw_header(bytes) {
283-
Ok(result) => result,
284-
Err(_) => {
285-
// Fall back to bincode header
286-
let header: BytecodeHeader = bincode::deserialize(bytes)
287-
.map_err(|e| BytecodeError::DeserializationError(format!("Failed to deserialize header: {}", e)))?;
288-
let header_size = bincode::serialized_size(&header)
289-
.map_err(|e| BytecodeError::DeserializationError(format!("Failed to get header size: {}", e)))? as usize;
290-
(header, header_size)
291-
}
292-
};
314+
// Use raw 44-byte header format (matches serialize_raw_header)
315+
let (header, header_size) = deserialize_raw_header(bytes)?;
293316

294317
// Validate header
295318
header.validate()?;
@@ -338,18 +361,16 @@ pub fn deserialize_module_from_file(path: &std::path::Path) -> Result<HirModule>
338361

339362
/// Get bytecode statistics
340363
pub fn bytecode_stats(bytes: &[u8]) -> Result<BytecodeStats> {
341-
if bytes.len() < 64 {
364+
const HEADER_SIZE: usize = 44;
365+
if bytes.len() < HEADER_SIZE {
342366
return Err(BytecodeError::InvalidFormat);
343367
}
344368

345-
let header: BytecodeHeader = bincode::deserialize(bytes)
346-
.map_err(|e| BytecodeError::DeserializationError(format!("Failed to deserialize header: {}", e)))?;
369+
// Use raw header format (matches serialize_raw_header)
370+
let (header, header_size) = deserialize_raw_header(bytes)?;
347371

348372
header.validate()?;
349373

350-
let header_size = bincode::serialized_size(&header)
351-
.map_err(|e| BytecodeError::DeserializationError(format!("Failed to get header size: {}", e)))? as usize;
352-
353374
Ok(BytecodeStats {
354375
total_size: bytes.len(),
355376
header_size,
@@ -390,7 +411,7 @@ impl std::fmt::Display for BytecodeStats {
390411
mod tests {
391412
use super::*;
392413
use crate::hir::*;
393-
use std::collections::HashMap;
414+
use indexmap::IndexMap;
394415
use zyntax_typed_ast::InternedString;
395416

396417
fn create_test_module() -> HirModule {
@@ -401,9 +422,9 @@ mod tests {
401422
HirModule {
402423
id: HirId::new(),
403424
name,
404-
functions: HashMap::new(),
405-
globals: HashMap::new(),
406-
types: HashMap::new(),
425+
functions: IndexMap::new(),
426+
globals: IndexMap::new(),
427+
types: IndexMap::new(),
407428
imports: Vec::new(),
408429
exports: Vec::new(),
409430
version: 1,

crates/typed_ast/src/arena.rs

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -64,29 +64,40 @@ impl<'de> Deserialize<'de> for InternedString {
6464
where
6565
D: serde::Deserializer<'de>,
6666
{
67-
use serde::de::Error;
68-
69-
// Try to deserialize as either a string or a usize
70-
let value = serde_json::Value::deserialize(deserializer)?;
71-
72-
match value {
73-
// If it's a number, treat it as an index (legacy format)
74-
serde_json::Value::Number(n) => {
75-
let index = n.as_u64()
76-
.ok_or_else(|| D::Error::custom("InternedString index must be a positive integer"))?
77-
as usize;
78-
let symbol = Symbol::try_from_usize(index)
79-
.ok_or_else(|| D::Error::custom(format!("Invalid symbol index: {}", index)))?;
80-
Ok(InternedString(symbol))
81-
},
82-
// If it's a string, intern it (new format for Reflaxe compatibility)
83-
serde_json::Value::String(s) => {
84-
// SAFETY: We use a global arena for deserialization
85-
// This is a temporary solution - ideally we'd pass arena context through deserializer
86-
Ok(InternedString::new_global(&s))
87-
},
88-
_ => Err(D::Error::custom("InternedString must be either a string or a number")),
67+
// Use a visitor pattern to support all serde formats (postcard, bincode, JSON, etc.)
68+
// We serialize as a string, so we deserialize as a string
69+
// Stateless visitor that turns any string-like input into an `InternedString`
// by interning it through `InternedString::new_global`.
struct InternedStringVisitor;
70+
71+
impl<'de> serde::de::Visitor<'de> for InternedStringVisitor {
72+
type Value = InternedString;
73+
74+
// Text serde uses in "invalid type" errors to describe what was expected.
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
75+
formatter.write_str("a string")
76+
}
77+
78+
// Transient `&str` input: intern it via the global interner.
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
79+
where
80+
E: serde::de::Error,
81+
{
82+
Ok(InternedString::new_global(value))
83+
}
84+
85+
// Owned `String` input: interned by reference, same result as `visit_str`.
fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
86+
where
87+
E: serde::de::Error,
88+
{
89+
Ok(InternedString::new_global(&value))
90+
}
91+
92+
// Input borrowed from the deserializer's buffer: handled the same way as
// `visit_str`; the `'de` borrow is not retained.
fn visit_borrowed_str<E>(self, value: &'de str) -> Result<Self::Value, E>
93+
where
94+
E: serde::de::Error,
95+
{
96+
Ok(InternedString::new_global(value))
97+
}
8998
}
99+
100+
deserializer.deserialize_str(InternedStringVisitor)
90101
}
91102
}
92103

0 commit comments

Comments
 (0)