Skip to content

Commit 283d187

Browse files
committed
feat: complete @kernel elementwise stride-4 SIMD loop vectorization
Add end-to-end support for @kernel elementwise inside compute() blocks. When the body matches `for i in range { arr[i] = arr[i] OP scalar }`, the compiler pattern-extracts the operands and emits a stride-4 SIMD loop (VectorLoad F32x4, VectorSplat, vector binary op, VectorStore) with a scalar remainder path for the len % 4 trailing elements.

HIR:
- Add VectorLoad and VectorStore instructions with replace_uses, operands, and result extraction in hir.rs, analysis.rs, hir_dump.rs
- Cranelift lowering for both via load/store with F32X4/F64X2/I32X4/I64X2

SSA builder (ssa.rs):
- emit_elementwise_simd_loop: 200-line stride-4 SIMD loop emitter using alloca-based loop counter, VectorLoad/Store, VectorSplat, and both vector and scalar body paths
- try_extract_elementwise_pattern: matches for-loop body assignment pattern
- emit_list_data_ptr / emit_list_len: extract fields from List<T> struct
- hint_elem_ty_from_expr: F64→F32 normalization for 128-bit SIMD
- FpTrunc scalar cast when grammar's F64 literals meet F32 SIMD path
- simd_continue_block field to redirect process_statement after SIMD loop
- new_from_function, finish, alloc_value, push_instruction, set_terminator: public API for direct HIR construction in tests
- Remove remaining DEBUG eprintln spam

Grammar (ml.zyn):
- Add index_assign_stmt rule: arr[idx] = expr

Tests:
- 5 new cranelift_backend tests (VectorLoad/Store compile+execute, emit_elementwise_simd_loop_f32_mul)
- 1 new e2e test (test_execute_kernel_elementwise_simd)
- New compute_simd.zynml example with both @kernel reduce and elementwise

64 cranelift_backend tests pass, 235 e2e tests pass.
1 parent 81a7309 commit 283d187

11 files changed

Lines changed: 2006 additions & 127 deletions

File tree

crates/compiler/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ name = "simd_vectorization"
6767
harness = false
6868

6969
[features]
70-
default = ["cranelift-backend"]
70+
default = ["all-backends"]
7171
cranelift-backend = ["cranelift", "cranelift-codegen", "cranelift-frontend", "cranelift-jit", "cranelift-module", "cranelift-native"]
7272
llvm-backend = ["inkwell"]
7373
all-backends = ["cranelift-backend", "llvm-backend"]

crates/compiler/src/analysis.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,15 @@ impl AnalysisRunner {
478478

479479
// CaptureContinuation always has a result
480480
HirInstruction::CaptureContinuation { result, .. } => Some(*result),
481+
482+
// SIMD instructions: Splat/Extract/Insert/Reduce/Load produce a result
483+
HirInstruction::VectorSplat { result, .. }
484+
| HirInstruction::VectorExtractLane { result, .. }
485+
| HirInstruction::VectorInsertLane { result, .. }
486+
| HirInstruction::VectorHorizontalReduce { result, .. }
487+
| HirInstruction::VectorLoad { result, .. } => Some(*result),
488+
// VectorStore only writes to memory, no result
489+
HirInstruction::VectorStore { .. } => None,
481490
}
482491
}
483492

crates/compiler/src/cranelift_backend.rs

Lines changed: 234 additions & 79 deletions
Large diffs are not rendered by default.

crates/compiler/src/hir.rs

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,79 @@ pub enum HirInstruction {
618618
/// Type of value the continuation expects when resumed
619619
resume_ty: HirType,
620620
},
621+
622+
// ========================================================================
623+
// SIMD / Vector Instructions
624+
// ========================================================================
625+
626+
/// Broadcast a scalar value to all lanes of a SIMD vector.
627+
///
628+
/// `ty` must be `HirType::Vector(elem_ty, lanes)`.
629+
/// `scalar` must be a value of type `elem_ty`.
630+
VectorSplat {
631+
result: HirId,
632+
ty: HirType,
633+
scalar: HirId,
634+
},
635+
636+
/// Extract a single lane from a SIMD vector register to a scalar.
637+
///
638+
/// `ty` is the scalar element type (output).
639+
/// `lane` must be less than the vector's lane count.
640+
VectorExtractLane {
641+
result: HirId,
642+
ty: HirType,
643+
vector: HirId,
644+
lane: u8,
645+
},
646+
647+
/// Insert a scalar into a specific lane of a SIMD vector.
648+
///
649+
/// `ty` must be `HirType::Vector(elem_ty, lanes)` (output type).
650+
/// `scalar` must match the element type of `vector`.
651+
VectorInsertLane {
652+
result: HirId,
653+
ty: HirType,
654+
vector: HirId,
655+
scalar: HirId,
656+
lane: u8,
657+
},
658+
659+
/// Reduce all lanes of a SIMD vector to a single scalar using a
660+
/// binary operation (commutative for Add/FAdd; Sub/FSub fold left-to-right).
661+
///
662+
/// `ty` is the scalar output type (element type of `vector`).
663+
/// Supported ops: Add, Sub, FAdd, FSub — only Add and FAdd make sense
664+
/// semantically for reductions; Sub/FSub reduce left-to-right.
665+
VectorHorizontalReduce {
666+
result: HirId,
667+
ty: HirType,
668+
vector: HirId,
669+
op: BinaryOp,
670+
},
671+
672+
/// Load a SIMD vector from a memory pointer.
673+
///
674+
/// `ty` must be `HirType::Vector(elem_ty, lanes)`.
675+
/// `ptr` must point to the first element (element type, not vector type).
676+
/// Loads `lanes` contiguous elements starting at `ptr`.
677+
VectorLoad {
678+
result: HirId,
679+
ty: HirType,
680+
ptr: HirId,
681+
align: u32,
682+
},
683+
684+
/// Store a SIMD vector to a memory pointer.
685+
///
686+
/// `value` must have type `HirType::Vector(elem_ty, lanes)`.
687+
/// `ptr` must point to the first element (element type, not vector type).
688+
/// Stores `lanes` contiguous elements starting at `ptr`.
689+
VectorStore {
690+
value: HirId,
691+
ptr: HirId,
692+
align: u32,
693+
},
621694
}
622695

623696
/// Block terminator instructions
@@ -837,6 +910,27 @@ impl HirInstruction {
837910
replace(handler_scope, replacements);
838911
}
839912
HirInstruction::CaptureContinuation { .. } => {}
913+
// SIMD instructions
914+
HirInstruction::VectorSplat { scalar, .. } => {
915+
replace(scalar, replacements);
916+
}
917+
HirInstruction::VectorExtractLane { vector, .. } => {
918+
replace(vector, replacements);
919+
}
920+
HirInstruction::VectorInsertLane { vector, scalar, .. } => {
921+
replace(vector, replacements);
922+
replace(scalar, replacements);
923+
}
924+
HirInstruction::VectorHorizontalReduce { vector, .. } => {
925+
replace(vector, replacements);
926+
}
927+
HirInstruction::VectorLoad { ptr, .. } => {
928+
replace(ptr, replacements);
929+
}
930+
HirInstruction::VectorStore { value, ptr, .. } => {
931+
replace(value, replacements);
932+
replace(ptr, replacements);
933+
}
840934
}
841935
}
842936

@@ -993,6 +1087,27 @@ impl HirInstruction {
9931087
ops.push(*handler_scope);
9941088
}
9951089
HirInstruction::CaptureContinuation { .. } => {}
1090+
// SIMD instructions
1091+
HirInstruction::VectorSplat { scalar, .. } => {
1092+
ops.push(*scalar);
1093+
}
1094+
HirInstruction::VectorExtractLane { vector, .. } => {
1095+
ops.push(*vector);
1096+
}
1097+
HirInstruction::VectorInsertLane { vector, scalar, .. } => {
1098+
ops.push(*vector);
1099+
ops.push(*scalar);
1100+
}
1101+
HirInstruction::VectorHorizontalReduce { vector, .. } => {
1102+
ops.push(*vector);
1103+
}
1104+
HirInstruction::VectorLoad { ptr, .. } => {
1105+
ops.push(*ptr);
1106+
}
1107+
HirInstruction::VectorStore { value, ptr, .. } => {
1108+
ops.push(*value);
1109+
ops.push(*ptr);
1110+
}
9961111
}
9971112
ops
9981113
}

crates/compiler/src/hir_dump.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,38 @@ fn fmt_instruction(inst: &HirInstruction, mapper: &mut IdMapper) -> String {
918918
fmt_type(resume_ty)
919919
)
920920
}
921+
// SIMD instructions
922+
HirInstruction::VectorSplat { result, ty, scalar } => {
923+
let r = mapper.value(result);
924+
let s = mapper.value(scalar);
925+
format!("{}: {} = vector_splat {}", r, fmt_type(ty), s)
926+
}
927+
HirInstruction::VectorExtractLane { result, ty, vector, lane } => {
928+
let r = mapper.value(result);
929+
let v = mapper.value(vector);
930+
format!("{}: {} = extract_lane {}, lane {}", r, fmt_type(ty), v, lane)
931+
}
932+
HirInstruction::VectorInsertLane { result, ty, vector, scalar, lane } => {
933+
let r = mapper.value(result);
934+
let v = mapper.value(vector);
935+
let s = mapper.value(scalar);
936+
format!("{}: {} = insert_lane {}, lane {}, {}", r, fmt_type(ty), v, lane, s)
937+
}
938+
HirInstruction::VectorHorizontalReduce { result, ty, vector, op } => {
939+
let r = mapper.value(result);
940+
let v = mapper.value(vector);
941+
format!("{}: {} = hreduce.{} {}", r, fmt_type(ty), fmt_binary_op(op), v)
942+
}
943+
HirInstruction::VectorLoad { result, ty, ptr, align } => {
944+
let r = mapper.value(result);
945+
let p = mapper.value(ptr);
946+
format!("{}: {} = vload {}, align {}", r, fmt_type(ty), p, align)
947+
}
948+
HirInstruction::VectorStore { value, ptr, align } => {
949+
let v = mapper.value(value);
950+
let p = mapper.value(ptr);
951+
format!("vstore {}, {}, align {}", v, p, align)
952+
}
921953
}
922954
}
923955

crates/compiler/src/llvm_backend.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2998,7 +2998,12 @@ impl<'ctx> LLVMBackend<'ctx> {
29982998
format!("Intrinsic {:?} not yet implemented in LLVM backend", intrinsic)
29992999
))
30003000
}
3001-
_ => todo!()
3001+
3002+
ClosureToZrtl | BoxToZrtl | PrimitiveToBox | TypeTagOf => {
3003+
Err(CompilerError::CodeGen(
3004+
format!("Intrinsic {:?} not yet implemented in LLVM backend", intrinsic)
3005+
))
3006+
}
30023007
}
30033008
}
30043009

0 commit comments

Comments
 (0)