llvm · amitamd7 · Apr 9, 2026 · Meinersbur · Apr 22, 2026 · amitamd7
diff --git a/MicroBenchmarks/CMakeLists.txt b/MicroBenchmarks/CMakeLists.txt
@@ -8,5 +8,6 @@ add_subdirectory(harris)
 add_subdirectory(ImageProcessing)
 add_subdirectory(LoopInterchange)
 add_subdirectory(LoopVectorization)
+add_subdirectory(LoopSplit)
 add_subdirectory(MemFunctions)
 add_subdirectory(SLPVectorization)
diff --git a/MicroBenchmarks/LoopSplit/CMakeLists.txt b/MicroBenchmarks/LoopSplit/CMakeLists.txt
@@ -0,0 +1,14 @@
+# LoopSplit microbenchmark for #pragma omp split counts(...).
+# This directory lives at llvm-test-suite/MicroBenchmarks/LoopSplit/ and is
+# registered via add_subdirectory(LoopSplit) in MicroBenchmarks/CMakeLists.txt.
+#
+# Configure test-suite with a Clang that supports -fopenmp and -fopenmp-version=60.
+
+# Run the benchmark binary with %S as its working directory; main.cpp writes
+# LoopSplit.txt through a relative path, and the verify step reads %S/LoopSplit.txt.
+llvm_test_run(WORKDIR %S)
+# Compare the generated LoopSplit.txt against the checked-in reference output
+# using the ${FPCMP} tool.
+llvm_test_verify(%b/${FPCMP} %S/LoopSplit.reference_output %S/LoopSplit.txt)
+
+# Build the benchmark executable from main.cpp and attach the reference output
+# to the test (presumably so it is available next to the binary -- confirm).
+llvm_test_executable(LoopSplit main.cpp)
+llvm_test_data(LoopSplit LoopSplit.reference_output)
+
+# The kernel uses `#pragma omp split`, so OpenMP is enabled with -fopenmp-version=60.
+target_compile_options(LoopSplit PRIVATE -fopenmp -fopenmp-version=60)
+# NOTE(review): only the benchmark library is linked; presumably the split
+# transformation needs no OpenMP runtime at link time -- confirm.
+target_link_libraries(LoopSplit benchmark)
diff --git a/MicroBenchmarks/LoopSplit/LoopSplit.reference_output b/MicroBenchmarks/LoopSplit/LoopSplit.reference_output
@@ -0,0 +1 @@
+test1: 19999999900000000
diff --git a/MicroBenchmarks/LoopSplit/main.cpp b/MicroBenchmarks/LoopSplit/main.cpp
@@ -0,0 +1,57 @@
+// Microbenchmark for #pragma omp split counts(...).
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+
+#include "benchmark/benchmark.h"
+
+// Large N for measurable runs; lit verification uses same kernel once.
+#define N 200000000
+
+// Split kernel: adds up 0 + 1 + ... + (N-1). The `omp split` directive asks
+// the compiler to divide the loop into four segments: three of 50000000
+// iterations each, plus an omp_fill segment (presumably the remaining
+// iterations -- see the OpenMP 6.0 specification).
+// NOTE(review): for N = 200000000 the total is 19999999900000000, which
+// only fits when `long` is 64 bits wide -- confirm for all target platforms.
+static long run_split() {
+  long total = 0;
+#pragma omp split counts(50000000, 50000000, 50000000, omp_fill)
+  for (int idx = 0; idx < N; ++idx) {
+    total += idx;
+  }
+  return total;
+}
+
+// Baseline kernel: the same 0 + 1 + ... + (N-1) summation as the split
+// kernel, but with no loop-transformation directive, so the two timings can
+// be compared.
+// NOTE(review): for N = 200000000 the total is 19999999900000000, which
+// only fits when `long` is 64 bits wide -- confirm for all target platforms.
+static long run_baseline() {
+  long total = 0;
+  for (int idx = 0; idx < N; ++idx) {
+    total += idx;
+  }
+  return total;
+}
+
+// Entry point. Initializes Google Benchmark, runs the split kernel once and
+// writes its result to LoopSplit.txt (the file the test harness compares
+// against the reference output), then runs the registered benchmarks.
+// Returns EXIT_FAILURE if the output file cannot be opened, EXIT_SUCCESS
+// otherwise.
+int main(int argc, char *argv[]) {
+  benchmark::Initialize(&argc, argv);
+
+  // The verification output is produced before any timing starts.
+  std::ofstream out("LoopSplit.txt");
+  if (!out.is_open())
+    return EXIT_FAILURE;
+
+  const long result = run_split();
+  out << "test1: " << result << "\n";
+  out.close();
+
+  benchmark::RunSpecifiedBenchmarks();
+  return EXIT_SUCCESS;
+}
+
+// Benchmark wrapper for the split kernel. Each iteration stores the kernel
+// result in a fresh local and hands it to DoNotOptimize so the call is not
+// optimized away. Results are deliberately not accumulated across
+// iterations: a running `x += run_split()` total grows with the iteration
+// count and can overflow `long` (signed overflow is undefined behavior).
+static void BM_Split(benchmark::State &state) {
+  for (auto _ : state) {
+    long x = run_split();
+    benchmark::DoNotOptimize(x);
+  }
+}
+BENCHMARK(BM_Split)->Unit(benchmark::kMicrosecond)->MinTime(2.0);
+
+// Benchmark wrapper for the baseline (unsplit) kernel. Each iteration stores
+// the kernel result in a fresh local and hands it to DoNotOptimize so the
+// call is not optimized away. Results are deliberately not accumulated
+// across iterations: a running `x += run_baseline()` total grows with the
+// iteration count and can overflow `long` (signed overflow is undefined
+// behavior).
+static void BM_Baseline(benchmark::State &state) {
+  for (auto _ : state) {
+    long x = run_baseline();
+    benchmark::DoNotOptimize(x);
+  }
+}
+BENCHMARK(BM_Baseline)->Unit(benchmark::kMicrosecond)->MinTime(2.0);