From dc9b44d8e71fb04138111b9fe2a312ac9e3e161c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 18 May 2026 00:47:23 +0000
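Subject: [PATCH 1/8] Complete TODOs in scheduler.cpp and light_stack.cpp

- scheduler.cpp: forward an exception caught while running a task to the
  task's on_exception callback instead of only storing it in loc.ex_ptr
- close_bind_only_executor(): add an abort_tasks parameter (default false);
  when set, tasks still queued on the closed executor are marked as ended
  instead of being moved to the global queue
- light_stack.cpp: protect the lowest page of each mmap'ed task stack with
  PROT_NONE so it acts as a guard page
- light_stack.cpp: compare the mmap() result against MAP_FAILED instead of
  a null pointer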

Agent-Logs-Url: https://github.com/Melnytskyi/fast_task/sessions/57e0a87f-741a-4a2b-b21f-6a61b611b899

Co-authored-by: Melnytskyi <17990770+Melnytskyi@users.noreply.github.com>
---
 include/task/scheduler.hpp           |  2 +-
 src/tasks/_internal.hpp              |  1 +
 src/tasks/classes/task/scheduler.cpp |  3 ++-
 src/tasks/scheduler.cpp              | 26 ++++++++++++++++++++++----
 src/tasks/util/light_stack.cpp       | 22 ++++++++++------------
 5 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/include/task/scheduler.hpp b/include/task/scheduler.hpp
index d9945e2..88e83eb 100644
--- a/include/task/scheduler.hpp
+++ b/include/task/scheduler.hpp
@@ -57,7 +57,7 @@ namespace fast_task {
 
         uint16_t FT_API create_bind_only_executor(uint16_t fixed_count, bool allow_implicit_start, executor_policy policy = executor_policy::default_policy);
         void FT_API assign_bind_only_executor(uint16_t id, uint16_t fixed_count, bool allow_implicit_start, executor_policy policy = executor_policy::default_policy);
-        void FT_API close_bind_only_executor(uint16_t id);
+        void FT_API close_bind_only_executor(uint16_t id, bool abort_tasks = false);
 
         void FT_API create_executor(size_t count = 1);
         size_t FT_API total_executors();
diff --git a/src/tasks/_internal.hpp b/src/tasks/_internal.hpp
index cfb9ad7..60ab0a3 100644
--- a/src/tasks/_internal.hpp
+++ b/src/tasks/_internal.hpp
@@ -221,6 +221,7 @@ namespace fast_task {
         bool in_close : 1 = false;
         bool allow_implicit_start : 1 = false;
         bool fixed_size : 1 = false;
+        bool abort_tasks_on_close : 1 = false;
         scheduler::executor_policy policy = scheduler::executor_policy::default_policy;
     };
 
diff --git a/src/tasks/classes/task/scheduler.cpp b/src/tasks/classes/task/scheduler.cpp
index 06e3cb8..3f1ede7 100644
--- a/src/tasks/classes/task/scheduler.cpp
+++ b/src/tasks/classes/task/scheduler.cpp
@@ -125,7 +125,7 @@ namespace fast_task::scheduler {
         }
     }
 
-    void close_bind_only_executor(uint16_t id) {
+    void close_bind_only_executor(uint16_t id, bool abort_tasks) {
         mutex_unify unify(glob.binded_workers_safety);
         fast_task::unique_lock guard(unify);
         decltype(glob.binded_workers[id].tasks) transfer_tasks;
@@ -149,6 +149,7 @@ namespace fast_task::scheduler {
             if (context.in_close)
                 return;
             context.in_close = true;
+            context.abort_tasks_on_close = abort_tasks;
 
             std::swap(transfer_tasks, context.tasks);
             for (uint16_t i = 0; i < context.executors; i++) {
diff --git a/src/tasks/scheduler.cpp b/src/tasks/scheduler.cpp
index ee8dcf6..17c678c 100644
--- a/src/tasks/scheduler.cpp
+++ b/src/tasks/scheduler.cpp
@@ -281,7 +281,18 @@ namespace fast_task {
             data.started = true;
             data.result_notify.notify_all();
         } catch (...) {
-            loc.ex_ptr = std::current_exception(); //TODO pass this to the callback
+            loc.ex_ptr = std::current_exception();
+            if (data.callbacks.on_exception) {
+                try {
+                    data.callbacks.on_exception(data.callbacks.get_data(), loc.ex_ptr);
+                    loc.ex_ptr = nullptr;
+                } catch (const task_cancellation& cancel) {
+                    forceCancelCancellation(cancel);
+                    loc.ex_ptr = nullptr;
+                } catch (...) {
+                    loc.ex_ptr = std::current_exception();
+                }
+            }
             fast_task::lock_guard guard(data.no_race);
             data.end_of_life = true;
             data.started = true;
@@ -738,9 +749,16 @@ namespace fast_task {
             if (context.executors == 0) {
                 if (context.in_close) {
                     while (context.tasks.size_approx())
-                        while (context.tasks.try_dequeue(loc.curr_task)) { //TODO add option to abort if there still tasks in queue
-                            get_data(loc.curr_task).bind_to_worker_id = (uint16_t)-1;
-                            glob.tasks.enqueue(std::move(loc.curr_task));
+                        while (context.tasks.try_dequeue(loc.curr_task)) {
+                            if (context.abort_tasks_on_close) {
+                                fast_task::lock_guard task_guard(get_data(loc.curr_task).no_race);
+                                get_data(loc.curr_task).end_of_life = true;
+                                get_data(loc.curr_task).started = true;
+                                get_data(loc.curr_task).result_notify.notify_all();
+                            } else {
+                                get_data(loc.curr_task).bind_to_worker_id = (uint16_t)-1;
+                                glob.tasks.enqueue(std::move(loc.curr_task));
+                            }
                         }
                     glob.tasks_notifier.unsafe_notify_all();
                     context.on_closed_notifier.notify_all();
diff --git a/src/tasks/util/light_stack.cpp b/src/tasks/util/light_stack.cpp
index c71ea5a..ce5cf97 100644
--- a/src/tasks/util/light_stack.cpp
+++ b/src/tasks/util/light_stack.cpp
@@ -204,27 +204,25 @@ namespace fast_task {
         //old_data.init();
     }
 
-    //TODO create proper guard page
+    //create proper guard page
     stack_context create_stack(size_t size) {
         size_t total_size = std::max(size, page_size * 3);
-        void* vp = mmap(nullptr, total_size, /*PROT_NONE*/ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-        if (!vp)
+        void* vp = mmap(nullptr, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (vp == MAP_FAILED)
             throw std::bad_alloc();
 
-        // needs at least 3 pages to fully construct the coroutine and switch to it
-        //const auto init_commit_size = page_size * 3;
-        //auto commit_start = static_cast<uint8_t*>(vp) + total_size - init_commit_size;
-        //if (mprotect(commit_start, init_commit_size, PROT_READ | PROT_WRITE) == -1) {
-        //    munmap(vp, total_size);
-        //    throw std::bad_alloc();
-        //}
+        // Create a PROT_NONE guard page at the bottom of the stack to catch stack overflows
+        if (mprotect(vp, guard_page_size, PROT_NONE) == -1) {
+            munmap(vp, total_size);
+            throw std::bad_alloc();
+        }
+
         if (RUNNING_ON_VALGRIND) {
-            void* stack_bottom = vp;
+            void* stack_bottom = static_cast<uint8_t*>(vp) + guard_page_size;
             void* stack_top = static_cast<uint8_t*>(vp) + total_size;
             get_execution_data(loc.curr_task).valgrind_stack_id = VALGRIND_STACK_REGISTER(stack_bottom, stack_top);
         }
 
-        //PROT_NONE already used for guard page
         stack_context sctx;
         sctx.size = size;
         sctx.sp = static_cast<char*>(vp) + sctx.size;

From 6d6b368596b6e7af1581cff48ba30f86ea11d3cc Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 18 May 2026 12:33:23 +0000
Subject: [PATCH 2/8] Move on_exception call outside catch block in
 in_place_run to avoid UB

Agent-Logs-Url: https://github.com/Melnytskyi/fast_task/sessions/47d0fa60-d990-4aa9-95a7-48d5b37d8a91

Co-authored-by: Melnytskyi <17990770+Melnytskyi@users.noreply.github.com>
---
 src/tasks/scheduler.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/tasks/scheduler.cpp b/src/tasks/scheduler.cpp
index 17c678c..9d56ac4 100644
--- a/src/tasks/scheduler.cpp
+++ b/src/tasks/scheduler.cpp
@@ -282,6 +282,8 @@ namespace fast_task {
             data.result_notify.notify_all();
         } catch (...) {
             loc.ex_ptr = std::current_exception();
+        }
+        if (loc.ex_ptr) {
             if (data.callbacks.on_exception) {
                 try {
                     data.callbacks.on_exception(data.callbacks.get_data(), loc.ex_ptr);

From b86894e0dacbdd247689b7ff99b8cf5cd1cc4f95 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 18 May 2026 13:10:05 +0000
Subject: [PATCH 3/8] feat: implement guard-page stack overflow recovery and
 add tests

- Add fast_task::stack_overflow exception to include/exceptions.hpp
- Implement Linux x86_64 SIGSEGV handler in light_stack.cpp:
  * Install per-thread alternate signal stack + process-wide sigaction
  * On guard-page fault: mprotect the guard page to RW, copy the return
    address from [RBP+8] (this assumes the faulting frame keeps a frame
    pointer in RBP) so the DWARF unwind chain stays intact, then redirect
    RIP to __stack_overflow_raise
  * Restore PROT_NONE on guard page when recycling stacks (unlimited/limited_buffer)
- Add CMake fallback to valgrind stub headers when system valgrind not installed
- Add tests/stackfull/test_stackfull_guard.cpp (4 tests, all pass; the two
  stack-overflow tests are compiled only on Linux x86_64)

Agent-Logs-Url: https://github.com/Melnytskyi/fast_task/sessions/7235f5d5-bf5b-4d9b-860b-d2d013dbcf4e

Co-authored-by: Melnytskyi <17990770+Melnytskyi@users.noreply.github.com>
---
 CMakeLists.txt                                |   9 +
 include/exceptions.hpp                        |   6 +
 src/tasks/util/light_stack.cpp                | 175 ++++++++++--------
 tests/stackfull/CMakeLists.txt                |   1 +
 tests/stackfull/test_stackfull_guard.cpp      | 139 ++++++++++++++
 .../valgrind-stubs/valgrind/memcheck.h        |  12 ++
 .../valgrind-stubs/valgrind/valgrind.h        |  12 ++
 7 files changed, 275 insertions(+), 79 deletions(-)
 create mode 100644 tests/stackfull/test_stackfull_guard.cpp
 create mode 100644 third_party/valgrind-stubs/valgrind/memcheck.h
 create mode 100644 third_party/valgrind-stubs/valgrind/valgrind.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c2e5ca7..9fb4623 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -175,6 +175,15 @@ else()
   target_compile_options(fast_task PRIVATE -Wall)
   target_compile_options(fast_task PRIVATE -Wextra)
   target_compile_options(fast_task PRIVATE --pedantic)
+
+  # Use real Valgrind headers when available; fall back to lightweight stubs otherwise.
+  find_path(VALGRIND_INCLUDE_DIR valgrind/valgrind.h)
+  if(VALGRIND_INCLUDE_DIR)
+    target_include_directories(fast_task PRIVATE ${VALGRIND_INCLUDE_DIR})
+  else()
+    target_include_directories(fast_task PRIVATE
+      ${CMAKE_SOURCE_DIR}/third_party/valgrind-stubs)
+  endif()
 endif()
 
 if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
diff --git a/include/exceptions.hpp b/include/exceptions.hpp
index fdb2e18..8847fbd 100644
--- a/include/exceptions.hpp
+++ b/include/exceptions.hpp
@@ -63,6 +63,12 @@ namespace fast_task {
         }
     };
 
+    struct FT_API stack_overflow final : public exception {
+        inline const char* what() const noexcept override {
+            return "Task stack overflow: the task exhausted its stack space.";
+        }
+    };
+
     //this exception should never be catched
     class FT_API task_cancellation {
         bool in_landing = false;
diff --git a/src/tasks/util/light_stack.cpp b/src/tasks/util/light_stack.cpp
index ce5cf97..7b99057 100644
--- a/src/tasks/util/light_stack.cpp
+++ b/src/tasks/util/light_stack.cpp
@@ -110,98 +110,110 @@ namespace fast_task {
     }
 }
 #elif PLATFORM_LINUX
+    #include <mutex>
     #include <signal.h>
     #include <sys/mman.h>
     #include <sys/stat.h>
     #include <unistd.h>
     #include <valgrind/memcheck.h>
     #include <valgrind/valgrind.h>
+    #if defined(__x86_64__)
+        #include <ucontext.h>
+    #endif
 
 namespace fast_task {
     static const size_t page_size = boost::context::stack_traits::page_size();
     static const size_t guard_page_size = boost::context::stack_traits::page_size();
 
-    //void stack_growth_handler(int sig, siginfo_t* si, void* ucontext);
-    //
-    //static thread_local struct old___ {
-    //    struct sigaction handler;
-    //    stack_t stack;
-    //    bool is_init = false;
-    //
-    //    void init() {
-    //        if (is_init)
-    //            return;
-    //        is_init = true;
-    //        stack_t ss;
-    //        ss.ss_sp = mmap(nullptr, SIGSTKSZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-    //        ss.ss_size = SIGSTKSZ;
-    //        ss.ss_flags = 0;
-    //        if (sigaltstack(&ss, &stack) == -1) {
-    //            perror("sigaltstack");
-    //            exit(EXIT_FAILURE);
-    //        }
-    //        struct sigaction sa;
-    //        sigemptyset(&sa.sa_mask);
-    //        sa.sa_sigaction = stack_growth_handler;
-    //        sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
-    //        if (sigaction(SIGSEGV, &sa, &handler) == -1) {
-    //            perror("sigaction");
-    //            exit(EXIT_FAILURE);
-    //        }
-    //    }
-    //
-    //    ~old___() {
-    //        if (sigaltstack(&stack, &stack) == -1)
-    //            perror("sigaltstack");
-    //        if (munmap(static_cast<char*>(stack.ss_sp), SIGSTKSZ) == -1)
-    //            perror("munmap");
-    //        if (sigaction(SIGSEGV, &handler, NULL) == -1)
-    //            perror("sigaction failed in library destructor");
-    //    }
-    //} old_data;
-    //
-    //void pass_handler(int sig, siginfo_t* si, void* ucontext) {
-    //    if (old_data.handler.sa_flags & SA_SIGINFO)
-    //        old_data.handler.sa_sigaction(sig, si, ucontext);
-    //    else if (old_data.handler.sa_handler == SIG_DFL) {
-    //        signal(sig, SIG_DFL);
-    //        raise(sig);
-    //    } else if (old_data.handler.sa_handler != SIG_IGN)
-    //        old_data.handler.sa_handler(sig);
-    //}
-    //
-    //void stack_growth_handler(int sig, siginfo_t* si, void* ucontext) {
-    //    if (!loc.curr_task) { //definitely not ours stack
-    //        pass_handler(sig, si, ucontext);
-    //        return;
-    //    } else if (!get_data(loc.curr_task).data) { //avoid alloc
-    //        pass_handler(sig, si, ucontext);
-    //        return;
-    //    }
-    //
-    //    void* fault_addr = si->si_addr;
-    //    void* stack_start = get_execution_data(loc.curr_task).stack_ptr;
-    //    void* stack_end = static_cast<char*>(stack_start) + get_execution_data(loc.curr_task).stack_size;
-    //
-    //    if (fault_addr >= stack_start && fault_addr < stack_end) {
-    //        void* page_start = (void*)((uintptr_t)fault_addr & ~(page_size - 1));
-    //        if (mprotect(page_start, page_size, PROT_READ | PROT_WRITE) == -1) {
-    //            if (is_debugger_attached()) {
-    //                pass_handler(sig, si, ucontext);
-    //                return;
-    //            }
-    //            psignal(sig, "mprotect failed in signal handler");
-    //            _exit(EXIT_FAILURE);
-    //        }
-    //        if (RUNNING_ON_VALGRIND)
-    //            VALGRIND_MAKE_MEM_DEFINED(page_start, page_size);
-    //        return;
-    //    } else
-    //        pass_handler(sig, si, ucontext);
-    //}
+    // Called when a stack overflow is detected: resumes (outside signal handler)
+    // on the task's now-accessible guard page and raises the stack_overflow exception.
+    // The C++ exception machinery then unwinds the task's call stack normally,
+    // running all destructors before the catch(...) in context_exec catches it.
+    [[noreturn]] __attribute__((noinline)) static void __stack_overflow_raise() {
+        throw stack_overflow();
+    }
+
+    static struct sigaction __old_sigsegv_action = {};
+
+    static void __sigsegv_handler(int sig, siginfo_t* si, void* ctx) {
+        bool handled = false;
+
+        // Only act when a task is currently executing on this thread and has a stack.
+        if (loc.curr_task) {
+            // Access execution_data directly to avoid any heap allocation inside a signal handler.
+            // Use auto* to avoid naming the private nested type task::execution_data.
+            auto* exdata = get_data(loc.curr_task).exdata;
+            if (exdata && exdata->stack_ptr) {
+                void* fault_addr = si->si_addr;
+                void* stack_bottom = exdata->stack_ptr;
+
+                // Guard page occupies [stack_bottom, stack_bottom + guard_page_size).
+                if (fault_addr >= stack_bottom &&
+                    fault_addr < static_cast<char*>(stack_bottom) + guard_page_size) {
+                    // Make the guard page accessible so the C++ unwinder has a little
+                    // room on the stack to execute landing pads and destructors.
+                    mprotect(stack_bottom, guard_page_size, PROT_READ | PROT_WRITE);
+
+#if defined(__x86_64__)
+                    ucontext_t* uc = static_cast<ucontext_t*>(ctx);
+
+                    // Position RSP at the high end of the (now accessible) guard page,
+                    // simulating a CALL instruction (RSP % 16 == 8, RA slot filled
+                    // with the actual return address so the DWARF unwinder can walk
+                    // through all existing recursion frames on the real stack above).
+                    uintptr_t guard_top = reinterpret_cast<uintptr_t>(stack_bottom) + guard_page_size;
+                    uintptr_t new_rsp = (guard_top & ~static_cast<uintptr_t>(15)) - 8;
+                    // Read the return address that the faulting frame would use so the
+                    // stack-unwind chain is intact: [RBP + sizeof(ptr)] holds the
+                    // return address in x86-64 ABI (after push rbp / mov rbp, rsp).
+                    greg_t rbp = uc->uc_mcontext.gregs[REG_RBP];
+                    uintptr_t ret_addr = *reinterpret_cast<uintptr_t*>(rbp + sizeof(uintptr_t));
+                    *reinterpret_cast<uintptr_t*>(new_rsp) = ret_addr;
+                    uc->uc_mcontext.gregs[REG_RSP] = static_cast<greg_t>(new_rsp);
+                    uc->uc_mcontext.gregs[REG_RIP] = reinterpret_cast<greg_t>(__stack_overflow_raise);
+                    handled = true;
+#endif
+                }
+            }
+        }
+
+        if (!handled) {
+            if (__old_sigsegv_action.sa_flags & SA_SIGINFO)
+                __old_sigsegv_action.sa_sigaction(sig, si, ctx);
+            else if (__old_sigsegv_action.sa_handler == SIG_DFL) {
+                signal(sig, SIG_DFL);
+                raise(sig);
+            } else if (__old_sigsegv_action.sa_handler != SIG_IGN)
+                __old_sigsegv_action.sa_handler(sig);
+        }
+    }
 
     void __install_signal_handler_mem() {
-        //old_data.init();
+        // Per-thread: allocate and register a dedicated alternate signal stack so the
+        // SIGSEGV handler can run even when the task's stack is exhausted.
+        static thread_local bool alt_stack_set = false;
+        if (!alt_stack_set) {
+            void* alt_mem = mmap(nullptr, SIGSTKSZ, PROT_READ | PROT_WRITE,
+                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+            if (alt_mem != MAP_FAILED) {
+                stack_t ss;
+                ss.ss_sp = alt_mem;
+                ss.ss_size = SIGSTKSZ;
+                ss.ss_flags = 0;
+                sigaltstack(&ss, nullptr);
+            }
+            alt_stack_set = true;
+        }
+
+        // Process-wide: install the SIGSEGV handler exactly once.
+        static std::once_flag handler_flag;
+        std::call_once(handler_flag, []() {
+            struct sigaction sa;
+            sigemptyset(&sa.sa_mask);
+            sa.sa_sigaction = __sigsegv_handler;
+            sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+            sigaction(SIGSEGV, &sa, &__old_sigsegv_action);
+        });
     }
 
     //create proper guard page
@@ -262,6 +274,9 @@ namespace fast_task {
     }
 
     void unlimited_buffer(stack_context& sctx) {
+        // Restore the guard page before returning the stack to the pool so the
+        // next task using this stack gets proper overflow detection.
+        mprotect(static_cast<char*>(sctx.sp) - sctx.size, guard_page_size, PROT_NONE);
         if (!stack_allocations.enqueue(sctx))
             destroy_stack(sctx);
         else
@@ -269,6 +284,8 @@ namespace fast_task {
     }
 
     void limited_buffer(stack_context& sctx) {
+        // Restore the guard page before returning the stack to the pool.
+        mprotect(static_cast<char*>(sctx.sp) - sctx.size, guard_page_size, PROT_NONE);
         if (++stack_allocations_buffer < light_stack::max_buffer_size) {
             if (!stack_allocations.enqueue(sctx)) {
                 destroy_stack(sctx);
diff --git a/tests/stackfull/CMakeLists.txt b/tests/stackfull/CMakeLists.txt
index fe6e92b..7600de5 100644
--- a/tests/stackfull/CMakeLists.txt
+++ b/tests/stackfull/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_ft_test(test_stackfull_basic test_stackfull_basic.cpp)
 add_ft_test(test_stackfull_sleep test_stackfull_sleep.cpp)
 add_ft_test(test_stackfull_sync  test_stackfull_sync.cpp)
+add_ft_test(test_stackfull_guard test_stackfull_guard.cpp)
diff --git a/tests/stackfull/test_stackfull_guard.cpp b/tests/stackfull/test_stackfull_guard.cpp
new file mode 100644
index 0000000..60b95b1
--- /dev/null
+++ b/tests/stackfull/test_stackfull_guard.cpp
@@ -0,0 +1,139 @@
+// Copyright Danyil Melnytskyi 2025-Present
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#include <helpers.hpp>
+#include <exceptions.hpp>
+#include <atomic>
+#include <stdexcept>
+
+class StackfullGuardTest : public SchedulerFixture {};
+
+// ---------------------------------------------------------------------------
+// Exception-callback tests (platform independent)
+// ---------------------------------------------------------------------------
+
+// Verify that a normal C++ exception thrown inside a task reaches the
+// on_exception callback with the correct type and message.
+TEST_F(StackfullGuardTest, ExceptionCallback_ReceivesCorrectException) {
+    std::atomic<bool> called{false};
+    std::string message;
+
+    auto t = std::make_shared<fast_task::task>(
+        [] { throw std::runtime_error("guard_test"); },
+        [&](const std::exception_ptr& ep) {
+            called.store(true);
+            try {
+                std::rethrow_exception(ep);
+            } catch (const std::runtime_error& e) {
+                message = e.what();
+            }
+        }
+    );
+    fast_task::scheduler::start(t);
+    t->await_task();
+
+    EXPECT_TRUE(called.load());
+    EXPECT_EQ(message, "guard_test");
+}
+
+// Verify that RAII destructors run for local objects when a normal exception
+// is thrown inside a task.
+TEST_F(StackfullGuardTest, ExceptionCallback_RaiiDestructorCalledOnException) {
+    struct DtorGuard {
+        std::atomic<bool>& flag;
+        ~DtorGuard() { flag.store(true, std::memory_order_release); }
+    };
+
+    std::atomic<bool> dtor_called{false};
+    std::atomic<bool> handler_called{false};
+
+    auto t = std::make_shared<fast_task::task>(
+        [&] {
+            DtorGuard guard{dtor_called};
+            throw std::runtime_error("raii_test");
+        },
+        [&](const std::exception_ptr&) {
+            handler_called.store(true);
+        }
+    );
+    fast_task::scheduler::start(t);
+    t->await_task();
+
+    EXPECT_TRUE(handler_called.load()) << "Exception handler must be called";
+    EXPECT_TRUE(dtor_called.load()) << "RAII destructor must run before handler";
+}
+
+// ---------------------------------------------------------------------------
+// Stack-overflow tests (Linux x86_64 only — requires the SIGSEGV handler that
+// redirects execution to throw fast_task::stack_overflow)
+// ---------------------------------------------------------------------------
+
+#if defined(__x86_64__) && defined(__linux__)
+
+// Recursive function that deliberately exhausts the task's 1 MB stack.
+// Each frame allocates a 4 kB volatile buffer to accelerate the overflow.
+[[noreturn]] __attribute__((noinline)) static void recurse_overflow() {
+    volatile char buf[4096] = {};
+    (void)buf[0];
+    recurse_overflow();
+}
+
+// Verify that a stack overflow inside a task is caught by the on_exception
+// callback and arrives as fast_task::stack_overflow.
+TEST_F(StackfullGuardTest, StackOverflow_ExceptionCallbackCalled) {
+    std::atomic<bool> handler_called{false};
+    std::atomic<bool> got_stack_overflow{false};
+
+    auto t = std::make_shared<fast_task::task>(
+        [] { recurse_overflow(); },
+        [&](const std::exception_ptr& ep) {
+            handler_called.store(true);
+            try {
+                std::rethrow_exception(ep);
+            } catch (const fast_task::stack_overflow&) {
+                got_stack_overflow.store(true);
+            } catch (...) {
+            }
+        }
+    );
+    fast_task::scheduler::start(t);
+    t->await_task();
+
+    EXPECT_TRUE(handler_called.load()) << "on_exception callback must be called on stack overflow";
+    EXPECT_TRUE(got_stack_overflow.load()) << "Exception must be fast_task::stack_overflow";
+}
+
+// Verify that RAII destructors for objects on the task stack are called even
+// when the stack overflows (i.e., C++ unwinding works through the guard-page
+// recovery path).
+TEST_F(StackfullGuardTest, StackOverflow_RaiiDestructorCalled) {
+    struct DtorGuard {
+        std::atomic<bool>& flag;
+        ~DtorGuard() { flag.store(true, std::memory_order_release); }
+    };
+
+    std::atomic<bool> dtor_called{false};
+    std::atomic<bool> handler_called{false};
+
+    auto t = std::make_shared<fast_task::task>(
+        [&] {
+            // The guard is in the outermost task frame; the unwinder will reach
+            // it while walking up from the overflow point.
+            DtorGuard guard{dtor_called};
+            recurse_overflow(); // never returns normally
+        },
+        [&](const std::exception_ptr&) {
+            handler_called.store(true);
+        }
+    );
+    fast_task::scheduler::start(t);
+    t->await_task();
+
+    EXPECT_TRUE(handler_called.load()) << "Exception handler must be called";
+    EXPECT_TRUE(dtor_called.load()) << "RAII destructor must run during stack-overflow unwind";
+}
+
+#endif // defined(__x86_64__) && defined(__linux__)
diff --git a/third_party/valgrind-stubs/valgrind/memcheck.h b/third_party/valgrind-stubs/valgrind/memcheck.h
new file mode 100644
index 0000000..78f22d5
--- /dev/null
+++ b/third_party/valgrind-stubs/valgrind/memcheck.h
@@ -0,0 +1,12 @@
+// Minimal memcheck stub used when the real Valgrind headers are not installed.
+#ifndef VALGRIND_MEMCHECK_STUB_H
+#define VALGRIND_MEMCHECK_STUB_H
+
+#include "valgrind.h"
+
+#define VALGRIND_MAKE_MEM_DEFINED(addr, size)   do {} while (0)
+#define VALGRIND_MAKE_MEM_UNDEFINED(addr, size) do {} while (0)
+#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)  do {} while (0)
+#define VALGRIND_CHECK_MEM_IS_DEFINED(addr, size) ((unsigned int)0)
+
+#endif /* VALGRIND_MEMCHECK_STUB_H */
diff --git a/third_party/valgrind-stubs/valgrind/valgrind.h b/third_party/valgrind-stubs/valgrind/valgrind.h
new file mode 100644
index 0000000..72013ed
--- /dev/null
+++ b/third_party/valgrind-stubs/valgrind/valgrind.h
@@ -0,0 +1,12 @@
+// Minimal valgrind stub used when the real Valgrind headers are not installed.
+// All macros expand to no-ops; RUNNING_ON_VALGRIND is always 0 at runtime.
+#ifndef VALGRIND_STUB_H
+#define VALGRIND_STUB_H
+
+#define RUNNING_ON_VALGRIND 0
+
+#define VALGRIND_STACK_REGISTER(start, end) ((unsigned int)0)
+#define VALGRIND_STACK_DEREGISTER(id)       do {} while (0)
+#define VALGRIND_STACK_CHANGE(id, start, end) do {} while (0)
+
+#endif /* VALGRIND_STUB_H */

From ab12a18b5f33d79cac2af1d43b0d1c90abc7bfc2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 18 May 2026 13:14:47 +0000
Subject: [PATCH 4/8] fix: address code-review issues in guard-page handler

- Check mprotect() return value in signal handler; fall back to old handler on failure
- Validate RBP is within valid (non-guard) stack bounds before dereferencing [RBP+8]
- Check sigaltstack() return value; free alt-stack memory on failure
- Discard stack in unlimited_buffer/limited_buffer if mprotect(PROT_NONE) fails

Agent-Logs-Url: https://github.com/Melnytskyi/fast_task/sessions/7235f5d5-bf5b-4d9b-860b-d2d013dbcf4e

Co-authored-by: Melnytskyi <17990770+Melnytskyi@users.noreply.github.com>
---
 src/tasks/util/light_stack.cpp | 64 +++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 21 deletions(-)

diff --git a/src/tasks/util/light_stack.cpp b/src/tasks/util/light_stack.cpp
index 7b99057..6b6c878 100644
--- a/src/tasks/util/light_stack.cpp
+++ b/src/tasks/util/light_stack.cpp
@@ -146,37 +146,49 @@ namespace fast_task {
             if (exdata && exdata->stack_ptr) {
                 void* fault_addr = si->si_addr;
                 void* stack_bottom = exdata->stack_ptr;
+                uintptr_t stack_top = reinterpret_cast<uintptr_t>(stack_bottom) + exdata->stack_size;
 
                 // Guard page occupies [stack_bottom, stack_bottom + guard_page_size).
                 if (fault_addr >= stack_bottom &&
                     fault_addr < static_cast<char*>(stack_bottom) + guard_page_size) {
                     // Make the guard page accessible so the C++ unwinder has a little
                     // room on the stack to execute landing pads and destructors.
-                    mprotect(stack_bottom, guard_page_size, PROT_READ | PROT_WRITE);
+                    if (mprotect(stack_bottom, guard_page_size, PROT_READ | PROT_WRITE) != 0)
+                        goto pass_handler; // mprotect failed — fall back to default handling
 
 #if defined(__x86_64__)
-                    ucontext_t* uc = static_cast<ucontext_t*>(ctx);
-
-                    // Position RSP at the high end of the (now accessible) guard page,
-                    // simulating a CALL instruction (RSP % 16 == 8, RA slot filled
-                    // with the actual return address so the DWARF unwinder can walk
-                    // through all existing recursion frames on the real stack above).
-                    uintptr_t guard_top = reinterpret_cast<uintptr_t>(stack_bottom) + guard_page_size;
-                    uintptr_t new_rsp = (guard_top & ~static_cast<uintptr_t>(15)) - 8;
-                    // Read the return address that the faulting frame would use so the
-                    // stack-unwind chain is intact: [RBP + sizeof(ptr)] holds the
-                    // return address in x86-64 ABI (after push rbp / mov rbp, rsp).
-                    greg_t rbp = uc->uc_mcontext.gregs[REG_RBP];
-                    uintptr_t ret_addr = *reinterpret_cast<uintptr_t*>(rbp + sizeof(uintptr_t));
-                    *reinterpret_cast<uintptr_t*>(new_rsp) = ret_addr;
-                    uc->uc_mcontext.gregs[REG_RSP] = static_cast<greg_t>(new_rsp);
-                    uc->uc_mcontext.gregs[REG_RIP] = reinterpret_cast<greg_t>(__stack_overflow_raise);
-                    handled = true;
+                    {
+                        ucontext_t* uc = static_cast<ucontext_t*>(ctx);
+
+                        // Position RSP at the high end of the (now accessible) guard page,
+                        // simulating a CALL instruction (RSP % 16 == 8, RA slot filled
+                        // with the actual return address so the DWARF unwinder can walk
+                        // through all existing recursion frames on the real stack above).
+                        uintptr_t guard_top = reinterpret_cast<uintptr_t>(stack_bottom) + guard_page_size;
+                        uintptr_t new_rsp = (guard_top & ~static_cast<uintptr_t>(15)) - 8;
+
+                        // Read the return address that the faulting frame would use so the
+                        // stack-unwind chain is intact: [RBP + sizeof(ptr)] holds the
+                        // return address in x86-64 ABI (after push rbp / mov rbp, rsp).
+                        // Validate RBP points into the valid (non-guard) stack region
+                        // before dereferencing to avoid a second fault inside the handler.
+                        uintptr_t rbp = static_cast<uintptr_t>(uc->uc_mcontext.gregs[REG_RBP]);
+                        uintptr_t ra_addr = rbp + sizeof(uintptr_t);
+                        if (rbp < guard_top || ra_addr + sizeof(uintptr_t) > stack_top)
+                            goto pass_handler; // corrupt frame pointer — fall back
+
+                        uintptr_t ret_addr = *reinterpret_cast<uintptr_t*>(ra_addr);
+                        *reinterpret_cast<uintptr_t*>(new_rsp) = ret_addr;
+                        uc->uc_mcontext.gregs[REG_RSP] = static_cast<greg_t>(new_rsp);
+                        uc->uc_mcontext.gregs[REG_RIP] = reinterpret_cast<greg_t>(__stack_overflow_raise);
+                        handled = true;
+                    }
 #endif
                 }
             }
         }
 
+    pass_handler:
         if (!handled) {
             if (__old_sigsegv_action.sa_flags & SA_SIGINFO)
                 __old_sigsegv_action.sa_sigaction(sig, si, ctx);
@@ -200,7 +212,8 @@ namespace fast_task {
                 ss.ss_sp = alt_mem;
                 ss.ss_size = SIGSTKSZ;
                 ss.ss_flags = 0;
-                sigaltstack(&ss, nullptr);
+                if (sigaltstack(&ss, nullptr) != 0)
+                    munmap(alt_mem, SIGSTKSZ); // best-effort cleanup on failure
             }
             alt_stack_set = true;
         }
@@ -276,7 +289,12 @@ namespace fast_task {
     void unlimited_buffer(stack_context& sctx) {
         // Restore the guard page before returning the stack to the pool so the
         // next task using this stack gets proper overflow detection.
-        mprotect(static_cast<char*>(sctx.sp) - sctx.size, guard_page_size, PROT_NONE);
+        // If restoring protection fails, destroy the stack rather than recycling
+        // an unprotected one.
+        if (mprotect(static_cast<char*>(sctx.sp) - sctx.size, guard_page_size, PROT_NONE) != 0) {
+            destroy_stack(sctx);
+            return;
+        }
         if (!stack_allocations.enqueue(sctx))
             destroy_stack(sctx);
         else
@@ -285,7 +303,11 @@ namespace fast_task {
 
     void limited_buffer(stack_context& sctx) {
         // Restore the guard page before returning the stack to the pool.
-        mprotect(static_cast<char*>(sctx.sp) - sctx.size, guard_page_size, PROT_NONE);
+        // Destroy the stack if protection cannot be restored.
+        if (mprotect(static_cast<char*>(sctx.sp) - sctx.size, guard_page_size, PROT_NONE) != 0) {
+            destroy_stack(sctx);
+            return;
+        }
         if (++stack_allocations_buffer < light_stack::max_buffer_size) {
             if (!stack_allocations.enqueue(sctx)) {
                 destroy_stack(sctx);

From b5b0adec4e95389efcf20d0f509e85e2e0d9c4d9 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 18 May 2026 15:20:41 +0000
Subject: [PATCH 5/8] refactor: guard pages as fast-fail, configurable count,
 remove overflow recovery

- Add FAST_TASK_GUARD_PAGE_COUNT cmake option (default 1); passes FT_GUARD_PAGE_COUNT
  to the library so guard page size is configurable at build time; 0 disables mprotect
- Remove stack_overflow exception type from exceptions.hpp
- Linux light_stack: remove SIGSEGV handler machinery (handler, alt-stack, sigaction);
  make __install_signal_handler_mem a no-op; drive guard_page_size from FT_GUARD_PAGE_COUNT;
  guard mprotect(PROT_NONE) in create_stack with #if FT_GUARD_PAGE_COUNT > 0;
  remove mprotect restoration from unlimited_buffer / limited_buffer
- Windows light_stack: drive guard_page_size from FT_GUARD_PAGE_COUNT; guard
  PAGE_GUARD commit with #if FT_GUARD_PAGE_COUNT > 0
- test_stackfull_guard: remove stack-overflow tests; keep 2 exception-callback tests

Agent-Logs-Url: https://github.com/Melnytskyi/fast_task/sessions/463eb9a4-b8be-4088-becb-33ec6fdfbd0e

Co-authored-by: Melnytskyi <17990770+Melnytskyi@users.noreply.github.com>
---
 CMakeLists.txt                           |   5 +
 include/exceptions.hpp                   |   6 -
 src/tasks/util/light_stack.cpp           | 144 ++++-------------------
 tests/stackfull/test_stackfull_guard.cpp |  73 ------------
 4 files changed, 25 insertions(+), 203 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9fb4623..d44dbc0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,6 +8,9 @@ option(FAST_TASK_ENABLE_ABORT_IF_NEVER_STARTED "Abort if task's destructor calle
 option(FAST_TASK_ENABLE_PREEMPTIVE_SCHEDULER "Enables time sliced preemption for tasks, the tasks should use interrupt_unsafe_region on regions where preemption should be disabled." OFF)
 option(FAST_TASK_INCLUDE_THREAD_INTERRUPT_CODE "Allows the code to stop the thread and execute custom function on top of its stack, doesn't have effect if FAST_TASK_ENABLE_PREEMPTIVE_SCHEDULER enabled" ON)
 
+set(FAST_TASK_GUARD_PAGE_COUNT 1 CACHE STRING
+  "Number of PROT_NONE (Linux) / PAGE_GUARD (Windows) pages placed at the bottom of each task stack. Set to 0 to disable guard pages entirely.")
+
 
 set(FAST_TASK_EXCEPTION_POLICY "NONE" CACHE 
 STRING 
@@ -99,6 +102,8 @@ elseif(FAST_TASK_EXCEPTION_POLICY STREQUAL "PRESERVE")
   target_compile_definitions(fast_task PRIVATE FT_EXCEPTION_POLICY_PRESERVE)
 endif()
 
+target_compile_definitions(fast_task PRIVATE FT_GUARD_PAGE_COUNT=${FAST_TASK_GUARD_PAGE_COUNT})
+
 find_package(Boost COMPONENTS context lockfree)
 if(NOT Boost_FOUND)
   FetchContent_Declare(
diff --git a/include/exceptions.hpp b/include/exceptions.hpp
index 8847fbd..fdb2e18 100644
--- a/include/exceptions.hpp
+++ b/include/exceptions.hpp
@@ -63,12 +63,6 @@ namespace fast_task {
         }
     };
 
-    struct FT_API stack_overflow final : public exception {
-        inline const char* what() const noexcept override {
-            return "Task stack overflow: the task exhausted its stack space.";
-        }
-    };
-
     //this exception should never be catched
     class FT_API task_cancellation {
         bool in_landing = false;
diff --git a/src/tasks/util/light_stack.cpp b/src/tasks/util/light_stack.cpp
index 6b6c878..f7464b9 100644
--- a/src/tasks/util/light_stack.cpp
+++ b/src/tasks/util/light_stack.cpp
@@ -24,6 +24,11 @@ namespace fast_task {
 }
 #if PLATFORM_WINDOWS
     #include <Windows.h>
+
+    #ifndef FT_GUARD_PAGE_COUNT
+        #define FT_GUARD_PAGE_COUNT 1
+    #endif
+
 size_t page_size = []() {
     SYSTEM_INFO si;
     GetSystemInfo(&si);
@@ -32,7 +37,7 @@ size_t page_size = []() {
 
 namespace fast_task {
     stack_context create_stack(size_t size) {
-        const size_t guard_page_size = page_size;
+        const size_t guard_page_size = page_size * FT_GUARD_PAGE_COUNT;
 
         void* vp = ::VirtualAlloc(0, size, MEM_RESERVE, PAGE_READWRITE);
         if (!vp)
@@ -47,12 +52,15 @@ namespace fast_task {
             throw std::bad_alloc();
         }
 
-        // create guard page so the OS can catch page faults and grow our stack
+#if FT_GUARD_PAGE_COUNT > 0
+        // create guard page(s) so the OS can catch stack overflows (fast-fail)
         pPtr -= guard_page_size;
         if (!VirtualAlloc(pPtr, guard_page_size, MEM_COMMIT, PAGE_READWRITE | PAGE_GUARD)) {
             VirtualFree(vp, size, MEM_FREE);
             throw std::bad_alloc();
         }
+#endif
+
         stack_context sctx;
         sctx.size = size;
         sctx.sp = static_cast<char*>(vp) + sctx.size;
@@ -62,7 +70,7 @@ namespace fast_task {
     light_stack::light_stack(size_t size) BOOST_NOEXCEPT_OR_NOTHROW : size(size) {}
 
     stack_context light_stack::allocate() {
-        const size_t guard_page_size = page_size;
+        const size_t guard_page_size = page_size * FT_GUARD_PAGE_COUNT;
         const size_t pages = (size + guard_page_size + page_size - 1) / page_size;
         // add one page at bottom that will be used as guard-page
         const size_t size__ = (pages + 1) * page_size;
@@ -110,123 +118,22 @@ namespace fast_task {
     }
 }
 #elif PLATFORM_LINUX
-    #include <mutex>
-    #include <signal.h>
     #include <sys/mman.h>
     #include <sys/stat.h>
     #include <unistd.h>
     #include <valgrind/memcheck.h>
     #include <valgrind/valgrind.h>
-    #if defined(__x86_64__)
-        #include <ucontext.h>
+
+    #ifndef FT_GUARD_PAGE_COUNT
+        #define FT_GUARD_PAGE_COUNT 1
     #endif
 
 namespace fast_task {
     static const size_t page_size = boost::context::stack_traits::page_size();
-    static const size_t guard_page_size = boost::context::stack_traits::page_size();
-
-    // Called when a stack overflow is detected: resumes (outside signal handler)
-    // on the task's now-accessible guard page and raises the stack_overflow exception.
-    // The C++ exception machinery then unwinds the task's call stack normally,
-    // running all destructors before the catch(...) in context_exec catches it.
-    [[noreturn]] __attribute__((noinline)) static void __stack_overflow_raise() {
-        throw stack_overflow();
-    }
-
-    static struct sigaction __old_sigsegv_action = {};
-
-    static void __sigsegv_handler(int sig, siginfo_t* si, void* ctx) {
-        bool handled = false;
-
-        // Only act when a task is currently executing on this thread and has a stack.
-        if (loc.curr_task) {
-            // Access execution_data directly to avoid any heap allocation inside a signal handler.
-            // Use auto* to avoid naming the private nested type task::execution_data.
-            auto* exdata = get_data(loc.curr_task).exdata;
-            if (exdata && exdata->stack_ptr) {
-                void* fault_addr = si->si_addr;
-                void* stack_bottom = exdata->stack_ptr;
-                uintptr_t stack_top = reinterpret_cast<uintptr_t>(stack_bottom) + exdata->stack_size;
-
-                // Guard page occupies [stack_bottom, stack_bottom + guard_page_size).
-                if (fault_addr >= stack_bottom &&
-                    fault_addr < static_cast<char*>(stack_bottom) + guard_page_size) {
-                    // Make the guard page accessible so the C++ unwinder has a little
-                    // room on the stack to execute landing pads and destructors.
-                    if (mprotect(stack_bottom, guard_page_size, PROT_READ | PROT_WRITE) != 0)
-                        goto pass_handler; // mprotect failed — fall back to default handling
-
-#if defined(__x86_64__)
-                    {
-                        ucontext_t* uc = static_cast<ucontext_t*>(ctx);
-
-                        // Position RSP at the high end of the (now accessible) guard page,
-                        // simulating a CALL instruction (RSP % 16 == 8, RA slot filled
-                        // with the actual return address so the DWARF unwinder can walk
-                        // through all existing recursion frames on the real stack above).
-                        uintptr_t guard_top = reinterpret_cast<uintptr_t>(stack_bottom) + guard_page_size;
-                        uintptr_t new_rsp = (guard_top & ~static_cast<uintptr_t>(15)) - 8;
-
-                        // Read the return address that the faulting frame would use so the
-                        // stack-unwind chain is intact: [RBP + sizeof(ptr)] holds the
-                        // return address in x86-64 ABI (after push rbp / mov rbp, rsp).
-                        // Validate RBP points into the valid (non-guard) stack region
-                        // before dereferencing to avoid a second fault inside the handler.
-                        uintptr_t rbp = static_cast<uintptr_t>(uc->uc_mcontext.gregs[REG_RBP]);
-                        uintptr_t ra_addr = rbp + sizeof(uintptr_t);
-                        if (rbp < guard_top || ra_addr + sizeof(uintptr_t) > stack_top)
-                            goto pass_handler; // corrupt frame pointer — fall back
-
-                        uintptr_t ret_addr = *reinterpret_cast<uintptr_t*>(ra_addr);
-                        *reinterpret_cast<uintptr_t*>(new_rsp) = ret_addr;
-                        uc->uc_mcontext.gregs[REG_RSP] = static_cast<greg_t>(new_rsp);
-                        uc->uc_mcontext.gregs[REG_RIP] = reinterpret_cast<greg_t>(__stack_overflow_raise);
-                        handled = true;
-                    }
-#endif
-                }
-            }
-        }
-
-    pass_handler:
-        if (!handled) {
-            if (__old_sigsegv_action.sa_flags & SA_SIGINFO)
-                __old_sigsegv_action.sa_sigaction(sig, si, ctx);
-            else if (__old_sigsegv_action.sa_handler == SIG_DFL) {
-                signal(sig, SIG_DFL);
-                raise(sig);
-            } else if (__old_sigsegv_action.sa_handler != SIG_IGN)
-                __old_sigsegv_action.sa_handler(sig);
-        }
-    }
+    static const size_t guard_page_size = page_size * FT_GUARD_PAGE_COUNT;
 
     void __install_signal_handler_mem() {
-        // Per-thread: allocate and register a dedicated alternate signal stack so the
-        // SIGSEGV handler can run even when the task's stack is exhausted.
-        static thread_local bool alt_stack_set = false;
-        if (!alt_stack_set) {
-            void* alt_mem = mmap(nullptr, SIGSTKSZ, PROT_READ | PROT_WRITE,
-                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-            if (alt_mem != MAP_FAILED) {
-                stack_t ss;
-                ss.ss_sp = alt_mem;
-                ss.ss_size = SIGSTKSZ;
-                ss.ss_flags = 0;
-                if (sigaltstack(&ss, nullptr) != 0)
-                    munmap(alt_mem, SIGSTKSZ); // best-effort cleanup on failure
-            }
-            alt_stack_set = true;
-        }
-
-        // Process-wide: install the SIGSEGV handler exactly once.
-        static std::once_flag handler_flag;
-        std::call_once(handler_flag, []() {
-            struct sigaction sa;
-            sigemptyset(&sa.sa_mask);
-            sa.sa_sigaction = __sigsegv_handler;
-            sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
-            sigaction(SIGSEGV, &sa, &__old_sigsegv_action);
-        });
+        // Guard pages serve as fast-fail sentinels only; no signal handler is installed.
     }
 
     //create proper guard page
@@ -236,11 +143,14 @@ namespace fast_task {
         if (vp == MAP_FAILED)
             throw std::bad_alloc();
 
-        // Create a PROT_NONE guard page at the bottom of the stack to catch stack overflows
+#if FT_GUARD_PAGE_COUNT > 0
+        // Create PROT_NONE guard page(s) at the bottom of the stack.
+        // A stack overflow will trigger SIGSEGV, terminating the process fast.
         if (mprotect(vp, guard_page_size, PROT_NONE) == -1) {
             munmap(vp, total_size);
             throw std::bad_alloc();
         }
+#endif
 
         if (RUNNING_ON_VALGRIND) {
             void* stack_bottom = static_cast<uint8_t*>(vp) + guard_page_size;
@@ -287,14 +197,6 @@ namespace fast_task {
     }
 
     void unlimited_buffer(stack_context& sctx) {
-        // Restore the guard page before returning the stack to the pool so the
-        // next task using this stack gets proper overflow detection.
-        // If restoring protection fails, destroy the stack rather than recycling
-        // an unprotected one.
-        if (mprotect(static_cast<char*>(sctx.sp) - sctx.size, guard_page_size, PROT_NONE) != 0) {
-            destroy_stack(sctx);
-            return;
-        }
         if (!stack_allocations.enqueue(sctx))
             destroy_stack(sctx);
         else
@@ -302,12 +204,6 @@ namespace fast_task {
     }
 
     void limited_buffer(stack_context& sctx) {
-        // Restore the guard page before returning the stack to the pool.
-        // Destroy the stack if protection cannot be restored.
-        if (mprotect(static_cast<char*>(sctx.sp) - sctx.size, guard_page_size, PROT_NONE) != 0) {
-            destroy_stack(sctx);
-            return;
-        }
         if (++stack_allocations_buffer < light_stack::max_buffer_size) {
             if (!stack_allocations.enqueue(sctx)) {
                 destroy_stack(sctx);
diff --git a/tests/stackfull/test_stackfull_guard.cpp b/tests/stackfull/test_stackfull_guard.cpp
index 60b95b1..470390f 100644
--- a/tests/stackfull/test_stackfull_guard.cpp
+++ b/tests/stackfull/test_stackfull_guard.cpp
@@ -5,7 +5,6 @@
 // http://www.boost.org/LICENSE_1_0.txt)
 
 #include <helpers.hpp>
-#include <exceptions.hpp>
 #include <atomic>
 #include <stdexcept>
 
@@ -65,75 +64,3 @@ TEST_F(StackfullGuardTest, ExceptionCallback_RaiiDestructorCalledOnException) {
     EXPECT_TRUE(handler_called.load()) << "Exception handler must be called";
     EXPECT_TRUE(dtor_called.load()) << "RAII destructor must run before handler";
 }
-
-// ---------------------------------------------------------------------------
-// Stack-overflow tests (Linux x86_64 only — requires the SIGSEGV handler that
-// redirects execution to throw fast_task::stack_overflow)
-// ---------------------------------------------------------------------------
-
-#if defined(__x86_64__) && defined(__linux__)
-
-// Recursive function that deliberately exhausts the task's 1 MB stack.
-// Each frame allocates a 4 kB volatile buffer to accelerate the overflow.
-[[noreturn]] __attribute__((noinline)) static void recurse_overflow() {
-    volatile char buf[4096] = {};
-    (void)buf[0];
-    recurse_overflow();
-}
-
-// Verify that a stack overflow inside a task is caught by the on_exception
-// callback and arrives as fast_task::stack_overflow.
-TEST_F(StackfullGuardTest, StackOverflow_ExceptionCallbackCalled) {
-    std::atomic<bool> handler_called{false};
-    std::atomic<bool> got_stack_overflow{false};
-
-    auto t = std::make_shared<fast_task::task>(
-        [] { recurse_overflow(); },
-        [&](const std::exception_ptr& ep) {
-            handler_called.store(true);
-            try {
-                std::rethrow_exception(ep);
-            } catch (const fast_task::stack_overflow&) {
-                got_stack_overflow.store(true);
-            } catch (...) {
-            }
-        }
-    );
-    fast_task::scheduler::start(t);
-    t->await_task();
-
-    EXPECT_TRUE(handler_called.load()) << "on_exception callback must be called on stack overflow";
-    EXPECT_TRUE(got_stack_overflow.load()) << "Exception must be fast_task::stack_overflow";
-}
-
-// Verify that RAII destructors for objects on the task stack are called even
-// when the stack overflows (i.e., C++ unwinding works through the guard-page
-// recovery path).
-TEST_F(StackfullGuardTest, StackOverflow_RaiiDestructorCalled) {
-    struct DtorGuard {
-        std::atomic<bool>& flag;
-        ~DtorGuard() { flag.store(true, std::memory_order_release); }
-    };
-
-    std::atomic<bool> dtor_called{false};
-    std::atomic<bool> handler_called{false};
-
-    auto t = std::make_shared<fast_task::task>(
-        [&] {
-            // The guard is in the outermost task frame; the unwinder will reach
-            // it while walking up from the overflow point.
-            DtorGuard guard{dtor_called};
-            recurse_overflow(); // never returns normally
-        },
-        [&](const std::exception_ptr&) {
-            handler_called.store(true);
-        }
-    );
-    fast_task::scheduler::start(t);
-    t->await_task();
-
-    EXPECT_TRUE(handler_called.load()) << "Exception handler must be called";
-    EXPECT_TRUE(dtor_called.load()) << "RAII destructor must run during stack-overflow unwind";
-}
-
-#endif // defined(__x86_64__) && defined(__linux__)

From c2a5790d2de02583726155b2c99e0092517e91eb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 18 May 2026 15:47:03 +0000
Subject: [PATCH 6/8] Fix abort_tasks close path, guard-page clearing, and
 config validation

Agent-Logs-Url: https://github.com/Melnytskyi/fast_task/sessions/1508b0a8-36ab-4951-93df-4db9191e6658

Co-authored-by: Melnytskyi <17990770+Melnytskyi@users.noreply.github.com>
---
 CMakeLists.txt                         |  5 +++-
 src/tasks/classes/task/scheduler.cpp   | 28 ++++++++++++++++--
 src/tasks/scheduler.cpp                | 20 ++++++++++---
 src/tasks/util/light_stack.cpp         | 10 +++++--
 tests/scheduler/test_bind_executor.cpp | 39 ++++++++++++++++++++++++++
 5 files changed, 92 insertions(+), 10 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d44dbc0..42c1b7e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,9 @@ option(FAST_TASK_INCLUDE_THREAD_INTERRUPT_CODE "Allows the code to stop the thre
 
 set(FAST_TASK_GUARD_PAGE_COUNT 1 CACHE STRING
   "Number of PROT_NONE (Linux) / PAGE_GUARD (Windows) pages placed at the bottom of each task stack. Set to 0 to disable guard pages entirely.")
+if(NOT FAST_TASK_GUARD_PAGE_COUNT MATCHES "^[0-9]+$")
+  message(FATAL_ERROR "FAST_TASK_GUARD_PAGE_COUNT must be a non-negative integer.")
+endif()
 
 
 set(FAST_TASK_EXCEPTION_POLICY "NONE" CACHE 
@@ -187,7 +190,7 @@ else()
     target_include_directories(fast_task PRIVATE ${VALGRIND_INCLUDE_DIR})
   else()
     target_include_directories(fast_task PRIVATE
-      ${CMAKE_SOURCE_DIR}/third_party/valgrind-stubs)
+      ${CMAKE_CURRENT_SOURCE_DIR}/third_party/valgrind-stubs)
   endif()
 endif()
 
diff --git a/src/tasks/classes/task/scheduler.cpp b/src/tasks/classes/task/scheduler.cpp
index 3f1ede7..2e19979 100644
--- a/src/tasks/classes/task/scheduler.cpp
+++ b/src/tasks/classes/task/scheduler.cpp
@@ -172,8 +172,32 @@ namespace fast_task::scheduler {
             glob.binded_workers.erase(id);
         }
         std::shared_ptr<task> task;
-        while (transfer_tasks.try_dequeue(task))
-            transfer_task(std::move(task));
+        while (transfer_tasks.try_dequeue(task)) {
+            if (!abort_tasks) {
+                transfer_task(std::move(task));
+                continue;
+            }
+            if (!task)
+                continue;
+
+            bool should_decrement = false;
+            {
+                fast_task::lock_guard task_guard(get_data(task).no_race);
+                if (!get_data(task).completed) {
+                    get_data(task).completed = true;
+                    get_data(task).end_of_life = true;
+                    get_data(task).started = true;
+                    should_decrement = true;
+                }
+                get_data(task).result_notify.notify_all();
+            }
+
+            if (should_decrement) {
+                --glob.executing_tasks;
+                fast_task::shared_lock notify_guard(glob.task_thread_safety);
+                glob.no_tasks_execute_notifier.notify_all_guarded();
+            }
+        }
     }
 
     void create_executor(size_t count) {
diff --git a/src/tasks/scheduler.cpp b/src/tasks/scheduler.cpp
index 9d56ac4..1f2f2ad 100644
--- a/src/tasks/scheduler.cpp
+++ b/src/tasks/scheduler.cpp
@@ -753,10 +753,22 @@ namespace fast_task {
                     while (context.tasks.size_approx())
                         while (context.tasks.try_dequeue(loc.curr_task)) {
                             if (context.abort_tasks_on_close) {
-                                fast_task::lock_guard task_guard(get_data(loc.curr_task).no_race);
-                                get_data(loc.curr_task).end_of_life = true;
-                                get_data(loc.curr_task).started = true;
-                                get_data(loc.curr_task).result_notify.notify_all();
+                                bool should_decrement = false;
+                                {
+                                    fast_task::lock_guard task_guard(get_data(loc.curr_task).no_race);
+                                    if (!get_data(loc.curr_task).completed) {
+                                        get_data(loc.curr_task).completed = true;
+                                        get_data(loc.curr_task).end_of_life = true;
+                                        get_data(loc.curr_task).started = true;
+                                        should_decrement = true;
+                                    }
+                                    get_data(loc.curr_task).result_notify.notify_all();
+                                }
+                                if (should_decrement) {
+                                    --glob.executing_tasks;
+                                    fast_task::shared_lock notify_guard(glob.task_thread_safety);
+                                    glob.no_tasks_execute_notifier.notify_all_guarded();
+                                }
                             } else {
                                 get_data(loc.curr_task).bind_to_worker_id = (uint16_t)-1;
                                 glob.tasks.enqueue(std::move(loc.curr_task));
diff --git a/src/tasks/util/light_stack.cpp b/src/tasks/util/light_stack.cpp
index f7464b9..f5823cc 100644
--- a/src/tasks/util/light_stack.cpp
+++ b/src/tasks/util/light_stack.cpp
@@ -81,7 +81,9 @@ namespace fast_task {
             if (!flush_used_stacks)
                 return result;
             else {
-                memset(static_cast<char*>(result.sp) - result.size, 0xCC, result.size);
+                auto* stack_base = static_cast<char*>(result.sp) - result.size;
+                const size_t clear_offset = std::min(guard_page_size, result.size);
+                memset(stack_base + clear_offset, 0xCC, result.size - clear_offset);
                 return result;
             }
         } else
@@ -189,7 +191,9 @@ namespace fast_task {
             if (!flush_used_stacks)
                 return result;
             else {
-                memset(static_cast<char*>(result.sp) - result.size, 0xCC, result.size);
+                auto* stack_base = static_cast<char*>(result.sp) - result.size;
+                const size_t clear_offset = std::min(guard_page_size, result.size);
+                memset(stack_base + clear_offset, 0xCC, result.size - clear_offset);
                 return result;
             }
         } else
@@ -228,4 +232,4 @@ namespace fast_task {
 
 #else
     #error Unsupported platform
-#endif
\ No newline at end of file
+#endif
diff --git a/tests/scheduler/test_bind_executor.cpp b/tests/scheduler/test_bind_executor.cpp
index d60922b..b6bdbbc 100644
--- a/tests/scheduler/test_bind_executor.cpp
+++ b/tests/scheduler/test_bind_executor.cpp
@@ -6,6 +6,8 @@
 
 #include <helpers.hpp>
 #include <atomic>
+#include <chrono>
+#include <thread>
 
 TEST(BindExecutor, CreateAndClose) {
     fast_task::scheduler::create_executor(2);
@@ -68,3 +70,40 @@ TEST(BindExecutor, SetWorkerIdOnTask) {
     fast_task::scheduler::close_bind_only_executor(id);
     fast_task::scheduler::shut_down();
 }
+
+TEST(BindExecutor, CloseAbortTasksAbortsQueuedAndAllowsShutdown) {
+    fast_task::scheduler::create_executor(2);
+    uint16_t id = fast_task::scheduler::create_bind_only_executor(1, true);
+
+    std::atomic<bool> blocker_started{false};
+    std::atomic<bool> blocker_release{false};
+    std::atomic<bool> queued_ran{false};
+
+    auto blocker = std::make_shared<fast_task::task>([&] {
+        blocker_started.store(true, std::memory_order_release);
+        while (!blocker_release.load(std::memory_order_acquire))
+            std::this_thread::yield();
+    });
+    blocker->set_worker_id(id);
+    fast_task::scheduler::start(blocker);
+
+    while (!blocker_started.load(std::memory_order_acquire))
+        std::this_thread::yield();
+
+    auto queued = std::make_shared<fast_task::task>([&] { queued_ran.store(true, std::memory_order_release); });
+    queued->set_worker_id(id);
+    fast_task::scheduler::start(queued);
+
+    fast_task::thread closer([&] { fast_task::scheduler::close_bind_only_executor(id, true); });
+    std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    blocker_release.store(true, std::memory_order_release);
+    closer.join();
+
+    queued->await_task();
+    blocker->await_task();
+
+    EXPECT_FALSE(queued_ran.load(std::memory_order_acquire));
+
+    fast_task::scheduler::await_no_tasks();
+    fast_task::scheduler::shut_down();
+}

From b4c6989b988802c6db95d52de0970871e6bf2d25 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 18 May 2026 15:51:03 +0000
Subject: [PATCH 7/8] Refine abort-close test determinism and guard-page clear
 safety

Agent-Logs-Url: https://github.com/Melnytskyi/fast_task/sessions/1508b0a8-36ab-4951-93df-4db9191e6658

Co-authored-by: Melnytskyi <17990770+Melnytskyi@users.noreply.github.com>
---
 src/tasks/util/light_stack.cpp         |  6 ++++--
 tests/scheduler/test_bind_executor.cpp | 26 ++------------------------
 2 files changed, 6 insertions(+), 26 deletions(-)

diff --git a/src/tasks/util/light_stack.cpp b/src/tasks/util/light_stack.cpp
index f5823cc..e9e09e3 100644
--- a/src/tasks/util/light_stack.cpp
+++ b/src/tasks/util/light_stack.cpp
@@ -83,7 +83,8 @@ namespace fast_task {
             else {
                 auto* stack_base = static_cast<char*>(result.sp) - result.size;
                 const size_t clear_offset = std::min(guard_page_size, result.size);
-                memset(stack_base + clear_offset, 0xCC, result.size - clear_offset);
+                if (clear_offset < result.size)
+                    memset(stack_base + clear_offset, 0xCC, result.size - clear_offset);
                 return result;
             }
         } else
@@ -193,7 +194,8 @@ namespace fast_task {
             else {
                 auto* stack_base = static_cast<char*>(result.sp) - result.size;
                 const size_t clear_offset = std::min(guard_page_size, result.size);
-                memset(stack_base + clear_offset, 0xCC, result.size - clear_offset);
+                if (clear_offset < result.size)
+                    memset(stack_base + clear_offset, 0xCC, result.size - clear_offset);
                 return result;
             }
         } else
diff --git a/tests/scheduler/test_bind_executor.cpp b/tests/scheduler/test_bind_executor.cpp
index b6bdbbc..87c854e 100644
--- a/tests/scheduler/test_bind_executor.cpp
+++ b/tests/scheduler/test_bind_executor.cpp
@@ -6,8 +6,6 @@
 
 #include <helpers.hpp>
 #include <atomic>
-#include <chrono>
-#include <thread>
 
 TEST(BindExecutor, CreateAndClose) {
     fast_task::scheduler::create_executor(2);
@@ -73,35 +71,15 @@ TEST(BindExecutor, SetWorkerIdOnTask) {
 
 TEST(BindExecutor, CloseAbortTasksAbortsQueuedAndAllowsShutdown) {
     fast_task::scheduler::create_executor(2);
-    uint16_t id = fast_task::scheduler::create_bind_only_executor(1, true);
-
-    std::atomic<bool> blocker_started{false};
-    std::atomic<bool> blocker_release{false};
+    uint16_t id = fast_task::scheduler::create_bind_only_executor(0, false);
     std::atomic<bool> queued_ran{false};
 
-    auto blocker = std::make_shared<fast_task::task>([&] {
-        blocker_started.store(true, std::memory_order_release);
-        while (!blocker_release.load(std::memory_order_acquire))
-            std::this_thread::yield();
-    });
-    blocker->set_worker_id(id);
-    fast_task::scheduler::start(blocker);
-
-    while (!blocker_started.load(std::memory_order_acquire))
-        std::this_thread::yield();
-
     auto queued = std::make_shared<fast_task::task>([&] { queued_ran.store(true, std::memory_order_release); });
     queued->set_worker_id(id);
     fast_task::scheduler::start(queued);
 
-    fast_task::thread closer([&] { fast_task::scheduler::close_bind_only_executor(id, true); });
-    std::this_thread::sleep_for(std::chrono::milliseconds(10));
-    blocker_release.store(true, std::memory_order_release);
-    closer.join();
-
+    fast_task::scheduler::close_bind_only_executor(id, true);
     queued->await_task();
-    blocker->await_task();
-
     EXPECT_FALSE(queued_ran.load(std::memory_order_acquire));
 
     fast_task::scheduler::await_no_tasks();

From 3be6541e78d003ea7c147218412ceab3ff458f92 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 18 May 2026 16:48:30 +0000
Subject: [PATCH 8/8] fix: allocate size + guard_page_size to preserve usable
 stack size

Agent-Logs-Url: https://github.com/Melnytskyi/fast_task/sessions/c5f400fb-8b0b-4f7e-8b0d-1be09730581f

Co-authored-by: Melnytskyi <17990770+Melnytskyi@users.noreply.github.com>
---
 src/tasks/util/light_stack.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/tasks/util/light_stack.cpp b/src/tasks/util/light_stack.cpp
index e9e09e3..d84ef25 100644
--- a/src/tasks/util/light_stack.cpp
+++ b/src/tasks/util/light_stack.cpp
@@ -71,9 +71,10 @@ namespace fast_task {
 
     stack_context light_stack::allocate() {
         const size_t guard_page_size = page_size * FT_GUARD_PAGE_COUNT;
-        const size_t pages = (size + guard_page_size + page_size - 1) / page_size;
-        // add one page at bottom that will be used as guard-page
-        const size_t size__ = (pages + 1) * page_size;
+        // Allocate size + guard_page_size, rounded up to a whole page, so the usable
+        // portion is at least 'size' regardless of guard page configuration (a large
+        // FT_GUARD_PAGE_COUNT would otherwise consume the entire requested allocation).
+        const size_t size__ = ((size + guard_page_size + page_size - 1) / page_size) * page_size;
 
         stack_context result;
         if (stack_allocations.try_dequeue(result)) {
@@ -182,9 +183,10 @@ namespace fast_task {
     light_stack::light_stack(size_t size) BOOST_NOEXCEPT_OR_NOTHROW : size(size) {}
 
     stack_context light_stack::allocate() {
-        const size_t pages = (size + guard_page_size + page_size - 1) / page_size;
-        // add one page at bottom that will be used as guard-page
-        const size_t size__ = (pages + 1) * page_size;
+        // Allocate size + guard_page_size, rounded up to a whole page, so the usable
+        // portion is at least 'size' regardless of guard page configuration (a large
+        // FT_GUARD_PAGE_COUNT would otherwise consume the entire requested allocation).
+        const size_t size__ = ((size + guard_page_size + page_size - 1) / page_size) * page_size;
 
         stack_context result;
         if (stack_allocations.try_dequeue(result)) {