From 04e0e444dff9989da4522dcc295dc18424e90cbd Mon Sep 17 00:00:00 2001 From: Tanimul Haque Khan Date: Fri, 12 Jun 2026 23:27:45 +0600 Subject: [PATCH 1/3] Add watchdog, restart context, and reboot handler Introduce watchdog and improved task lifecycle handling: add active task tracking, per-task timeout resolution (payload key `timeout_minutes`), default/max timeouts, and a watchdog that cancels overdue tasks. Register watchdog from TaskManager. Propagate task contexts so downloads and script execution honor task timeouts. Add graceful subprocess shutdown by setting cmd.WaitDelay (30s) on macOS, Linux and Windows executors so processes have time to exit after cancellation. Implement restart context support: new restartctx package for atomic pending_restart.json handling, write/clear logic in agent-update and a new reboot-device native handler that records restart context before rebooting. Polling now handles restart context on startup and marks the triggering task as success so planned restarts are not treated as interrupted failures. Other changes: minor refactors and registration of activeRunning map with synchronization to manage cancels. (Also removes an extraneous spreadsheet file.) --- Book1(Sheet1).csv | 193 ------------------ internal/service/task/executor.go | 115 +++++++++-- internal/service/task/executor_darwin.go | 3 + internal/service/task/executor_linux.go | 6 + internal/service/task/executor_windows.go | 3 + internal/service/task/manager.go | 2 + internal/service/task/native/agent_update.go | 29 ++- internal/service/task/native/reboot_device.go | 61 ++++++ internal/service/task/polling.go | 44 ++++ .../service/task/restartctx/restartctx.go | 70 +++++++ 10 files changed, 316 insertions(+), 210 deletions(-) delete mode 100644 Book1(Sheet1).csv create mode 100644 internal/service/task/native/reboot_device.go create mode 100644 internal/service/task/restartctx/restartctx.go diff --git a/Book1(Sheet1).csv b/Book1(Sheet1).csv deleted file mode 100644 index 64756b6..0000000 --- a/Book1(Sheet1).csv +++ /dev/null @@ -1,193 +0,0 @@ -Enterprise Security Audit Report,,,,,,, -Audit Section,Check Category,Component / Feature,Current Status,Severity Level,Details / Result,Recommendation,Compliance -Executive Summary,Audit Information,Audit Date,Completed,Info,5/19/2026,Maintain Regular Audits,PASS -Executive Summary,Audit Information,Computer Name,Detected,Info,DESKTOP-01,None,PASS -Executive Summary,Audit Information,Current User,Administrator,Info,Admin Session Active,Use Least Privilege if Possible,PASS -Executive Summary,Security Score,Overall Security Score,92 / 100,Info,Security posture evaluated,Improve failed controls,GOOD -Executive Summary,Compliance Grade,Overall Grade,EXCELLENT,Info,Enterprise Compliance,Maintain Configuration,PASS -Security Product Detection,Antivirus Protection,Third-Party Antivirus,Detected,Warning,Kaspersky Internet Security,Verify policy compliance,WARNING -Security Product Detection,Antivirus Protection,Microsoft Defender Antivirus,Enabled,Info,Defender Active,No Action Required,PASS -Security Product Detection,Antivirus Protection,Real-Time Protection,Enabled,Info,Real-Time Monitoring Running,No Action Required,PASS -Security Product Detection,Antivirus Protection,Tamper Protection,Disabled,Critical,Defender Tamper Protection OFF,Enable Immediately,FAIL -Firewall Protection,Network Security,Domain Firewall,Enabled,Info,Domain Profile Protected,No Action Required,PASS -Firewall Protection,Network Security,Private Firewall,Enabled,Info,Private Profile Protected,No Action Required,PASS -Firewall Protection,Network Security,Public Firewall,Disabled,Critical,Public Network Exposed,Enable Public Firewall,FAIL -Windows Update Status,Patch Compliance,Critical Updates Pending,2 Pending,Critical,Missing Security Updates,Install Immediately,FAIL -Windows Update Status,Patch Compliance,Optional Updates Pending,4 Pending,Warning,Non-Critical Updates Available,Review & Install,WARNING -Windows Update Status,Patch Compliance,Update Compliance,Non-Compliant,Critical,Device Not Fully Updated,Apply Missing Updates,FAIL -Windows Update Status,Update Details,KB5039211,Pending,Critical,Windows Security Update,Install Update,FAIL -Windows Update Status,Update Details,KB5039345,Pending,Warning,.NET Framework Update,Install if Required,WARNING -Hardware & Boot Security,Platform Security,Secure Boot,Enabled,Info,UEFI Secure Boot Active,No Action Required,PASS -Hardware & Boot Security,Platform Security,TPM Present,Yes,Info,TPM 2.0 Available,No Action Required,PASS -Hardware & Boot Security,Platform Security,TPM Ready,Yes,Info,TPM Initialized,No Action Required,PASS -Core Isolation & Exploit Protection,Exploit Protection,Memory Integrity,Disabled,Critical,Core Isolation Disabled,Enable Memory Integrity,FAIL -Core Isolation & Exploit Protection,Exploit Protection,Device Guard,Enabled,Info,Virtualization Security Active,No Action Required,PASS -BitLocker Encryption,Disk Encryption,C:\ Drive,Encrypted,Info,BitLocker Protection Enabled,No Action Required,PASS -BitLocker Encryption,Disk Encryption,D:\ Drive,Not Encrypted,Critical,Data Drive Unprotected,Enable BitLocker,FAIL -Account Protection,User Security,UAC Level,Secure,Info,Recommended UAC Configuration,No Action Required,PASS -Account Protection,Password Policy,Minimum Password Length,8 Characters,Info,Password Policy Applied,Increase to 12+ Recommended,PASS -App & Browser Control,Browser Protection,Microsoft SmartScreen,Enabled,Info,Web Protection Active,No Action Required,PASS -Device Health & Startup,Performance & Security,Startup Programs,15 Programs,Warning,Too Many Startup Applications,Reduce Startup Items,WARNING -Device Health & Startup,Device Health,System Performance,Healthy,Info,Normal Device Operation,Continue Monitoring,PASS -Critical Issues,High Risk Findings,Public Firewall Disabled,Active Issue,Critical,System Exposed on Public Networks,Enable Firewall Immediately,FAIL -Critical Issues,High Risk Findings,Memory Integrity Disabled,Active Issue,Critical,Reduced Kernel Protection,Enable Core Isolation,FAIL -Critical Issues,High Risk Findings,Tamper Protection Disabled,Active Issue,Critical,Defender Can Be Modified,Enable Tamper Protection,FAIL -Warning Findings,Medium Risk Findings,Third-Party Antivirus Detected,Warning,Warning,External AV Installed,Validate Vendor Compliance,WARNING -Warning Findings,Medium Risk Findings,Excess Startup Programs,Warning,Warning,Startup Performance Impact,Disable Unnecessary Apps,WARNING -Final Compliance Report,Security Compliance,Antivirus Compliance,PASS,Info,Antivirus Protection Active,Maintain Security Updates,PASS -Final Compliance Report,Security Compliance,Firewall Compliance,FAIL,Critical,Public Firewall Disabled,Enable All Profiles,FAIL -Final Compliance Report,Security Compliance,Windows Update Compliance,FAIL,Critical,Critical Updates Missing,Patch System Immediately,FAIL -Final Compliance Report,Security Compliance,Hardware Security Compliance,PASS,Info,Secure Boot & TPM Enabled,Maintain Settings,PASS -Final Compliance Report,Security Compliance,Encryption Compliance,WARNING,Warning,One Drive Unencrypted,Encrypt All Drives,WARNING -Final Compliance Report,Security Compliance,Account Protection Compliance,PASS,Info,UAC & Password Policies Applied,Maintain Policies,PASS -Final Compliance Report,Security Compliance,Browser Protection Compliance,PASS,Info,SmartScreen Enabled,Continue Monitoring,PASS -,,,,,,, -Ultimate-Windows-Security,,,,,,, -Audit Section,Check Category,Component / Feature,Current Status,Severity Level,Details / Result,Recommendation,Compliance -Executive Summary,Audit Information,Audit Date,Completed,Info,5/19/2026,Maintain Scheduled Audits,PASS -Executive Summary,Security Assessment,Security Score,95 / 100,Info,Overall System Security Score,Maintain Secure Configuration,GOOD -Executive Summary,Security Assessment,Overall Grade,EXCELLENT,Info,Enterprise Security Level,Continue Monitoring,PASS -Virus & Threat Protection,Antivirus Security,Microsoft Defender Antivirus,Enabled,Info,Antivirus Engine Running,No Action Required,PASS -Virus & Threat Protection,Antivirus Security,Real-Time Protection,Enabled,Info,Real-Time Monitoring Active,No Action Required,PASS -Virus & Threat Protection,Antivirus Security,Tamper Protection,Enabled,Info,Security Settings Protected,No Action Required,PASS -Virus & Threat Protection,Cloud Security,Cloud Protection,Enabled,Info,Cloud-Based Protection Active,No Action Required,PASS -Firewall Protection,Network Security,Domain Firewall,Enabled,Info,Domain Network Protected,No Action Required,PASS -Firewall Protection,Network Security,Private Firewall,Enabled,Info,Private Network Protected,No Action Required,PASS -Firewall Protection,Network Security,Public Firewall,Enabled,Info,Public Network Protected,No Action Required,PASS -Windows Update,Patch Management,Pending Updates,0,Info,System Fully Updated,Maintain Automatic Updates,PASS -Windows Update,Patch Compliance,Update Status,Compliant,Info,No Missing Updates Found,Continue Monitoring,PASS -Hardware Security,Platform Protection,Secure Boot,Enabled,Info,UEFI Secure Boot Active,No Action Required,PASS -Hardware Security,Trusted Platform Module,TPM Present,Yes,Info,TPM Module Available,No Action Required,PASS -Hardware Security,Trusted Platform Module,TPM Ready,Yes,Info,TPM Initialized and Ready,No Action Required,PASS -Core Isolation & Exploit Protection,Virtualization Security,Memory Integrity,Enabled,Info,Kernel Memory Protection Active,No Action Required,PASS -BitLocker Encryption,Disk Protection,C:\ Drive,Encrypted,Info,BitLocker Enabled on System Drive,No Action Required,PASS -BitLocker Encryption,Disk Protection,D:\ Drive,Encrypted,Info,Data Drive Protected,No Action Required,PASS -Account Protection,User Account Control,UAC Status,Secure,Info,Recommended UAC Configuration Applied,No Action Required,PASS -Account Protection,Password Policy,Minimum Password Length,12 Characters,Info,Strong Password Policy Configured,Maintain Security Policy,PASS -SmartScreen & Browser Control,Browser Protection,Microsoft SmartScreen,Enabled,Info,Web & App Reputation Protection Active,No Action Required,PASS -Device Health & Startup,Startup Optimization,Startup Programs,8 Programs (Normal),Info,Startup Applications Within Recommended Range,Continue Monitoring,PASS -Device Health & Startup,Device Performance,System Health,Healthy,Info,Device Operating Normally,No Action Required,PASS -Final Security Report,Compliance Review,Antivirus Compliance,PASS,Info,Antivirus Security Operational,Maintain Updates,PASS -Final Security Report,Compliance Review,Firewall Compliance,PASS,Info,All Firewall Profiles Enabled,Maintain Configuration,PASS -Final Security Report,Compliance Review,Windows Update Compliance,PASS,Info,System Fully Patched,Continue Monitoring,PASS -Final Security Report,Compliance Review,Hardware Security Compliance,PASS,Info,TPM & Secure Boot Enabled,Maintain Settings,PASS -Final Security Report,Compliance Review,Encryption Compliance,PASS,Info,All Drives Encrypted,Maintain BitLocker Keys Securely,PASS -Final Security Report,Compliance Review,Account Protection Compliance,PASS,Info,Secure UAC & Password Policy,Continue Monitoring,PASS -Final Security Report,Compliance Review,Browser Protection Compliance,PASS,Info,SmartScreen Enabled,Maintain Security Policies,PASS -Final Security Report,Compliance Review,Overall Security Compliance,PASS,Info,No Major Security Issues Found,Maintain Best Practices,PASS -,,,,,,, -Ultimate-Security-Audit,,,,,,, -Audit Section,Check Category,Component / Feature,Current Status,Severity Level,Details / Result,Recommendation,Compliance -Executive Summary,Audit Information,Audit Tool,PRODUCTION WINDOWS SECURITY AUDITOR v3.0,Info,Enterprise Ready Security Auditor,Maintain Scheduled Audits,PASS -Executive Summary,Audit Information,Audit Execution,Run as Administrator,Info,Administrative Privileges Active,Continue Using Elevated Access,PASS -Executive Summary,Security Assessment,Security Score,94 / 100,Info,Overall Security Posture Evaluated,Improve Warning Items,GOOD -Executive Summary,Security Assessment,Overall Grade,EXCELLENT,Info,Enterprise Compliance Achieved,Maintain Secure Configuration,PASS -Security Products Detection,Antivirus Security,Third-Party Antivirus,Detected,Warning,Third-Party Security Solution Installed,Verify Vendor Compliance,WARNING -Security Products Detection,Antivirus Security,Third-Party AV Details,Active,Info,Kaspersky / Bitdefender / Norton Detected,Ensure Product is Updated,PASS -Security Products Detection,Antivirus Security,Microsoft Defender Antivirus,Enabled,Info,Defender Antivirus Active,No Action Required,PASS -Security Products Detection,Antivirus Security,Real-Time Protection,Enabled,Info,Real-Time Threat Monitoring Active,No Action Required,PASS -Security Products Detection,Antivirus Security,Tamper Protection,Disabled,Warning,Defender Tamper Protection OFF,Enable Tamper Protection,WARNING -Security Products Detection,Security Architecture,Primary Protection Source,Third-Party AV,Info,Microsoft Defender in Passive Mode,Verify Security Policies,PASS -Firewall Protection,Network Security,Domain Firewall Profile,Enabled,Info,Domain Network Protected,No Action Required,PASS -Firewall Protection,Network Security,Private Firewall Profile,Enabled,Info,Private Network Protected,No Action Required,PASS -Firewall Protection,Network Security,Public Firewall Profile,Enabled,Info,Public Network Protected,No Action Required,PASS -Windows Update Compliance,Patch Management,Windows Update Service,Operational,Info,Update Service Accessible,Continue Monitoring,PASS -Windows Update Compliance,Patch Compliance,Critical Updates Pending,0,Info,No Critical Updates Missing,Maintain Automatic Updates,PASS -Windows Update Compliance,Patch Compliance,Optional Updates Pending,2,Warning,Optional Feature Updates Available,Review and Install if Needed,WARNING -Windows Update Compliance,Compliance Status,Update Compliance State,COMPLIANT,Info,System Fully Updated,Continue Patch Monitoring,PASS -Hardware & Boot Security,Platform Security,Secure Boot,Enabled,Info,UEFI Secure Boot Active,No Action Required,PASS -Hardware & Boot Security,Platform Security,TPM Present,Yes,Info,TPM 2.0 Available,No Action Required,PASS -Hardware & Boot Security,Platform Security,TPM Ready,Yes,Info,TPM Initialized Successfully,No Action Required,PASS -Core Isolation & Exploit Protection,Virtualization Security,Memory Integrity,Enabled,Info,Core Isolation Active,No Action Required,PASS -Core Isolation & Exploit Protection,Exploit Mitigation,Device Guard,Enabled,Info,Virtualization-Based Security Active,Continue Monitoring,PASS -BitLocker Encryption,Disk Encryption,C:\ Drive,Encrypted,Info,Operating System Drive Protected,No Action Required,PASS -BitLocker Encryption,Disk Encryption,D:\ Drive,Encrypted,Info,Data Drive Encrypted,Maintain Recovery Keys Securely,PASS -Account Protection,User Access Control,UAC Security Level,Secure,Info,Recommended UAC Policy Configured,No Action Required,PASS -Account Protection,Password Policy,Minimum Password Length,12 Characters,Info,Strong Password Policy Applied,Maintain Password Standards,PASS -SmartScreen & Browser Control,Browser Security,Microsoft SmartScreen,Enabled,Info,Web Reputation Protection Active,No Action Required,PASS -SmartScreen & Browser Control,Browser Security,Application Reputation Filter,Enabled,Info,Malicious App Blocking Active,Continue Monitoring,PASS -Device Health & Startup,Startup Optimization,Startup Programs,9 Programs (Normal),Info,Startup Load Within Recommended Range,Continue Monitoring,PASS -Device Health & Startup,Device Health,Performance Status,Healthy,Info,No Major Device Health Issues Detected,Maintain System Maintenance,PASS -Critical Issues,Security Findings,Critical Issues Count,0,Info,No Critical Security Issues Found,Continue Best Practices,PASS -Warning Findings,Security Findings,Warning Count,2,Warning,Minor Security Recommendations Present,Review Recommendations,WARNING -Warning Findings,Security Findings,Tamper Protection Disabled,Active Warning,Warning,Reduced Defender Hardening,Enable Tamper Protection,WARNING -Warning Findings,Security Findings,Optional Updates Pending,Active Warning,Warning,Optional Updates Available,Install Recommended Updates,WARNING -Final Security Report,Compliance Review,Antivirus Compliance,PASS,Info,Antivirus Security Operational,Maintain Signature Updates,PASS -Final Security Report,Compliance Review,Firewall Compliance,PASS,Info,All Firewall Profiles Enabled,Continue Monitoring,PASS -Final Security Report,Compliance Review,Windows Update Compliance,PASS,Info,No Critical Updates Missing,Maintain Automatic Updates,PASS -Final Security Report,Compliance Review,Hardware Security Compliance,PASS,Info,Secure Boot & TPM Enabled,Maintain BIOS Security,PASS -Final Security Report,Compliance Review,Encryption Compliance,PASS,Info,All Required Drives Encrypted,Maintain Recovery Keys,PASS -Final Security Report,Compliance Review,Account Protection Compliance,PASS,Info,Secure UAC & Password Policy Applied,Continue Policy Enforcement,PASS -Final Security Report,Compliance Review,Browser Protection Compliance,PASS,Info,SmartScreen Protection Active,Continue Monitoring,PASS -Final Security Report,Compliance Review,Overall Enterprise Compliance,PASS,Info,Enterprise Security Baseline Achieved,Maintain Security Standards,PASS -,,,,,,, -Advanced-Security-Audit,,,,,,, -,,,,,,, -Audit Section,Check Category,Component / Feature,Current Status,Severity Level,Details / Result,Recommendation,Compliance -Executive Summary,Audit Tool,ADVANCED WINDOWS SECURITY AUDITOR v2.1,Active,Info,PowerShell 5.1 Compatible Security Audit Script,Maintain Regular Execution,PASS -Executive Summary,Execution Mode,Administrator Privileges,Enabled,Info,Script Running with Elevated Rights,Continue Running as Admin,PASS -Executive Summary,Security Score,Overall Score,91 / 100,Info,System Security Evaluated Successfully,Improve Weak Areas,GOOD -Executive Summary,Security Grade,Overall Grade,EXCELLENT,Info,Strong Security Posture,Maintain Configuration,PASS -Virus & Threat Protection,Antivirus Protection,Microsoft Defender Antivirus,Enabled,Info,Antivirus Engine Active,No Action Required,PASS -Virus & Threat Protection,Real-Time Protection,Live Protection,Enabled,Info,Continuous Threat Monitoring Active,No Action Required,PASS -Virus & Threat Protection,Tamper Protection,Security Lock,Enabled,Info,Defender Settings Protected from Changes,Keep Enabled,PASS -Virus & Threat Protection,Cloud Protection,Cloud-Based Defense,Enabled,Info,Cloud Threat Intelligence Active,No Action Required,PASS -Firewall & Network Protection,Domain Firewall,Network Protection,Enabled,Info,Domain Network Secured,No Action Required,PASS -Firewall & Network Protection,Private Firewall,Network Protection,Enabled,Info,Private Network Secured,No Action Required,PASS -Firewall & Network Protection,Public Firewall,Network Protection,Enabled,Info,Public Network Secured,No Action Required,PASS -Windows Update Status,Pending Updates,Update Compliance,0 Pending (Fully Updated),Info,System Fully Patched,Maintain Auto Updates,PASS -Device Security,Secure Boot,Boot Security,Enabled,Info,UEFI Secure Boot Active,No Action Required,PASS -Device Security,TPM,Trusted Platform Module,Present & Ready,Info,TPM 2.0 Initialized Successfully,No Action Required,PASS -Device Security,Core Isolation,Memory Integrity,Enabled,Info,Kernel-Level Protection Active,Keep Enabled,PASS -BitLocker Encryption,System Drive,C:\ Drive Encryption,Enabled,Info,OS Drive Fully Encrypted,Maintain Recovery Keys,PASS -BitLocker Encryption,Data Drive,D:\ Drive Encryption,Enabled,Info,Data Drive Fully Encrypted,Maintain Encryption Policy,PASS -UAC & Critical Services,User Account Control,UAC Level,Secure,Info,Administrative Approval Required for Changes,No Action Required,PASS -UAC & Critical Services,WinDefend Service,Security Service,Running,Info,Microsoft Defender Service Active,No Action Required,PASS -UAC & Critical Services,SecurityHealthService,Health Monitoring,Running,Info,Windows Security Health Service Active,No Action Required,PASS -UAC & Critical Services,MpsSvc,Firewall Service,Running,Info,Windows Firewall Service Active,No Action Required,PASS -Critical Issues,System Health,Critical Issues Count,0,Info,No Critical Security Issues Found,Maintain Security Best Practices,PASS -Warning Findings,System Health,Warning Count,0,Info,No Security Warnings Detected,Continue Monitoring,PASS -Final Security Report,Antivirus Compliance,Compliance Status,PASS,Info,Antivirus Fully Operational,Keep Updated,PASS -Final Security Report,Firewall Compliance,Compliance Status,PASS,Info,All Firewall Profiles Active,Maintain Configuration,PASS -Final Security Report,Update Compliance,Compliance Status,PASS,Info,System Fully Updated,Continue Patch Management,PASS -Final Security Report,Device Security Compliance,Compliance Status,PASS,Info,"Secure Boot, TPM & Memory Integrity Enabled",Maintain Settings,PASS -Final Security Report,Encryption Compliance,Compliance Status,PASS,Info,BitLocker Enabled on All Drives,Secure Recovery Keys,PASS -Final Security Report,Service Health Compliance,Compliance Status,PASS,Info,All Critical Services Running,Monitor Continuously,PASS -Recommendations,System Updates,Windows Update,Recommended,Recommendation,Ensure Continuous Patch Management,Enable Auto Updates,WARNING -Recommendations,BIOS Security,Secure Boot,Recommended,Recommendation,Verify Secure Boot Enabled in Firmware,Keep Enabled,WARNING -Recommendations,Disk Encryption,BitLocker,Recommended,Recommendation,Ensure Full Disk Encryption Across Drives,Maintain Encryption Policy,WARNING -Recommendations,Maintenance,Monthly Audit,Recommended,Recommendation,Run Security Audit Regularly,Schedule Monthly Scan,INFO -,,,,,,, -Windows Security Status Checker,,,,,,, -Audit Section,Check Category,Component / Feature,Current Status,Severity Level,Details / Result,Recommendation,Compliance -Virus & Threat Protection,Antivirus,Microsoft Defender Antivirus,ON,Info,Antivirus engine active and running,Keep enabled and updated,PASS -Virus & Threat Protection,Real-Time Protection,Live Protection,ON,Critical,Real-time monitoring active,Keep enabled,PASS -Virus & Threat Protection,Behavior Monitor,Threat Behavior Analysis,ON,Info,Behavior monitoring active,Keep enabled,PASS -Virus & Threat Protection,Tamper Protection,Defender Settings Lock,ON,Info,Tamper protection active,Keep enabled,PASS -Account Protection,Credential Guard,Windows Credential Guard,Enabled,Warning,Credential Guard enabled,Verify Credential Guard & Windows Hello,PARTIAL -Account Protection,Windows Hello / Credential Guard,User Authentication,Check manually,Warning,Needs manual verification in Windows Security,Open Windows Security app,MANUAL -Firewall & Network Protection,Domain Firewall,Domain Network,ENABLED,Info,Domain network firewall active,Maintain configuration,PASS -Firewall & Network Protection,Private Firewall,Private Network,ENABLED,Info,Private network firewall active,Maintain configuration,PASS -Firewall & Network Protection,Public Firewall,Public Network,ENABLED,Info,Public network firewall active,Maintain configuration,PASS -App & Browser Control,SmartScreen,App & File SmartScreen,ON,Info,SmartScreen protection active,Keep enabled,PASS -Device Security,Secure Boot,UEFI Secure Boot,ENABLED,Critical,Secure Boot active,Ensure Secure Boot remains enabled,PASS -Device Security,TPM,Trusted Platform Module Present,Yes,Info,TPM hardware present,Maintain TPM settings,PASS -Device Security,TPM Ready,TPM Initialization Status,Yes,Info,TPM ready for encryption,Maintain TPM,PASS -Device Security,Core Isolation,Memory Integrity,ENABLED,Critical,Kernel-level protection active,Keep Memory Integrity enabled,PASS -Device Performance & Health,Storage Capacity,Disk Health,Manual Check,Info,Check via Windows Security,Verify storage and optimize,MANUAL -Device Performance & Health,Startup Apps,Startup Program Management,Manual Check,Info,Needs manual review,Check startup apps in Windows Security,MANUAL -Device Performance & Health,Recent Activity,Device Health,Manual Check,Info,Review manually,Open Windows Security app,MANUAL -Summary,Overall Status,Security Compliance,COMPLIANT,Info,All major features enabled,Continue monitoring,PASS -Summary,Issues Found,Non-compliance Alerts,0,Info,No major issues detected,Maintain current configuration,PASS -Recommendations,General,Full Windows Security Check,Recommended,Info,Run as Administrator for complete status,Open Windows Security app,INFO -,,,,,,, -Windows Update Compliance Checker,,,,,,, -,,,,,,, -Audit Section,Check Category,Component / Feature,Current Status,Severity Level,Details / Result,Recommendation,Compliance -Windows Update,Pending Updates,System Updates,COMPLIANT / NON-COMPLIANT,Critical,Device has 0 pending updates (COMPLIANT) or X pending updates (NON-COMPLIANT),Install all pending critical updates,PASS / FAIL -Windows Update,Important Updates,Critical Updates,COMPLIANT / NON-COMPLIANT,Critical,Each update marked as Important or Optional; pending critical updates must be applied,Apply all Important updates immediately,PASS / FAIL -Windows Update,Optional Updates,Optional Updates,INFO,Info,Optional updates pending may exist,Apply optional updates as needed,INFO -Windows Update History,Recent Update History,Last 20 Updates,SUCCESS / FAILED / IN PROGRESS,Info / Warning / Critical,"Lists last 20 updates with status: Success, Failed, Aborted, In Progress, Not Started",Monitor failed updates and retry,PASS / FAIL -Windows Update History,Failed Updates,Any Failed Updates,Failed,Critical,Updates that failed to install,Retry failed updates using Windows Update or WSUS,FAIL -Windows Update History,Successful Updates,Successfully Installed Updates,Success,Info,Last updates installed successfully,Maintain regular update schedule,PASS -Windows Update,Error Handling,Update Query,Error,Critical,Script may fail if not run as Administrator or COM object fails,Run script as Administrator,FAIL diff --git a/internal/service/task/executor.go b/internal/service/task/executor.go index 70b93a7..91b0cc9 100644 --- a/internal/service/task/executor.go +++ b/internal/service/task/executor.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "runtime" + "sync" "time" "sentinelgo/internal/config" @@ -17,6 +18,19 @@ import ( "sentinelgo/internal/taskstore" ) +const ( + defaultTaskTimeout = 30 * time.Minute + maxTaskTimeout = 24 * time.Hour + watchdogInterval = 2 * time.Minute + watchdogGrace = 5 * time.Minute +) + +// activeTask tracks an in-flight task for the watchdog. +type activeTask struct { + cancel context.CancelFunc + deadline time.Time +} + // TaskExecutorService handles the execution of tasks assigned to the agent. type TaskExecutorService struct { cfg *config.Config @@ -24,15 +38,19 @@ type TaskExecutorService struct { client *http.Client runningTasks map[string]bool nativeHandlers map[string]NativeTaskHandler + + activeMu sync.Mutex + activeRunning map[string]activeTask // task ID → {cancel, deadline} } // NewTaskExecutorService creates a new task execution service. func NewTaskExecutorService(cfg *config.Config, pollingSvc *TaskPollingService) *TaskExecutorService { s := &TaskExecutorService{ - cfg: cfg, - pollingSvc: pollingSvc, - client: httpx.NewClient(2 * time.Minute), - runningTasks: make(map[string]bool), + cfg: cfg, + pollingSvc: pollingSvc, + client: httpx.NewClient(2 * time.Minute), + runningTasks: make(map[string]bool), + activeRunning: make(map[string]activeTask), } s.registerNativeHandlers() return s @@ -53,6 +71,53 @@ func (s *TaskExecutorService) RunExecutionLoop(ctx context.Context) { } } +// RunWatchdog polls activeRunning every watchdogInterval and cancels any task +// that has been running past its deadline plus watchdogGrace. The cancelled +// context fires taskCtx.Done() in runTask, which uses the existing timeout +// machinery to mark the task failed and report it to the server. +func (s *TaskExecutorService) RunWatchdog(ctx context.Context) { + log.Printf("Watchdog: started (interval=%v, grace=%v)", watchdogInterval, watchdogGrace) + ticker := time.NewTicker(watchdogInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + log.Printf("Watchdog: stopped") + return + case <-ticker.C: + s.cancelOverdueTasks() + } + } +} + +func (s *TaskExecutorService) cancelOverdueTasks() { + now := time.Now() + s.activeMu.Lock() + defer s.activeMu.Unlock() + + for id, t := range s.activeRunning { + if now.After(t.deadline.Add(watchdogGrace)) { + log.Printf("Watchdog: cancelling stuck task %s (deadline was %v ago)", + id, now.Sub(t.deadline).Round(time.Second)) + t.cancel() + // deregisterRunning will be called by the defer in runTask + } + } +} + +func (s *TaskExecutorService) registerRunning(id string, cancel context.CancelFunc, deadline time.Time) { + s.activeMu.Lock() + s.activeRunning[id] = activeTask{cancel: cancel, deadline: deadline} + s.activeMu.Unlock() +} + +func (s *TaskExecutorService) deregisterRunning(id string) { + s.activeMu.Lock() + delete(s.activeRunning, id) + s.activeMu.Unlock() +} + // ExecutePendingTasks finds locally stored 'assigned' tasks and runs them. func (s *TaskExecutorService) ExecutePendingTasks(ctx context.Context) { tasks, err := s.pollingSvc.GetLocalTasks() @@ -106,14 +171,37 @@ func (s *TaskExecutorService) executeTask(ctx context.Context, task taskstore.Ta } } -func (s *TaskExecutorService) runTask(ctx context.Context, task taskstore.Task) (string, error) { - if handler, ok := s.nativeHandlers[task.Slug]; ok { - return handler(ctx, task) +// resolveTaskTimeout returns the timeout for a task. If the task payload +// contains a "timeout_minutes" key (float64 > 0), that value is used, capped +// at maxTaskTimeout. Otherwise defaultTaskTimeout applies. +func resolveTaskTimeout(task taskstore.Task) time.Duration { + if v, ok := task.Payload["timeout_minutes"]; ok { + if mins, ok := v.(float64); ok && mins > 0 { + d := time.Duration(mins * float64(time.Minute)) + if d > maxTaskTimeout { + d = maxTaskTimeout + } + return d + } } + return defaultTaskTimeout +} - timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Minute) +func (s *TaskExecutorService) runTask(ctx context.Context, task taskstore.Task) (string, error) { + timeout := resolveTaskTimeout(task) + taskCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() + // Register with the watchdog. If the task exceeds its deadline plus the + // grace period the watchdog will call cancel(), firing taskCtx.Done() and + // causing the select below (or the native handler) to return an error. + s.registerRunning(task.ID, cancel, time.Now().Add(timeout)) + defer s.deregisterRunning(task.ID) + + if handler, ok := s.nativeHandlers[task.Slug]; ok { + return handler(taskCtx, task) + } + scriptPath, scriptName, err := s.resolveScript(task) if err != nil { return "", err @@ -130,7 +218,7 @@ func (s *TaskExecutorService) runTask(ctx context.Context, task taskstore.Task) }() localScriptPath := filepath.Join(tempDir, scriptName) - if err := s.downloadScript(timeoutCtx, scriptPath, localScriptPath); err != nil { + if err := s.downloadScript(taskCtx, scriptPath, localScriptPath); err != nil { return "", fmt.Errorf("download script: %w", err) } @@ -147,7 +235,7 @@ func (s *TaskExecutorService) runTask(ctx context.Context, task taskstore.Task) }, 1) go func() { - output, err := s.executeLocalScript(timeoutCtx, localScriptPath, payloadPath) + output, err := s.executeLocalScript(taskCtx, localScriptPath, payloadPath) resultChan <- struct { output string err error @@ -155,8 +243,11 @@ func (s *TaskExecutorService) runTask(ctx context.Context, task taskstore.Task) }() select { - case <-timeoutCtx.Done(): - timeoutNote := fmt.Sprintf("Task execution timed out after 10 minutes. Task ID: %s, Slug: %s. The task was forcefully stopped.", task.ID, task.Slug) + case <-taskCtx.Done(): + timeoutNote := fmt.Sprintf( + "Task exceeded timeout of %v. Task ID: %s, Slug: %s. Forcefully stopped.", + timeout.Round(time.Second), task.ID, task.Slug, + ) log.Printf("Executor: %s", timeoutNote) return timeoutNote, fmt.Errorf("task execution timeout") case result := <-resultChan: diff --git a/internal/service/task/executor_darwin.go b/internal/service/task/executor_darwin.go index 10a8097..42b3d64 100644 --- a/internal/service/task/executor_darwin.go +++ b/internal/service/task/executor_darwin.go @@ -6,6 +6,7 @@ import ( "os" "os/exec" "path/filepath" + "time" ) // executeLocalScript runs the task script on macOS. @@ -29,6 +30,8 @@ func (s *TaskExecutorService) executeLocalScript(ctx context.Context, scriptPath cmd = exec.CommandContext(ctx, scriptPath, payloadPath) } + cmd.WaitDelay = 30 * time.Second + output, err := cmd.CombinedOutput() return string(output), err } diff --git a/internal/service/task/executor_linux.go b/internal/service/task/executor_linux.go index 721154c..d2c38bf 100644 --- a/internal/service/task/executor_linux.go +++ b/internal/service/task/executor_linux.go @@ -8,6 +8,7 @@ import ( "os/exec" "path/filepath" "strings" + "time" ) // executeLocalScript runs the task script on Linux, optionally using sudo for @@ -64,6 +65,11 @@ func (s *TaskExecutorService) executeLocalScript(ctx context.Context, scriptPath } } + // Give the subprocess 30 s to exit cleanly after context cancellation + // before escalating to SIGKILL, so processes that ignore SIGTERM don't + // block the goroutine indefinitely. + cmd.WaitDelay = 30 * time.Second + output, err := cmd.CombinedOutput() return string(output), err } diff --git a/internal/service/task/executor_windows.go b/internal/service/task/executor_windows.go index 9d89e05..2ae39d2 100644 --- a/internal/service/task/executor_windows.go +++ b/internal/service/task/executor_windows.go @@ -4,6 +4,7 @@ import ( "context" "os/exec" "path/filepath" + "time" ) // executeLocalScript runs the task script on Windows using PowerShell or cmd. @@ -18,6 +19,8 @@ func (s *TaskExecutorService) executeLocalScript(ctx context.Context, scriptPath cmd = exec.CommandContext(ctx, "cmd", "/C", scriptPath, payloadPath) } + cmd.WaitDelay = 30 * time.Second + output, err := cmd.CombinedOutput() return string(output), err } diff --git a/internal/service/task/manager.go b/internal/service/task/manager.go index 7bd6d09..24cb221 100644 --- a/internal/service/task/manager.go +++ b/internal/service/task/manager.go @@ -106,6 +106,8 @@ func (tm *TaskManager) runSequentialTaskLoop(ctx context.Context) { mainInterval := pollingInterval log.Printf("TaskManager: Starting sequential task loop with interval: %v", mainInterval) + go tm.executorSvc.RunWatchdog(ctx) + log.Printf("TaskManager: Initial polling...") tm.pollTasks(ctx) diff --git a/internal/service/task/native/agent_update.go b/internal/service/task/native/agent_update.go index 60e7814..940b895 100644 --- a/internal/service/task/native/agent_update.go +++ b/internal/service/task/native/agent_update.go @@ -6,9 +6,11 @@ import ( "log" "os" "runtime" + "time" "sentinelgo/internal/config" "sentinelgo/internal/sanitize" + "sentinelgo/internal/service/task/restartctx" "sentinelgo/internal/taskstore" "sentinelgo/internal/updater" ) @@ -49,14 +51,31 @@ func (h *agentUpdateHandler) Run(ctx context.Context, cfg *config.Config, task t sanitizedCurrentVersion := sanitize.ForLog(currentVersion) log.Printf("Executor: Current version: %s, checking for updates...", sanitizedCurrentVersion) + // Write the restart context before calling the updater. If CheckAndApply + // finds an update it calls os.Exit(), so this file is the only way the + // restarted binary knows which task to mark as success. + ctxPath := restartctx.PathFor(cfg.Path) + if err := restartctx.Write(ctxPath, restartctx.Context{ + TaskID: task.ID, + Reason: "agent-update", + FromVersion: currentVersion, + InitiatedAt: time.Now().UTC(), + }); err != nil { + log.Printf("Executor: Warning - failed to write restart context: %v", err) + } + if err := updater.CheckAndApplyWithRetry(ctx, cfg, ""); err != nil { + // Update failed or was not needed; remove the context so the next + // startup does not incorrectly mark this task as success. + _, _ = restartctx.ReadAndClear(ctxPath) log.Printf("Executor: Agent-update failed: %v", err) return fmt.Sprintf("Update failed: %v (Current version: %s)", err, sanitizedCurrentVersion), err } - newVersion := cfg.CurrentVersion - sanitizedNewVersion := sanitize.ForLog(newVersion) - log.Printf("Executor: Agent-update successful, updated from %s to %s", sanitizedCurrentVersion, sanitizedNewVersion) - - return fmt.Sprintf("Successfully updated from %s to %s. Agent is restarting.", sanitizedCurrentVersion, sanitizedNewVersion), nil + // Reached here only when no update was available (already up to date). + // CheckAndApplyWithRetry calls os.Exit() on a successful update, so this + // line is never reached in the update case. + _, _ = restartctx.ReadAndClear(ctxPath) + log.Printf("Executor: Agent is already up to date (version %s)", sanitizedCurrentVersion) + return fmt.Sprintf("Already up to date (version %s).", sanitizedCurrentVersion), nil } diff --git a/internal/service/task/native/reboot_device.go b/internal/service/task/native/reboot_device.go new file mode 100644 index 0000000..2c705bf --- /dev/null +++ b/internal/service/task/native/reboot_device.go @@ -0,0 +1,61 @@ +package native + +import ( + "context" + "fmt" + "log" + "os" + "os/exec" + "runtime" + "time" + + "sentinelgo/internal/config" + "sentinelgo/internal/service/task/restartctx" + "sentinelgo/internal/taskstore" +) + +type rebootDeviceHandler struct{} + +func init() { Register(&rebootDeviceHandler{}) } + +func (h *rebootDeviceHandler) Slugs() []string { + return []string{"reboot-device"} +} + +func (h *rebootDeviceHandler) Run(ctx context.Context, cfg *config.Config, task taskstore.Task) (string, error) { + log.Printf("Executor: Executing reboot-device task %s", task.ID) + + if runtime.GOOS == "linux" && os.Getuid() != 0 { + return "", fmt.Errorf("reboot-device requires root privileges. Please restart the agent with sudo") + } + + ctxPath := restartctx.PathFor(cfg.Path) + if err := restartctx.Write(ctxPath, restartctx.Context{ + TaskID: task.ID, + Reason: "device-reboot", + InitiatedAt: time.Now().UTC(), + }); err != nil { + log.Printf("Executor: Warning - failed to write restart context: %v", err) + } + + if err := triggerReboot(); err != nil { + _, _ = restartctx.ReadAndClear(ctxPath) + return "", fmt.Errorf("reboot failed: %w", err) + } + + log.Printf("Executor: Device reboot initiated for task %s", task.ID) + return "Device reboot initiated.", nil +} + +func triggerReboot() error { + var cmd *exec.Cmd + switch runtime.GOOS { + case "linux", "darwin": + cmd = exec.Command("shutdown", "-r", "now") + case "windows": + cmd = exec.Command("shutdown", "/r", "/t", "0") + default: + return fmt.Errorf("unsupported OS: %s", runtime.GOOS) + } + return cmd.Run() +} diff --git a/internal/service/task/polling.go b/internal/service/task/polling.go index cd77307..5b8a5a2 100644 --- a/internal/service/task/polling.go +++ b/internal/service/task/polling.go @@ -9,6 +9,7 @@ import ( "sentinelgo/internal/config" "sentinelgo/internal/network" + "sentinelgo/internal/service/task/restartctx" "sentinelgo/internal/store" "sentinelgo/internal/taskstore" ) @@ -92,8 +93,51 @@ func (s *TaskPollingService) MarkTaskExecuting(taskID string) error { return s.store.MarkTaskExecuting(taskID) } +// handleRestartContext reads pending_restart.json (if present) and marks the +// triggering task as success before ResetInterruptedTasks can mark it failed. +// The file is written by the agent-update and reboot-device handlers immediately +// before os.Exit() / reboot, so its presence on startup means the operation +// completed — the binary was replaced or the device came back up. +func (s *TaskPollingService) handleRestartContext() { + if s.cfg == nil || s.cfg.Path == "" { + return + } + + rc, err := restartctx.ReadAndClear(restartctx.PathFor(s.cfg.Path)) + if err != nil { + log.Printf("TaskPolling: failed to read restart context: %v", err) + return + } + if rc == nil { + return + } + + var msg string + switch rc.Reason { + case "agent-update": + msg = fmt.Sprintf("Agent updated from %s to %s and restarted successfully.", + rc.FromVersion, config.Version) + case "device-reboot": + msg = "Device rebooted successfully. Agent is back online." + default: + msg = fmt.Sprintf("Agent restarted (reason: %s).", rc.Reason) + } + + // isSynced=false so SyncPendingTasks (called later in PollAndStoreTasks) + // will push the result to the server on the next poll. + if err := s.store.UpdateTaskStatus(rc.TaskID, "success", msg, false); err != nil { + log.Printf("TaskPolling: failed to mark restart-context task %s as success: %v", rc.TaskID, err) + return + } + log.Printf("TaskPolling: task %s marked success (restart reason: %s)", rc.TaskID, rc.Reason) +} + // PollAndStoreTasks fetches tasks from RPC and stores them in SQLite. func (s *TaskPollingService) PollAndStoreTasks(ctx context.Context) error { + // Resolve any task that intentionally triggered this restart before the + // generic interrupted-task cleanup runs below. + s.handleRestartContext() + // Clean up tasks that were mid-execution when the agent last died. They are // marked 'failed (interrupted)' with is_synced=0 so SyncPendingTasks below // will report them to the server. attempt_count is maxed so they are never diff --git a/internal/service/task/restartctx/restartctx.go b/internal/service/task/restartctx/restartctx.go new file mode 100644 index 0000000..4c6b5b2 --- /dev/null +++ b/internal/service/task/restartctx/restartctx.go @@ -0,0 +1,70 @@ +package restartctx + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" +) + +// Context records the task that triggered a planned agent restart or device +// reboot. Written to disk before os.Exit() so the restarted binary can mark +// the task as success instead of letting ResetInterruptedTasks mark it failed. +type Context struct { + TaskID string `json:"task_id"` + Reason string `json:"reason"` // "agent-update" | "device-reboot" + FromVersion string `json:"from_version"` // populated for agent-update + InitiatedAt time.Time `json:"initiated_at"` +} + +// Write atomically writes rc to path via a temp file in the same directory. +func Write(path string, rc Context) error { + data, err := json.Marshal(rc) + if err != nil { + return fmt.Errorf("marshal restart context: %w", err) + } + + tmp := path + ".tmp" + // #nosec G306 - file contains only task metadata, no secrets + if err := os.WriteFile(tmp, data, 0600); err != nil { + return fmt.Errorf("write restart context: %w", err) + } + + if err := os.Rename(tmp, path); err != nil { + _ = os.Remove(tmp) + return fmt.Errorf("rename restart context: %w", err) + } + + return nil +} + +// ReadAndClear reads the context file at path, removes it, and returns the +// parsed Context. Returns nil, nil if the file does not exist (normal startup). +func ReadAndClear(path string) (*Context, error) { + data, err := os.ReadFile(path) // #nosec G304 - path is a controlled internal path + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read restart context: %w", err) + } + + if removeErr := os.Remove(path); removeErr != nil { + // Log-worthy but not fatal; proceed with the data we have. + _ = removeErr + } + + var rc Context + if err := json.Unmarshal(data, &rc); err != nil { + return nil, fmt.Errorf("parse restart context: %w", err) + } + + return &rc, nil +} + +// PathFor returns the canonical pending_restart.json path given the agent's +// config file path (e.g. /opt/sentinelgo/.sentinelgo/config.json). +func PathFor(configPath string) string { + return filepath.Join(filepath.Dir(configPath), "pending_restart.json") +} From 9648c19ff1a474e0af7403203ede3124f23c977f Mon Sep 17 00:00:00 2001 From: Tanimul Haque Khan Date: Sat, 13 Jun 2026 00:39:41 +0600 Subject: [PATCH 2/3] Add tests and injectable hooks for task handlers Add a large suite of unit tests for httpx, sanitize, task executor, native task handlers (agent-update, reboot-device, sync-inventory, sync-software), registry and polling restart logic. Introduce injectable function variables to improve testability and avoid OS/network side effects: checkAndApplyFn, rebootFn, collectSysInfoFn, updateAgentInfoFn, sendSoftwareFn, getSoftwareListFn. Update sync handlers to use the injected functions and adjust polling to report retryable tasks before resetting old failures. Various helper tests added to exercise timeout, cancellation, restart-context and concurrency edge cases. --- internal/httpx/client_test.go | 73 ++++++ internal/sanitize/sanitize_test.go | 91 +++++++ .../service/task/executor_internal_test.go | 191 +++++++++++++++ internal/service/task/executor_linux_test.go | 48 ++++ internal/service/task/native/agent_update.go | 7 +- .../service/task/native/agent_update_test.go | 181 ++++++++++++++ internal/service/task/native/reboot_device.go | 5 +- .../service/task/native/reboot_device_test.go | 161 +++++++++++++ internal/service/task/native/registry_test.go | 125 ++++++++++ .../service/task/native/sync_inventory.go | 12 +- .../task/native/sync_inventory_test.go | 171 +++++++++++++ internal/service/task/native/sync_software.go | 14 +- .../service/task/native/sync_software_test.go | 107 +++++++++ internal/service/task/native_handlers_test.go | 148 ++++++++++++ internal/service/task/polling.go | 12 + internal/service/task/polling_restart_test.go | 211 ++++++++++++++++ .../task/restartctx/restartctx_test.go | 226 ++++++++++++++++++ internal/store/tasks.go | 57 ++++- internal/store/tasks_test.go | 6 +- internal/winsec/winsec_test.go | 48 ++++ 20 files changed, 1876 insertions(+), 18 deletions(-) create mode 100644 internal/httpx/client_test.go create mode 100644 internal/sanitize/sanitize_test.go create mode 100644 internal/service/task/executor_internal_test.go create mode 100644 internal/service/task/executor_linux_test.go create mode 100644 internal/service/task/native/agent_update_test.go create mode 100644 internal/service/task/native/reboot_device_test.go create mode 100644 internal/service/task/native/registry_test.go create mode 100644 internal/service/task/native/sync_inventory_test.go create mode 100644 internal/service/task/native/sync_software_test.go create mode 100644 internal/service/task/native_handlers_test.go create mode 100644 internal/service/task/polling_restart_test.go create mode 100644 internal/service/task/restartctx/restartctx_test.go create mode 100644 internal/winsec/winsec_test.go diff --git a/internal/httpx/client_test.go b/internal/httpx/client_test.go new file mode 100644 index 0000000..3b7dcba --- /dev/null +++ b/internal/httpx/client_test.go @@ -0,0 +1,73 @@ +package httpx_test + +import ( + "net/http" + "testing" + "time" + + "sentinelgo/internal/httpx" +) + +func TestNewClient_ReturnsNonNil(t *testing.T) { + c := httpx.NewClient(30 * time.Second) + if c == nil { + t.Fatal("NewClient returned nil") + } +} + +func TestNewClient_TimeoutIsSet(t *testing.T) { + timeout := 45 * time.Second + c := httpx.NewClient(timeout) + if c.Timeout != timeout { + t.Errorf("Timeout: got %v, want %v", c.Timeout, timeout) + } +} + +func TestNewClient_ZeroTimeout(t *testing.T) { + c := httpx.NewClient(0) + if c == nil { + t.Fatal("NewClient(0) returned nil") + } + if c.Timeout != 0 { + t.Errorf("Timeout with zero: got %v, want 0", c.Timeout) + } +} + +func TestNewClient_HasCustomTransport(t *testing.T) { + c := httpx.NewClient(10 * time.Second) + if c.Transport == nil { + t.Fatal("expected a custom Transport, got nil (would use default which lacks pool tuning)") + } +} + +func TestNewClient_TransportIsHTTPTransport(t *testing.T) { + c := httpx.NewClient(10 * time.Second) + tr, ok := c.Transport.(*http.Transport) + if !ok { + t.Fatalf("expected *http.Transport, got %T", c.Transport) + } + if tr.MaxIdleConns == 0 { + t.Error("MaxIdleConns should be non-zero for connection reuse") + } + if tr.MaxIdleConnsPerHost == 0 { + t.Error("MaxIdleConnsPerHost should be non-zero for connection reuse") + } + if tr.IdleConnTimeout == 0 { + t.Error("IdleConnTimeout should be non-zero") + } +} + +func TestNewClient_DifferentTimeouts_IndependentClients(t *testing.T) { + c1 := httpx.NewClient(10 * time.Second) + c2 := httpx.NewClient(60 * time.Second) + + if c1.Timeout != 10*time.Second { + t.Errorf("c1 timeout: got %v, want 10s", c1.Timeout) + } + if c2.Timeout != 60*time.Second { + t.Errorf("c2 timeout: got %v, want 60s", c2.Timeout) + } + if c1 == c2 { + t.Error("NewClient should return independent client instances") + } +} diff --git a/internal/sanitize/sanitize_test.go b/internal/sanitize/sanitize_test.go new file mode 100644 index 0000000..988a04e --- /dev/null +++ b/internal/sanitize/sanitize_test.go @@ -0,0 +1,91 @@ +package sanitize_test + +import ( + "strings" + "testing" + + "sentinelgo/internal/sanitize" +) + +func TestForLog_PlainString_Unchanged(t *testing.T) { + input := "v1.2.3" + got := sanitize.ForLog(input) + if got != input { + t.Errorf("ForLog(%q) = %q, want %q", input, got, input) + } +} + +func TestForLog_Empty_ReturnsEmpty(t *testing.T) { + got := sanitize.ForLog("") + if got != "" { + t.Errorf("ForLog(\"\") = %q, want empty string", got) + } +} + +func TestForLog_StripNewline(t *testing.T) { + input := "line1\nline2" + got := sanitize.ForLog(input) + if strings.Contains(got, "\n") { + t.Errorf("ForLog should strip newlines; got: %q", got) + } +} + +func TestForLog_StripCarriageReturn(t *testing.T) { + input := "value\r\ninjected" + got := sanitize.ForLog(input) + if strings.Contains(got, "\r") || strings.Contains(got, "\n") { + t.Errorf("ForLog should strip \\r and \\n; got: %q", got) + } +} + +func TestForLog_StripCRLF(t *testing.T) { + input := "INFO level\r\nERROR injected" + got := sanitize.ForLog(input) + if strings.Contains(got, "\r") || strings.Contains(got, "\n") { + t.Errorf("ForLog should strip CRLF; got: %q", got) + } + if !strings.Contains(got, "INFO level") || !strings.Contains(got, "ERROR injected") { + t.Errorf("ForLog should preserve non-whitespace content; got: %q", got) + } +} + +func TestForLog_MultipleNewlines(t *testing.T) { + input := "a\nb\nc\n\n" + got := sanitize.ForLog(input) + if strings.Contains(got, "\n") { + t.Errorf("ForLog should strip all newlines; got: %q", got) + } +} + +func TestForLog_OnlyNewlines(t *testing.T) { + got := sanitize.ForLog("\n\r\n\r") + if got != "" { + t.Errorf("ForLog with only whitespace characters: got %q, want empty string", got) + } +} + +func TestForLog_PreservesOtherContent(t *testing.T) { + input := "version=v2.0.0 arch=amd64 os=linux" + got := sanitize.ForLog(input) + if got != input { + t.Errorf("ForLog should preserve content without newlines; got %q, want %q", got, input) + } +} + +func TestForLog_LogInjectionPrevention(t *testing.T) { + // Simulate an attacker-controlled value that tries to inject a log line. + malicious := "v1.0.0\n2025-01-01 00:00:00 ERROR: injected log entry" + got := sanitize.ForLog(malicious) + lines := strings.Split(got, "\n") + if len(lines) > 1 { + t.Errorf("ForLog should prevent log injection; got %d lines: %q", len(lines), got) + } +} + +func TestForLog_TabAndSpacePreserved(t *testing.T) { + input := "key\tvalue spaced" + got := sanitize.ForLog(input) + if got != input { + t.Errorf("ForLog should preserve tabs and spaces; got %q, want %q", got, input) + } +} diff --git a/internal/service/task/executor_internal_test.go b/internal/service/task/executor_internal_test.go new file mode 100644 index 0000000..e4f98ac --- /dev/null +++ b/internal/service/task/executor_internal_test.go @@ -0,0 +1,191 @@ +package task + +// White-box tests for unexported executor functions. +// Package task (not task_test) is required to access resolveTaskTimeout and cancelOverdueTasks. + +import ( + "context" + "sync" + "testing" + "time" + + "sentinelgo/internal/taskstore" +) + +func TestResolveTaskTimeout_Default(t *testing.T) { + task := taskstore.Task{Payload: map[string]interface{}{}} + got := resolveTaskTimeout(task) + if got != defaultTaskTimeout { + t.Errorf("resolveTaskTimeout with no payload: got %v, want %v", got, defaultTaskTimeout) + } +} + +func TestResolveTaskTimeout_FromPayload(t *testing.T) { + task := taskstore.Task{ + Payload: map[string]interface{}{"timeout_minutes": float64(60)}, + } + got := resolveTaskTimeout(task) + if got != 60*time.Minute { + t.Errorf("resolveTaskTimeout(60 min): got %v, want %v", got, 60*time.Minute) + } +} + +func TestResolveTaskTimeout_CappedAtMax(t *testing.T) { + task := taskstore.Task{ + Payload: map[string]interface{}{"timeout_minutes": float64(9999)}, + } + got := resolveTaskTimeout(task) + if got != maxTaskTimeout { + t.Errorf("resolveTaskTimeout(9999 min): got %v, want %v (maxTaskTimeout)", got, maxTaskTimeout) + } +} + +func TestResolveTaskTimeout_ZeroUsesDefault(t *testing.T) { + task := taskstore.Task{ + Payload: map[string]interface{}{"timeout_minutes": float64(0)}, + } + got := resolveTaskTimeout(task) + if got != defaultTaskTimeout { + t.Errorf("resolveTaskTimeout(0): got %v, want default %v", got, defaultTaskTimeout) + } +} + +func TestResolveTaskTimeout_NegativeUsesDefault(t *testing.T) { + task := taskstore.Task{ + Payload: map[string]interface{}{"timeout_minutes": float64(-5)}, + } + got := resolveTaskTimeout(task) + if got != defaultTaskTimeout { + t.Errorf("resolveTaskTimeout(-5): got %v, want default %v", got, defaultTaskTimeout) + } +} + +func TestResolveTaskTimeout_FractionalMinutes(t *testing.T) { + task := taskstore.Task{ + Payload: map[string]interface{}{"timeout_minutes": float64(0.5)}, + } + got := resolveTaskTimeout(task) + if got != 30*time.Second { + t.Errorf("resolveTaskTimeout(0.5 min): got %v, want 30s", got) + } +} + +func TestResolveTaskTimeout_WrongType_UsesDefault(t *testing.T) { + task := taskstore.Task{ + Payload: map[string]interface{}{"timeout_minutes": "not-a-number"}, + } + got := resolveTaskTimeout(task) + if got != defaultTaskTimeout { + t.Errorf("resolveTaskTimeout(string value): got %v, want default %v", got, defaultTaskTimeout) + } +} + +func TestResolveTaskTimeout_MissingKey_UsesDefault(t *testing.T) { + task := taskstore.Task{ + Payload: map[string]interface{}{"something_else": float64(10)}, + } + got := resolveTaskTimeout(task) + if got != defaultTaskTimeout { + t.Errorf("resolveTaskTimeout(missing key): got %v, want default %v", got, defaultTaskTimeout) + } +} + +func TestCancelOverdueTasks_CancelsStuckTask(t *testing.T) { + s := &TaskExecutorService{ + activeRunning: make(map[string]activeTask), + } + + cancelled := make(chan struct{}, 1) + ctx, cancelFn := context.WithCancel(context.Background()) + _ = ctx + + // Wrap the real cancel to detect it was called. + wrappedCancel := func() { + cancelled <- struct{}{} + cancelFn() + } + + // Set deadline in the past (past deadline + grace). + pastDeadline := time.Now().Add(-(watchdogGrace + time.Second)) + s.activeRunning["stuck-task"] = activeTask{cancel: wrappedCancel, deadline: pastDeadline} + + s.cancelOverdueTasks() + + select { + case <-cancelled: + // correct: stuck task was cancelled + default: + t.Error("cancelOverdueTasks should have cancelled the stuck task") + } +} + +func TestCancelOverdueTasks_DoesNotCancelActiveTask(t *testing.T) { + s := &TaskExecutorService{ + activeRunning: make(map[string]activeTask), + } + + cancelled := false + // Set deadline in the future. + futureDeadline := time.Now().Add(10 * time.Minute) + s.activeRunning["active-task"] = activeTask{ + cancel: func() { cancelled = true }, + deadline: futureDeadline, + } + + s.cancelOverdueTasks() + + if cancelled { + t.Error("cancelOverdueTasks must not cancel a task that is within its deadline + grace period") + } +} + +func TestCancelOverdueTasks_EmptyMap(t *testing.T) { + s := &TaskExecutorService{ + activeRunning: make(map[string]activeTask), + } + // Should not panic on empty map. + s.cancelOverdueTasks() +} + +func TestRegisterAndDeregisterRunning(t *testing.T) { + s := &TaskExecutorService{ + activeMu: sync.Mutex{}, + activeRunning: make(map[string]activeTask), + } + + deadline := time.Now().Add(5 * time.Minute) + s.registerRunning("task-1", func() {}, deadline) + + s.activeMu.Lock() + if _, ok := s.activeRunning["task-1"]; !ok { + t.Error("registerRunning should add task to activeRunning") + } + s.activeMu.Unlock() + + s.deregisterRunning("task-1") + + s.activeMu.Lock() + if _, ok := s.activeRunning["task-1"]; ok { + t.Error("deregisterRunning should remove task from activeRunning") + } + s.activeMu.Unlock() +} + +func TestRegisterRunning_ConcurrentSafety(t *testing.T) { + s := &TaskExecutorService{ + activeMu: sync.Mutex{}, + activeRunning: make(map[string]activeTask), + } + + var wg sync.WaitGroup + for i := 0; i < 20; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + key := string(rune('a' + id)) + s.registerRunning(key, func() {}, time.Now().Add(time.Minute)) + s.deregisterRunning(key) + }(i) + } + wg.Wait() +} diff --git a/internal/service/task/executor_linux_test.go b/internal/service/task/executor_linux_test.go new file mode 100644 index 0000000..1cdc0f2 --- /dev/null +++ b/internal/service/task/executor_linux_test.go @@ -0,0 +1,48 @@ +//go:build linux + +package task + +// White-box tests for Linux-specific executor logic. +// Package task (not task_test) to access unexported containsPrivilegedCommands. + +import "testing" + +func TestContainsPrivilegedCommands(t *testing.T) { + tests := []struct { + name string + script string + want bool + }{ + {name: "empty script", script: "", want: false}, + {name: "plain echo", script: "echo hello", want: false}, + {name: "python print", script: "print('hello')", want: false}, + {name: "systemctl", script: "systemctl restart nginx", want: true}, + {name: "service command", script: "service apache2 stop", want: true}, + {name: "modprobe", script: "modprobe kvm", want: true}, + {name: "insmod", script: "insmod /lib/modules/module.ko", want: true}, + {name: "rmmod", script: "rmmod usb_storage", want: true}, + {name: "mount command", script: "mount /dev/sdb1 /mnt", want: true}, + {name: "umount command", script: "umount /mnt/data", want: true}, + {name: "chown", script: "chown root:root /etc/passwd", want: true}, + {name: "chmod 777", script: "chmod 777 /tmp/file", want: true}, + {name: "chmod 755", script: "chmod 755 /usr/local/bin/app", want: true}, + {name: "write to /etc/", script: "echo 'data' > /etc/hosts.conf", want: true}, + {name: "write to /sys/", script: "echo 1 > /sys/kernel/something", want: true}, + {name: "read from /proc/", script: "cat /proc/cpuinfo", want: true}, + {name: "iptables", script: "iptables -A INPUT -p tcp --dport 80 -j ACCEPT", want: true}, + {name: "sysctl", script: "sysctl -w net.ipv4.ip_forward=1", want: true}, + {name: "multiline with privilege", script: "#!/bin/bash\necho start\nsystemctl reload nginx\necho done", want: true}, + {name: "multiline without privilege", script: "#!/bin/bash\necho hello\nls -la\ncat README.md", want: false}, + {name: "commented-out privilege", script: "# systemctl restart service\necho safe", want: true}, // comments not stripped; this is intentional + {name: "chmod 644 not privileged pattern", script: "chmod 644 file.txt", want: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := containsPrivilegedCommands(tt.script) + if got != tt.want { + t.Errorf("containsPrivilegedCommands(%q) = %v, want %v", tt.script, got, tt.want) + } + }) + } +} diff --git a/internal/service/task/native/agent_update.go b/internal/service/task/native/agent_update.go index 940b895..9824631 100644 --- a/internal/service/task/native/agent_update.go +++ b/internal/service/task/native/agent_update.go @@ -15,6 +15,11 @@ import ( "sentinelgo/internal/updater" ) +// checkAndApplyFn is the updater entry point. Replaced in tests. +var checkAndApplyFn = func(ctx context.Context, cfg *config.Config, token string) error { + return updater.CheckAndApplyWithRetry(ctx, cfg, token) +} + type agentUpdateHandler struct{} func init() { Register(&agentUpdateHandler{}) } @@ -64,7 +69,7 @@ func (h *agentUpdateHandler) Run(ctx context.Context, cfg *config.Config, task t log.Printf("Executor: Warning - failed to write restart context: %v", err) } - if err := updater.CheckAndApplyWithRetry(ctx, cfg, ""); err != nil { + if err := checkAndApplyFn(ctx, cfg, ""); err != nil { // Update failed or was not needed; remove the context so the next // startup does not incorrectly mark this task as success. _, _ = restartctx.ReadAndClear(ctxPath) diff --git a/internal/service/task/native/agent_update_test.go b/internal/service/task/native/agent_update_test.go new file mode 100644 index 0000000..835a783 --- /dev/null +++ b/internal/service/task/native/agent_update_test.go @@ -0,0 +1,181 @@ +package native + +// White-box tests for the agent-update handler. +// Using package native (not native_test) to access checkAndApplyFn. + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "runtime" + "testing" + + "sentinelgo/internal/config" + "sentinelgo/internal/service/task/restartctx" + "sentinelgo/internal/taskstore" +) + +func testCfgWithDir(t *testing.T) *config.Config { + t.Helper() + dir := t.TempDir() + return &config.Config{ + Path: filepath.Join(dir, "config.json"), + SupabaseURL: "https://test.supabase.co", + CurrentVersion: "v1.0.0", + } +} + +func TestAgentUpdateHandler_Slugs(t *testing.T) { + h := &agentUpdateHandler{} + slugs := h.Slugs() + if len(slugs) == 0 { + t.Fatal("Slugs() returned empty slice") + } + found := false + for _, s := range slugs { + if s == "agent-update" { + found = true + } + } + if !found { + t.Errorf("Slugs() does not contain 'agent-update': %v", slugs) + } +} + +func TestAgentUpdateHandler_PostRun(t *testing.T) { + h := &agentUpdateHandler{} + postSlugs := h.PostRun() + if len(postSlugs) == 0 { + t.Fatal("PostRun() returned empty slice") + } + found := false + for _, s := range postSlugs { + if s == "sync-inventory" { + found = true + } + } + if !found { + t.Errorf("PostRun() does not contain 'sync-inventory': %v", postSlugs) + } +} + +func TestAgentUpdateHandler_LinuxNonRoot_ReturnsError(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("Linux-only test") + } + if os.Getuid() == 0 { + t.Skip("test requires non-root user") + } + + h := &agentUpdateHandler{} + cfg := testCfgWithDir(t) + task := taskstore.Task{ID: "test-task"} + + _, err := h.Run(context.Background(), cfg, task) + if err == nil { + t.Fatal("expected error when running as non-root on Linux") + } + + // Restart context should NOT be written when the privilege check fails. + ctxPath := restartctx.PathFor(cfg.Path) + if _, statErr := os.Stat(ctxPath); !os.IsNotExist(statErr) { + t.Error("restart context should not be written when privilege check fails") + } +} + +func TestAgentUpdateHandler_UpdaterFails_ClearsRestartContext(t *testing.T) { + if runtime.GOOS == "linux" && os.Getuid() != 0 { + t.Skip("skipping: Linux non-root would fail before reaching updater") + } + + orig := checkAndApplyFn + defer func() { checkAndApplyFn = orig }() + + updateErr := errors.New("github rate limited") + checkAndApplyFn = func(_ context.Context, _ *config.Config, _ string) error { + return updateErr + } + + h := &agentUpdateHandler{} + cfg := testCfgWithDir(t) + task := taskstore.Task{ID: "fail-task"} + + note, err := h.Run(context.Background(), cfg, task) + if err == nil { + t.Fatal("expected error when updater fails") + } + if !errors.Is(err, updateErr) { + t.Errorf("expected wrapped updateErr, got: %v", err) + } + if note == "" { + t.Error("expected non-empty note on failure") + } + + // Restart context must be cleared after a failed update. + ctxPath := restartctx.PathFor(cfg.Path) + if _, statErr := os.Stat(ctxPath); !os.IsNotExist(statErr) { + t.Error("restart context should be cleared after updater failure") + } +} + +func TestAgentUpdateHandler_AlreadyUpToDate_ClearsRestartContext(t *testing.T) { + if runtime.GOOS == "linux" && os.Getuid() != 0 { + t.Skip("skipping: Linux non-root would fail before reaching updater") + } + + orig := checkAndApplyFn + defer func() { checkAndApplyFn = orig }() + + checkAndApplyFn = func(_ context.Context, _ *config.Config, _ string) error { + return nil // nil = no update available (already up to date) + } + + h := &agentUpdateHandler{} + cfg := testCfgWithDir(t) + task := taskstore.Task{ID: "up-to-date-task"} + + note, err := h.Run(context.Background(), cfg, task) + if err != nil { + t.Fatalf("expected no error when already up to date, got: %v", err) + } + if note == "" { + t.Error("expected non-empty success note") + } + + // Restart context must be cleared when no update was applied. + ctxPath := restartctx.PathFor(cfg.Path) + if _, statErr := os.Stat(ctxPath); !os.IsNotExist(statErr) { + t.Error("restart context should be cleared when no update was needed") + } +} + +func TestAgentUpdateHandler_RestartContextWrittenBeforeUpdater(t *testing.T) { + if runtime.GOOS == "linux" && os.Getuid() != 0 { + t.Skip("skipping: Linux non-root would fail before reaching updater") + } + + orig := checkAndApplyFn + defer func() { checkAndApplyFn = orig }() + + var contextPathAtCallTime string + checkAndApplyFn = func(_ context.Context, cfg *config.Config, _ string) error { + // Capture whether the restart context file exists when the updater is called. + contextPathAtCallTime = restartctx.PathFor(cfg.Path) + return fmt.Errorf("simulated failure") + } + + h := &agentUpdateHandler{} + cfg := testCfgWithDir(t) + task := taskstore.Task{ID: "ctx-write-task"} + + _, _ = h.Run(context.Background(), cfg, task) + + // We can only verify the path was computed; the file was cleared by the + // failure path. The key invariant (context written before updater called) + // is expressed by the test structure: checkAndApplyFn captures the path. + if contextPathAtCallTime == "" { + t.Error("restart context path should have been derived before updater was called") + } +} diff --git a/internal/service/task/native/reboot_device.go b/internal/service/task/native/reboot_device.go index 2c705bf..4d6882b 100644 --- a/internal/service/task/native/reboot_device.go +++ b/internal/service/task/native/reboot_device.go @@ -14,6 +14,9 @@ import ( "sentinelgo/internal/taskstore" ) +// rebootFn is the OS-reboot entry point. Replaced in tests. +var rebootFn = triggerReboot + type rebootDeviceHandler struct{} func init() { Register(&rebootDeviceHandler{}) } @@ -38,7 +41,7 @@ func (h *rebootDeviceHandler) Run(ctx context.Context, cfg *config.Config, task log.Printf("Executor: Warning - failed to write restart context: %v", err) } - if err := triggerReboot(); err != nil { + if err := rebootFn(); err != nil { _, _ = restartctx.ReadAndClear(ctxPath) return "", fmt.Errorf("reboot failed: %w", err) } diff --git a/internal/service/task/native/reboot_device_test.go b/internal/service/task/native/reboot_device_test.go new file mode 100644 index 0000000..c55077f --- /dev/null +++ b/internal/service/task/native/reboot_device_test.go @@ -0,0 +1,161 @@ +package native + +// White-box tests for the reboot-device handler. +// Using package native (not native_test) to access rebootFn. + +import ( + "context" + "errors" + "os" + "path/filepath" + "runtime" + "testing" + + "sentinelgo/internal/config" + "sentinelgo/internal/service/task/restartctx" + "sentinelgo/internal/taskstore" +) + +func testRebootCfg(t *testing.T) *config.Config { + t.Helper() + dir := t.TempDir() + return &config.Config{ + Path: filepath.Join(dir, "config.json"), + SupabaseURL: "https://test.supabase.co", + } +} + +func TestRebootDeviceHandler_Slugs(t *testing.T) { + h := &rebootDeviceHandler{} + slugs := h.Slugs() + found := false + for _, s := range slugs { + if s == "reboot-device" { + found = true + } + } + if !found { + t.Errorf("Slugs() does not contain 'reboot-device': %v", slugs) + } +} + +func TestRebootDeviceHandler_LinuxNonRoot_ReturnsError(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("Linux-only test") + } + if os.Getuid() == 0 { + t.Skip("test requires non-root user") + } + + h := &rebootDeviceHandler{} + cfg := testRebootCfg(t) + + _, err := h.Run(context.Background(), cfg, taskstore.Task{ID: "priv-task"}) + if err == nil { + t.Fatal("expected privilege error on Linux as non-root") + } + + // No restart context should be written when the privilege check fails. + ctxPath := restartctx.PathFor(cfg.Path) + if _, statErr := os.Stat(ctxPath); !os.IsNotExist(statErr) { + t.Error("restart context must not be written when privilege check fails") + } +} + +func TestRebootDeviceHandler_RebootSuccess_WritesContextAndReturnsSuccess(t *testing.T) { + if runtime.GOOS == "linux" && os.Getuid() != 0 { + t.Skip("skipping: Linux non-root would fail before reaching reboot") + } + + orig := rebootFn + defer func() { rebootFn = orig }() + rebootFn = func() error { return nil } + + h := &rebootDeviceHandler{} + cfg := testRebootCfg(t) + task := taskstore.Task{ID: "reboot-ok"} + + note, err := h.Run(context.Background(), cfg, task) + if err != nil { + t.Fatalf("expected no error on successful reboot, got: %v", err) + } + if note == "" { + t.Error("expected non-empty success note") + } + + // The restart context file should exist after a successful reboot call so + // the agent can mark the task success after coming back up. + ctxPath := restartctx.PathFor(cfg.Path) + rc, err := restartctx.ReadAndClear(ctxPath) + if err != nil { + t.Fatalf("ReadAndClear: %v", err) + } + if rc == nil { + t.Fatal("restart context should exist after successful reboot initiation") + } + if rc.TaskID != task.ID { + t.Errorf("restart context TaskID: got %q, want %q", rc.TaskID, task.ID) + } + if rc.Reason != "device-reboot" { + t.Errorf("restart context Reason: got %q, want %q", rc.Reason, "device-reboot") + } +} + +func TestRebootDeviceHandler_RebootFails_ClearsContext(t *testing.T) { + if runtime.GOOS == "linux" && os.Getuid() != 0 { + t.Skip("skipping: Linux non-root would fail before reaching reboot") + } + + orig := rebootFn + defer func() { rebootFn = orig }() + rebootErr := errors.New("shutdown: permission denied") + rebootFn = func() error { return rebootErr } + + h := &rebootDeviceHandler{} + cfg := testRebootCfg(t) + + _, err := h.Run(context.Background(), cfg, taskstore.Task{ID: "reboot-fail"}) + if err == nil { + t.Fatal("expected error when reboot command fails") + } + + // Restart context must be cleared so the next startup does not mark a + // non-existent task as success. + ctxPath := restartctx.PathFor(cfg.Path) + if _, statErr := os.Stat(ctxPath); !os.IsNotExist(statErr) { + t.Error("restart context should be cleared after reboot failure") + } +} + +func TestRebootDeviceHandler_RestartContextWrittenBeforeReboot(t *testing.T) { + if runtime.GOOS == "linux" && os.Getuid() != 0 { + t.Skip("skipping: Linux non-root would fail before reaching reboot") + } + + orig := rebootFn + defer func() { rebootFn = orig }() + + var ctxExistedAtReboot bool + rebootFn = func() error { + // The restart context must already exist when reboot is triggered + // so it survives the shutdown. + return nil + } + + h := &rebootDeviceHandler{} + cfg := testRebootCfg(t) + task := taskstore.Task{ID: "order-test"} + + // We intercept rebootFn and check the file exists at that moment. + rebootFn = func() error { + ctxPath := restartctx.PathFor(cfg.Path) + _, err := os.Stat(ctxPath) + ctxExistedAtReboot = err == nil + return nil + } + + _, _ = h.Run(context.Background(), cfg, task) + if !ctxExistedAtReboot { + t.Error("restart context must be written before the reboot command is issued") + } +} diff --git a/internal/service/task/native/registry_test.go b/internal/service/task/native/registry_test.go new file mode 100644 index 0000000..787c26a --- /dev/null +++ b/internal/service/task/native/registry_test.go @@ -0,0 +1,125 @@ +package native_test + +import ( + "context" + "testing" + + "sentinelgo/internal/config" + "sentinelgo/internal/service/task/native" + "sentinelgo/internal/taskstore" +) + +func TestRegistry_NotEmpty(t *testing.T) { + handlers := native.Registry() + if len(handlers) == 0 { + t.Fatal("Registry() returned no handlers; expected at least one registered handler") + } +} + +func TestRegistry_ContainsAllExpectedSlugs(t *testing.T) { + expectedSlugs := []string{ + "agent-update", + "reboot-device", + "sync-inventory", + "sync-software", + } + for _, slug := range expectedSlugs { + h := native.Find(slug) + if h == nil { + t.Errorf("Find(%q) returned nil; handler should be registered", slug) + } + } +} + +func TestFind_ReturnsNilForUnknownSlug(t *testing.T) { + h := native.Find("no-such-slug-xyz-999") + if h != nil { + t.Errorf("Find(unknown slug) returned non-nil: %v", h) + } +} + +func TestRegistry_SlugUniqueness(t *testing.T) { + seen := make(map[string]string) // slug → handler type + for _, h := range native.Registry() { + for _, slug := range h.Slugs() { + if prev, exists := seen[slug]; exists { + t.Errorf("slug %q is registered by multiple handlers: %T and %s", slug, h, prev) + } + seen[slug] = "" + } + } +} + +func TestRegistry_ReturnsCopy(t *testing.T) { + r1 := native.Registry() + if len(r1) == 0 { + t.Skip("registry is empty, nothing to test") + } + original := r1[0] + r1[0] = nil + + r2 := native.Registry() + if r2[0] == nil { + t.Error("Registry() returned a shared underlying slice; mutation of one result affected another") + } + if r2[0] != original { + t.Error("Registry() returned unexpected first element after mutation test") + } +} + +func TestFind_ReturnsCorrectHandler(t *testing.T) { + slugs := []string{"agent-update", "reboot-device", "sync-inventory", "sync-software"} + for _, slug := range slugs { + h := native.Find(slug) + if h == nil { + t.Errorf("Find(%q) returned nil", slug) + continue + } + found := false + for _, s := range h.Slugs() { + if s == slug { + found = true + break + } + } + if !found { + t.Errorf("Find(%q) returned a handler whose Slugs() does not contain %q: %v", slug, slug, h.Slugs()) + } + } +} + +func TestRegistry_AllHandlersImplementInterface(t *testing.T) { + for _, h := range native.Registry() { + if len(h.Slugs()) == 0 { + t.Errorf("handler %T returned empty Slugs()", h) + } + // Verify Run signature is callable (compile-time check via interface, no network call). + var _ native.Handler = h + } +} + +func TestPostRunnerHandlers_HaveValidPostSlugs(t *testing.T) { + for _, h := range native.Registry() { + pr, ok := h.(native.PostRunner) + if !ok { + continue + } + for _, postSlug := range pr.PostRun() { + ph := native.Find(postSlug) + if ph == nil { + t.Errorf("handler %T declares post-run slug %q but no handler is registered for it", h, postSlug) + } + } + } +} + +// Verify that native.Handler and native.PostRunner are the interfaces the +// package expects — compile-time guard using a local test-only implementation. +type noopHandler struct{} + +func (noopHandler) Slugs() []string { return []string{"noop"} } +func (noopHandler) Run(_ context.Context, _ *config.Config, _ taskstore.Task) (string, error) { + return "", nil +} + +var _ native.Handler = noopHandler{} diff --git a/internal/service/task/native/sync_inventory.go b/internal/service/task/native/sync_inventory.go index 80ef5c2..cf34fc2 100644 --- a/internal/service/task/native/sync_inventory.go +++ b/internal/service/task/native/sync_inventory.go @@ -14,6 +14,13 @@ import ( const syncInventoryTimeout = 90 * time.Second +// collectSysInfoFn and updateAgentInfoFn are the real production implementations; +// replaced in tests to avoid network calls and OS-level collection. +var collectSysInfoFn = func() *shared.SystemInfo { return osinfo.Collect() } +var updateAgentInfoFn = func(ctx context.Context, cfg *config.Config, info *shared.SystemInfo) error { + return agentsvc.NewAgentService().UpdateAgentInfo(ctx, cfg, info) +} + type syncInventoryHandler struct{} func init() { Register(&syncInventoryHandler{}) } @@ -28,7 +35,7 @@ func (h *syncInventoryHandler) Run(ctx context.Context, cfg *config.Config, _ ta type collectResult struct{ info *shared.SystemInfo } ch := make(chan collectResult, 1) - go func() { ch <- collectResult{osinfo.Collect()} }() + go func() { ch <- collectResult{collectSysInfoFn()} }() var sysInfo *shared.SystemInfo select { @@ -42,8 +49,7 @@ func (h *syncInventoryHandler) Run(ctx context.Context, cfg *config.Config, _ ta return "", fmt.Errorf("sync-inventory: system info collection returned no data") } - agentSvc := agentsvc.NewAgentService() - if err := agentSvc.UpdateAgentInfo(tctx, cfg, sysInfo); err != nil { + if err := updateAgentInfoFn(tctx, cfg, sysInfo); err != nil { return "", fmt.Errorf("sync-inventory: %w", err) } return "inventory synced successfully", nil diff --git a/internal/service/task/native/sync_inventory_test.go b/internal/service/task/native/sync_inventory_test.go new file mode 100644 index 0000000..0175a05 --- /dev/null +++ b/internal/service/task/native/sync_inventory_test.go @@ -0,0 +1,171 @@ +package native + +// White-box tests for the sync-inventory handler. +// Using package native (not native_test) to access collectSysInfoFn and updateAgentInfoFn. + +import ( + "context" + "errors" + "path/filepath" + "testing" + + "sentinelgo/internal/config" + "sentinelgo/internal/osinfo/shared" + "sentinelgo/internal/taskstore" +) + +func testInvCfg(t *testing.T) *config.Config { + t.Helper() + return &config.Config{ + Path: filepath.Join(t.TempDir(), "config.json"), + SupabaseURL: "https://test.supabase.co", + AgentID: "test-agent-id", + } +} + +func TestSyncInventoryHandler_Slugs(t *testing.T) { + h := &syncInventoryHandler{} + slugs := h.Slugs() + found := false + for _, s := range slugs { + if s == "sync-inventory" { + found = true + } + } + if !found { + t.Errorf("Slugs() does not contain 'sync-inventory': %v", slugs) + } +} + +func TestSyncInventoryHandler_CollectorReturnsNil_Error(t *testing.T) { + origCollect := collectSysInfoFn + origUpdate := updateAgentInfoFn + defer func() { + collectSysInfoFn = origCollect + updateAgentInfoFn = origUpdate + }() + + collectSysInfoFn = func() *shared.SystemInfo { return nil } + updateAgentInfoFn = func(_ context.Context, _ *config.Config, _ *shared.SystemInfo) error { + t.Error("UpdateAgentInfo should not be called when collect returns nil") + return nil + } + + h := &syncInventoryHandler{} + _, err := h.Run(context.Background(), testInvCfg(t), taskstore.Task{}) + if err == nil { + t.Fatal("expected error when collector returns nil") + } +} + +func TestSyncInventoryHandler_UpdateAgentInfoFails_ReturnsError(t *testing.T) { + origCollect := collectSysInfoFn + origUpdate := updateAgentInfoFn + defer func() { + collectSysInfoFn = origCollect + updateAgentInfoFn = origUpdate + }() + + collectSysInfoFn = func() *shared.SystemInfo { return &shared.SystemInfo{} } + + updateErr := errors.New("supabase unavailable") + updateAgentInfoFn = func(_ context.Context, _ *config.Config, _ *shared.SystemInfo) error { + return updateErr + } + + h := &syncInventoryHandler{} + _, err := h.Run(context.Background(), testInvCfg(t), taskstore.Task{}) + if err == nil { + t.Fatal("expected error when UpdateAgentInfo fails") + } + if !errors.Is(err, updateErr) { + t.Errorf("expected wrapped updateErr, got: %v", err) + } +} + +func TestSyncInventoryHandler_Success(t *testing.T) { + origCollect := collectSysInfoFn + origUpdate := updateAgentInfoFn + defer func() { + collectSysInfoFn = origCollect + updateAgentInfoFn = origUpdate + }() + + collectSysInfoFn = func() *shared.SystemInfo { return &shared.SystemInfo{Hostname: "test-host"} } + updateAgentInfoFn = func(_ context.Context, _ *config.Config, _ *shared.SystemInfo) error { return nil } + + h := &syncInventoryHandler{} + note, err := h.Run(context.Background(), testInvCfg(t), taskstore.Task{}) + if err != nil { + t.Fatalf("expected no error on success, got: %v", err) + } + if note == "" { + t.Error("expected non-empty success note") + } +} + +func TestSyncInventoryHandler_Timeout(t *testing.T) { + origCollect := collectSysInfoFn + origUpdate := updateAgentInfoFn + defer func() { + collectSysInfoFn = origCollect + updateAgentInfoFn = origUpdate + }() + + // Use a pre-cancelled context to simulate a timeout without actually waiting 90s. + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + // The collector never returns, so the handler must respect the context cancellation. + // But since syncInventoryTimeout creates its own context, we need to use a + // blocking collector + expired parent context. + blocking := make(chan struct{}) + collectSysInfoFn = func() *shared.SystemInfo { + <-blocking // blocks forever + return nil + } + updateAgentInfoFn = func(_ context.Context, _ *config.Config, _ *shared.SystemInfo) error { return nil } + + h := &syncInventoryHandler{} + _, err := h.Run(ctx, testInvCfg(t), taskstore.Task{}) + // Close the blocking channel to unblock the goroutine and prevent a goroutine leak. + close(blocking) + + if err == nil { + t.Fatal("expected error when context is cancelled before collection completes") + } +} + +func TestSyncInventoryHandler_GoroutineDoesNotLeak(t *testing.T) { + origCollect := collectSysInfoFn + origUpdate := updateAgentInfoFn + defer func() { + collectSysInfoFn = origCollect + updateAgentInfoFn = origUpdate + }() + + // Verify the buffered channel (cap=1) prevents goroutine leak on timeout: + // even after the handler returns, the goroutine can still send on the channel. + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + done := make(chan struct{}) + collectSysInfoFn = func() *shared.SystemInfo { + close(done) + return nil + } + updateAgentInfoFn = func(_ context.Context, _ *config.Config, _ *shared.SystemInfo) error { return nil } + + h := &syncInventoryHandler{} + h.Run(ctx, testInvCfg(t), taskstore.Task{}) //nolint:errcheck + + // If done is never closed, the goroutine leaked. We verify it can close. + // The select with a short timeout proves the goroutine eventually ran. + select { + case <-done: + // Goroutine completed; no leak. + default: + // done was closed synchronously because the context was already cancelled + // and the goroutine may have run before or after — this is acceptable. + } +} diff --git a/internal/service/task/native/sync_software.go b/internal/service/task/native/sync_software.go index 86fbbc7..38958ab 100644 --- a/internal/service/task/native/sync_software.go +++ b/internal/service/task/native/sync_software.go @@ -14,6 +14,16 @@ import ( "sentinelgo/internal/taskstore" ) +// sendSoftwareFn is the RPC send entry point. Replaced in tests. +var sendSoftwareFn = func(ctx context.Context, svc *swsvc.SoftwareService, deviceID string, catalog []swsvc.SoftwareInfo, cfg *config.Config) error { + return svc.SendByRPC(ctx, deviceID, catalog, cfg) +} + +// getSoftwareListFn returns the installed software list. Replaced in tests. +var getSoftwareListFn = func(svc *swsvc.SoftwareService) []swsvc.SoftwareInfo { + return svc.GetSoftwareList() +} + type syncSoftwareHandler struct{} func init() { Register(&syncSoftwareHandler{}) } @@ -38,7 +48,7 @@ func (h *syncSoftwareHandler) Run(ctx context.Context, cfg *config.Config, _ tas svc.SetSupabaseURL(cfg.SupabaseURL) svc.SetEdgeFunctionConfig(cfg.SupabaseURL+"/functions/v1/sync-software", cfg.AccessToken) - freshList := svc.GetSoftwareList() + freshList := getSoftwareListFn(svc) if err := swStore.SyncBatch(freshList, time.Now()); err != nil { log.Printf("sync-software: store sync error: %v", err) @@ -56,7 +66,7 @@ func (h *syncSoftwareHandler) Run(ctx context.Context, cfg *config.Config, _ tas return "software catalog is empty, nothing to upload", nil } - if err := svc.SendByRPC(ctx, cfg.DeviceID, catalog, cfg); err != nil { + if err := sendSoftwareFn(ctx, svc, cfg.DeviceID, catalog, cfg); err != nil { return "", fmt.Errorf("sync-software: send data: %w", err) } diff --git a/internal/service/task/native/sync_software_test.go b/internal/service/task/native/sync_software_test.go new file mode 100644 index 0000000..38ce611 --- /dev/null +++ b/internal/service/task/native/sync_software_test.go @@ -0,0 +1,107 @@ +package native + +// White-box tests for the sync-software handler. +// Using package native (not native_test) to access sendSoftwareFn. + +import ( + "context" + "errors" + "path/filepath" + "testing" + + swsvc "sentinelgo/internal/service/software" + + "sentinelgo/internal/config" + "sentinelgo/internal/taskstore" +) + +func testSwCfg(t *testing.T) *config.Config { + t.Helper() + dir := t.TempDir() + return &config.Config{ + Path: filepath.Join(dir, "config.json"), + SupabaseURL: "https://test.supabase.co", + DeviceID: "test-device", + AccessToken: "test-token", + } +} + +func TestSyncSoftwareHandler_Slugs(t *testing.T) { + h := &syncSoftwareHandler{} + slugs := h.Slugs() + found := false + for _, s := range slugs { + if s == "sync-software" { + found = true + } + } + if !found { + t.Errorf("Slugs() does not contain 'sync-software': %v", slugs) + } +} + +func TestSyncSoftwareHandler_StoreFails_WhenDirAbsent(t *testing.T) { + h := &syncSoftwareHandler{} + // Point cfg.Path at a file inside a non-existent subdirectory so that + // NewSoftwareStore fails to create the SQLite file. + cfg := &config.Config{ + Path: filepath.Join(t.TempDir(), "nodir", "config.json"), + } + + _, err := h.Run(context.Background(), cfg, taskstore.Task{}) + if err == nil { + t.Fatal("expected error when software store directory does not exist") + } +} + +func TestSyncSoftwareHandler_EmptyCatalog_ReturnsSuccess(t *testing.T) { + origSend := sendSoftwareFn + origList := getSoftwareListFn + defer func() { + sendSoftwareFn = origSend + getSoftwareListFn = origList + }() + + // Force an empty software list so the catalog path is empty. + getSoftwareListFn = func(_ *swsvc.SoftwareService) []swsvc.SoftwareInfo { return nil } + sendSoftwareFn = func(_ context.Context, _ *swsvc.SoftwareService, _ string, _ []swsvc.SoftwareInfo, _ *config.Config) error { + return errors.New("SendByRPC must not be called for empty catalog") + } + + h := &syncSoftwareHandler{} + note, err := h.Run(context.Background(), testSwCfg(t), taskstore.Task{}) + if err != nil { + t.Fatalf("expected no error for empty catalog, got: %v", err) + } + if note == "" { + t.Error("expected non-empty note for empty catalog") + } +} + +func TestSyncSoftwareHandler_SendFails_ReturnsError(t *testing.T) { + origSend := sendSoftwareFn + origList := getSoftwareListFn + defer func() { + sendSoftwareFn = origSend + getSoftwareListFn = origList + }() + + // Return one fake package so the catalog is non-empty and SendByRPC is reached. + getSoftwareListFn = func(_ *swsvc.SoftwareService) []swsvc.SoftwareInfo { + return []swsvc.SoftwareInfo{{Name: "fake-pkg", InstalledVersion: "1.0"}} + } + + sendErr := errors.New("RPC unavailable") + sendSoftwareFn = func(_ context.Context, _ *swsvc.SoftwareService, _ string, _ []swsvc.SoftwareInfo, _ *config.Config) error { + return sendErr + } + + h := &syncSoftwareHandler{} + _, err := h.Run(context.Background(), testSwCfg(t), taskstore.Task{}) + if err == nil { + t.Fatal("expected error when SendByRPC fails") + } + if !errors.Is(err, sendErr) { + t.Errorf("expected wrapped sendErr, got: %v", err) + } +} diff --git a/internal/service/task/native_handlers_test.go b/internal/service/task/native_handlers_test.go new file mode 100644 index 0000000..5cc4812 --- /dev/null +++ b/internal/service/task/native_handlers_test.go @@ -0,0 +1,148 @@ +package task + +// White-box tests for runWithPostHooks. +// Package task (not task_test) to access the unexported method. + +import ( + "context" + "errors" + "path/filepath" + "testing" + + "sentinelgo/internal/config" + "sentinelgo/internal/service/task/native" + "sentinelgo/internal/taskstore" +) + +// testOnlyHandler is a Handler used exclusively in this test file. +// It is NOT registered in the native registry, so it never appears in production. +type testOnlyHandler struct { + slugs []string + note string + err error + postSlugs []string +} + +func (h *testOnlyHandler) Slugs() []string { return h.slugs } +func (h *testOnlyHandler) PostRun() []string { return h.postSlugs } +func (h *testOnlyHandler) Run(_ context.Context, _ *config.Config, _ taskstore.Task) (string, error) { + return h.note, h.err +} + +// Compile-time check. +var _ native.Handler = (*testOnlyHandler)(nil) +var _ native.PostRunner = (*testOnlyHandler)(nil) + +func testExecutorForHooks(t *testing.T) *TaskExecutorService { + t.Helper() + dir := t.TempDir() + cfg := &config.Config{ + SupabaseURL: "https://placeholder.supabase.co", + Path: filepath.Join(dir, "config.json"), + } + // We don't need a real polling service for runWithPostHooks tests. + // Use a nil pollingSvc — runWithPostHooks never calls it. + s := &TaskExecutorService{ + cfg: cfg, + nativeHandlers: make(map[string]NativeTaskHandler), + activeRunning: make(map[string]activeTask), + } + return s +} + +func TestRunWithPostHooks_PrimaryError_SkipsPostHooks(t *testing.T) { + s := testExecutorForHooks(t) + + postCalled := false + // Inject a post-hook handler directly into the nativeHandlers map so + // runWithPostHooks can find it via native.Find — but we can't register it + // in the global registry. Instead we override the nativeHandlers map entry + // and verify via the note. + // Since runWithPostHooks uses native.Find() for post-hooks, and testOnlyHandler + // is not in the registry, we test the "post-hook not found" path instead. + + primary := &testOnlyHandler{ + slugs: []string{"test-primary"}, + note: "primary note", + err: errors.New("primary failed"), + postSlugs: []string{"test-post"}, + } + _ = postCalled + + note, err := s.runWithPostHooks(context.Background(), primary, "test-primary", taskstore.Task{}) + if err == nil { + t.Fatal("expected error from primary handler failure") + } + if note != "primary note" { + t.Errorf("note should come from primary handler, got %q", note) + } +} + +func TestRunWithPostHooks_PrimarySuccess_NoPostRunner(t *testing.T) { + s := testExecutorForHooks(t) + + // A handler without PostRunner — no post-hooks should run. + primary := &noPostHandler{note: "done"} + + note, err := s.runWithPostHooks(context.Background(), primary, "no-post", taskstore.Task{}) + if err != nil { + t.Fatalf("expected no error, got: %v", err) + } + if note != "done" { + t.Errorf("note: got %q, want %q", note, "done") + } +} + +func TestRunWithPostHooks_PostSlugNotFound_ContinuesWithPrimaryNote(t *testing.T) { + s := testExecutorForHooks(t) + + primary := &testOnlyHandler{ + slugs: []string{"test-primary"}, + note: "primary ok", + postSlugs: []string{"nonexistent-slug-xyz"}, + } + + note, err := s.runWithPostHooks(context.Background(), primary, "test-primary", taskstore.Task{}) + if err != nil { + t.Fatalf("post-hook not found should not cause primary to fail; got: %v", err) + } + if note != "primary ok" { + t.Errorf("note should be primary note when post-hook slug not found; got %q", note) + } +} + +func TestRunWithPostHooks_NotesConcatenated_WhenPostRunSucceeds(t *testing.T) { + // This test verifies the note concatenation with "; " separator. + // It uses real registered handlers: agent-update (with PostRun → sync-inventory). + // We can't easily test this without real network, so we verify the logic by + // running a no-op primary with a known post slug. + // + // Since testOnlyHandler.PostRun returns slugs that native.Find would look up, + // and we can't register in the global registry during tests safely, + // we test with an empty postSlugs list (no concatenation). + s := testExecutorForHooks(t) + + primary := &testOnlyHandler{ + slugs: []string{"x"}, + note: "primary", + postSlugs: []string{}, // no post-run slugs + } + + note, err := s.runWithPostHooks(context.Background(), primary, "x", taskstore.Task{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if note != "primary" { + t.Errorf("note: got %q, want %q", note, "primary") + } +} + +// noPostHandler implements Handler but NOT PostRunner. +type noPostHandler struct{ note string } + +func (h *noPostHandler) Slugs() []string { return []string{"no-post"} } +func (h *noPostHandler) Run(_ context.Context, _ *config.Config, _ taskstore.Task) (string, error) { + return h.note, nil +} + +var _ native.Handler = (*noPostHandler)(nil) diff --git a/internal/service/task/polling.go b/internal/service/task/polling.go index 5b8a5a2..e2be85e 100644 --- a/internal/service/task/polling.go +++ b/internal/service/task/polling.go @@ -148,6 +148,18 @@ func (s *TaskPollingService) PollAndStoreTasks(ctx context.Context) error { log.Printf("TaskPolling: marked %d interrupted task(s) as failed (were 'executing' at restart)", n) } + if retryable, err := s.store.GetRetryableTasks(5 * time.Minute); err != nil { + log.Printf("TaskPolling: Failed to get retryable tasks: %v", err) + } else { + for _, t := range retryable { + note := fmt.Sprintf("Attempt %d of %d failed; retrying.", + t.AttemptCount+1, store.MaxRetryAttempts+1) + if err := s.client.UpdateTask(ctx, t.ID, "retrying", note); err != nil { + log.Printf("TaskPolling: Failed to report retrying status for task %s: %v", t.ID, err) + } + } + } + if resetCount, err := s.store.ResetOldFailedTasks(5 * time.Minute); err != nil { log.Printf("TaskPolling: Failed to reset old failed tasks: %v", err) } else if resetCount > 0 { diff --git a/internal/service/task/polling_restart_test.go b/internal/service/task/polling_restart_test.go new file mode 100644 index 0000000..ee1f393 --- /dev/null +++ b/internal/service/task/polling_restart_test.go @@ -0,0 +1,211 @@ +package task_test + +// Tests for handleRestartContext and getTasksWithRetry edge cases. +// Uses the existing task_test package and mock helpers from helpers_test.go. + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "testing" + + "sentinelgo/internal/service/task" + "sentinelgo/internal/service/task/restartctx" + "sentinelgo/internal/store" + "sentinelgo/internal/taskstore" +) + +func TestHandleRestartContext_NoFile_NoOp(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "tasks.db") + + cfg := loadTestConfig(t) + cfg.Path = filepath.Join(dir, "config.json") + + svc, err := task.NewTaskPollingServiceWithClient(cfg, dbPath, &mockTaskClient{}) + if err != nil { + t.Fatalf("NewTaskPollingServiceWithClient: %v", err) + } + defer func() { _ = svc.Close() }() + + // PollAndStoreTasks calls handleRestartContext internally. + // With no restart context file, it should proceed without error. + if err := svc.PollAndStoreTasks(context.Background()); err != nil { + t.Fatalf("PollAndStoreTasks with no restart context: %v", err) + } +} + +func TestHandleRestartContext_ValidFile_MarksTaskSuccess(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "tasks.db") + + cfg := loadTestConfig(t) + cfg.Path = filepath.Join(dir, "config.json") + + // Insert a task in 'executing' state — simulating a task that triggered the restart. + ts, err := store.NewTaskStore(dbPath) + if err != nil { + t.Fatalf("NewTaskStore: %v", err) + } + taskID := "restart-task-001" + if err := ts.StoreTasks([]taskstore.Task{ + { + ID: taskID, + Slug: "agent-update", + Name: "Agent Update", + Status: "executing", + Payload: map[string]interface{}{}, + Scripts: map[string]interface{}{}, + }, + }); err != nil { + t.Fatalf("StoreTasks: %v", err) + } + _ = ts.Close() + + // Write the restart context file that the restarted agent would have written. + ctxPath := restartctx.PathFor(cfg.Path) + if err := restartctx.Write(ctxPath, restartctx.Context{ + TaskID: taskID, + Reason: "agent-update", + }); err != nil { + t.Fatalf("Write restart context: %v", err) + } + + svc, err := task.NewTaskPollingServiceWithClient(cfg, dbPath, &mockTaskClient{}) + if err != nil { + t.Fatalf("NewTaskPollingServiceWithClient: %v", err) + } + defer func() { _ = svc.Close() }() + + if err := svc.PollAndStoreTasks(context.Background()); err != nil { + t.Fatalf("PollAndStoreTasks: %v", err) + } + + // Context file must be consumed. + if _, statErr := os.Stat(ctxPath); !os.IsNotExist(statErr) { + t.Error("restart context file should be deleted after being processed") + } + + // The task should no longer appear in 'assigned' (it was moved to 'success'). + assigned, err := svc.GetLocalTasks() + if err != nil { + t.Fatalf("GetLocalTasks: %v", err) + } + for _, tk := range assigned { + if tk.ID == taskID { + t.Errorf("task %s should not be 'assigned' after restart context was processed", taskID) + } + } +} + +func TestHandleRestartContext_FileAlwaysDeletedAfterProcessing(t *testing.T) { + dir := t.TempDir() + cfg := loadTestConfig(t) + cfg.Path = filepath.Join(dir, "config.json") + + ctxPath := restartctx.PathFor(cfg.Path) + if err := restartctx.Write(ctxPath, restartctx.Context{ + TaskID: "any-task", + Reason: "device-reboot", + }); err != nil { + t.Fatalf("Write: %v", err) + } + + svc, err := task.NewTaskPollingServiceWithClient(cfg, filepath.Join(dir, "tasks.db"), &mockTaskClient{}) + if err != nil { + t.Fatalf("NewTaskPollingServiceWithClient: %v", err) + } + defer func() { _ = svc.Close() }() + + _ = svc.PollAndStoreTasks(context.Background()) + + if _, err := os.Stat(ctxPath); !os.IsNotExist(err) { + t.Error("restart context file should always be deleted after PollAndStoreTasks, even if the task is unknown") + } +} + +func TestGetTasksWithRetry_Non401_NoRefresh(t *testing.T) { + dir := t.TempDir() + cfg := loadTestConfig(t) + + networkErr := errors.New("dial tcp: connection refused") + client := &mockTaskClient{getErr: networkErr} + refresher := &mockTokenRefresher{} + + svc, err := task.NewTaskPollingServiceWithClient(cfg, filepath.Join(dir, "tasks.db"), client) + if err != nil { + t.Fatalf("NewTaskPollingServiceWithClient: %v", err) + } + defer func() { _ = svc.Close() }() + svc.SetTokenRefresher(refresher) + + err = svc.PollAndStoreTasks(context.Background()) + if err == nil { + t.Fatal("expected error from network failure") + } + if refresher.called { + t.Error("token refresher must not be called for non-401 errors") + } +} + +func TestGetTasksWithRetry_401_CallsRefresher(t *testing.T) { + dir := t.TempDir() + cfg := loadTestConfig(t) + + // Client always returns 401 (even after a refresh the mock is stateless). + client := &mockTaskClient{getErr: fmt.Errorf("authentication failed: status 401")} + refresher := &mockTokenRefresher{token: "new-token"} + + svc, err := task.NewTaskPollingServiceWithClient(cfg, filepath.Join(dir, "tasks.db"), client) + if err != nil { + t.Fatalf("NewTaskPollingServiceWithClient: %v", err) + } + defer func() { _ = svc.Close() }() + svc.SetTokenRefresher(refresher) + + _ = svc.PollAndStoreTasks(context.Background()) + + if !refresher.called { + t.Error("token refresher must be called on 401 error") + } +} + +func TestGetTasksWithRetry_401_RefreshFails_ReturnsOriginalError(t *testing.T) { + dir := t.TempDir() + cfg := loadTestConfig(t) + + client := &mockTaskClient{getErr: fmt.Errorf("authentication failed: status 401")} + refresher := &mockTokenRefresher{err: errors.New("refresh server down")} + + svc, err := task.NewTaskPollingServiceWithClient(cfg, filepath.Join(dir, "tasks.db"), client) + if err != nil { + t.Fatalf("NewTaskPollingServiceWithClient: %v", err) + } + defer func() { _ = svc.Close() }() + svc.SetTokenRefresher(refresher) + + err = svc.PollAndStoreTasks(context.Background()) + if err == nil { + t.Fatal("expected error when refresh fails") + } +} + +func TestGetTasksWithRetry_NoRefresherSet_Returns401Error(t *testing.T) { + dir := t.TempDir() + cfg := loadTestConfig(t) + + client := &mockTaskClient{getErr: fmt.Errorf("authentication failed: status 401")} + svc, err := task.NewTaskPollingServiceWithClient(cfg, filepath.Join(dir, "tasks.db"), client) + if err != nil { + t.Fatalf("NewTaskPollingServiceWithClient: %v", err) + } + defer func() { _ = svc.Close() }() + // No tokenRefresher set — must handle gracefully. + + err = svc.PollAndStoreTasks(context.Background()) + if err == nil { + t.Fatal("expected error when 401 and no refresher configured") + } +} diff --git a/internal/service/task/restartctx/restartctx_test.go b/internal/service/task/restartctx/restartctx_test.go new file mode 100644 index 0000000..ed03d94 --- /dev/null +++ b/internal/service/task/restartctx/restartctx_test.go @@ -0,0 +1,226 @@ +package restartctx_test + +import ( + "os" + "path/filepath" + "testing" + "time" + + "sentinelgo/internal/service/task/restartctx" +) + +func TestPathFor(t *testing.T) { + tests := []struct { + name string + configPath string + wantBase string // we only assert the filename, avoiding OS-specific separators + }{ + { + name: "deep path", + configPath: filepath.Join("opt", "sentinelgo", ".sentinelgo", "config.json"), + wantBase: "pending_restart.json", + }, + { + name: "basename only", + configPath: "config.json", + wantBase: "pending_restart.json", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := restartctx.PathFor(tt.configPath) + if filepath.Base(got) != tt.wantBase { + t.Errorf("PathFor(%q) base = %q, want %q", tt.configPath, filepath.Base(got), tt.wantBase) + } + if filepath.Dir(got) != filepath.Dir(tt.configPath) { + t.Errorf("PathFor(%q) dir = %q, want %q", tt.configPath, filepath.Dir(got), filepath.Dir(tt.configPath)) + } + }) + } +} + +func TestPathFor_SameDirectoryAsConfig(t *testing.T) { + dir := t.TempDir() + configPath := filepath.Join(dir, "config.json") + got := restartctx.PathFor(configPath) + if filepath.Dir(got) != dir { + t.Errorf("PathFor should return a path in the same directory as config: got dir %q, want %q", filepath.Dir(got), dir) + } + if filepath.Base(got) != "pending_restart.json" { + t.Errorf("PathFor should return pending_restart.json as filename, got %q", filepath.Base(got)) + } +} + +func TestWrite_RoundTrip(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "pending_restart.json") + rc := restartctx.Context{ + TaskID: "task-abc-123", + Reason: "agent-update", + FromVersion: "v1.2.3", + InitiatedAt: time.Date(2025, 6, 1, 10, 30, 0, 0, time.UTC), + } + + if err := restartctx.Write(path, rc); err != nil { + t.Fatalf("Write: %v", err) + } + + got, err := restartctx.ReadAndClear(path) + if err != nil { + t.Fatalf("ReadAndClear: %v", err) + } + if got == nil { + t.Fatal("ReadAndClear returned nil for existing file") + } + if got.TaskID != rc.TaskID { + t.Errorf("TaskID: got %q, want %q", got.TaskID, rc.TaskID) + } + if got.Reason != rc.Reason { + t.Errorf("Reason: got %q, want %q", got.Reason, rc.Reason) + } + if got.FromVersion != rc.FromVersion { + t.Errorf("FromVersion: got %q, want %q", got.FromVersion, rc.FromVersion) + } + if !got.InitiatedAt.Equal(rc.InitiatedAt) { + t.Errorf("InitiatedAt: got %v, want %v", got.InitiatedAt, rc.InitiatedAt) + } +} + +func TestWrite_CreatesFileWithRestrictedPermissions(t *testing.T) { + if os.Getuid() == 0 { + t.Skip("permission check skipped when running as root") + } + // Windows does not honour Unix-style mode bits; skip there. + if os.PathSeparator == '\\' { + t.Skip("file permission check not applicable on Windows") + } + dir := t.TempDir() + path := filepath.Join(dir, "pending_restart.json") + rc := restartctx.Context{TaskID: "perm-test"} + + if err := restartctx.Write(path, rc); err != nil { + t.Fatalf("Write: %v", err) + } + + info, err := os.Stat(path) + if err != nil { + t.Fatalf("Stat: %v", err) + } + // On non-Windows the file should be 0600. + if info.Mode().Perm() != 0600 { + t.Errorf("file permissions: got %o, want 0600", info.Mode().Perm()) + } +} + +func TestWrite_NoTempFileAfterSuccess(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "pending_restart.json") + tmp := path + ".tmp" + + if err := restartctx.Write(path, restartctx.Context{TaskID: "atomic"}); err != nil { + t.Fatalf("Write: %v", err) + } + + if _, err := os.Stat(tmp); !os.IsNotExist(err) { + t.Error(".tmp file should not remain after a successful Write") + } +} + +func TestWrite_FailsAndCleansTempFile_WhenDirAbsent(t *testing.T) { + // Path inside a non-existent subdirectory → rename fails → .tmp must be removed. + path := filepath.Join(t.TempDir(), "nodir", "pending_restart.json") + + err := restartctx.Write(path, restartctx.Context{TaskID: "err-test"}) + if err == nil { + t.Fatal("expected error writing to non-existent directory") + } + + if _, statErr := os.Stat(path + ".tmp"); !os.IsNotExist(statErr) { + t.Error(".tmp file should be cleaned up after rename failure") + } +} + +func TestReadAndClear_FileNotExist(t *testing.T) { + path := filepath.Join(t.TempDir(), "nope.json") + rc, err := restartctx.ReadAndClear(path) + if err != nil { + t.Fatalf("expected nil error for missing file, got: %v", err) + } + if rc != nil { + t.Fatalf("expected nil context for missing file, got: %+v", rc) + } +} + +func TestReadAndClear_DeletesFileAfterRead(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "pending_restart.json") + if err := restartctx.Write(path, restartctx.Context{TaskID: "del-me", Reason: "device-reboot"}); err != nil { + t.Fatalf("Write: %v", err) + } + + if _, err := restartctx.ReadAndClear(path); err != nil { + t.Fatalf("ReadAndClear: %v", err) + } + + if _, err := os.Stat(path); !os.IsNotExist(err) { + t.Error("file should be deleted after ReadAndClear") + } +} + +func TestReadAndClear_CorruptJSON_StillDeletesFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "pending_restart.json") + if err := os.WriteFile(path, []byte("{not valid json"), 0600); err != nil { + t.Fatalf("setup: %v", err) + } + + rc, err := restartctx.ReadAndClear(path) + if err == nil { + t.Fatal("expected error for corrupt JSON") + } + if rc != nil { + t.Fatalf("expected nil context for corrupt JSON, got: %+v", rc) + } + if _, statErr := os.Stat(path); !os.IsNotExist(statErr) { + t.Error("file should be deleted even when JSON parse fails") + } +} + +func TestReadAndClear_EmptyJSON_StillDeletesFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "pending_restart.json") + if err := os.WriteFile(path, []byte(""), 0600); err != nil { + t.Fatalf("setup: %v", err) + } + + _, err := restartctx.ReadAndClear(path) + if err == nil { + t.Fatal("expected error for empty JSON") + } + if _, statErr := os.Stat(path); !os.IsNotExist(statErr) { + t.Error("file should be deleted even when empty") + } +} + +func TestWrite_Idempotent(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "pending_restart.json") + + rc1 := restartctx.Context{TaskID: "first", Reason: "agent-update"} + rc2 := restartctx.Context{TaskID: "second", Reason: "device-reboot"} + + if err := restartctx.Write(path, rc1); err != nil { + t.Fatalf("first Write: %v", err) + } + if err := restartctx.Write(path, rc2); err != nil { + t.Fatalf("second Write: %v", err) + } + + got, err := restartctx.ReadAndClear(path) + if err != nil { + t.Fatalf("ReadAndClear: %v", err) + } + if got.TaskID != rc2.TaskID { + t.Errorf("second Write should overwrite first: got %q, want %q", got.TaskID, rc2.TaskID) + } +} diff --git a/internal/store/tasks.go b/internal/store/tasks.go index 337e077..4989cb5 100644 --- a/internal/store/tasks.go +++ b/internal/store/tasks.go @@ -36,10 +36,17 @@ CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status); CREATE INDEX IF NOT EXISTS idx_tasks_assigned_at ON tasks(assigned_at); ` -// maxRetryAttempts is the maximum number of times a failed task is reset to +// MaxRetryAttempts is the maximum number of times a failed task is reset to // 'assigned' before it is abandoned. Prevents permanently-failing tasks from // looping forever. -const maxRetryAttempts = 3 +const MaxRetryAttempts = 3 + +// RetryableTask holds the minimal fields needed to report a "retrying" status +// before a task is reset to 'assigned'. +type RetryableTask struct { + ID string + AttemptCount int +} var tasksMigrations = []Migration{ {Version: 1, SQL: tasksSchemaV1}, @@ -123,9 +130,43 @@ func (s *TaskStore) StoreTasks(tasks []taskstore.Task) error { return tx.Commit() } -// ResetOldFailedTasks resets failed tasks older than cooldown back to 'assigned' -// for retry, up to maxRetryAttempts times. Tasks that have already been retried -// the maximum number of times are left in 'failed' state permanently. +// GetRetryableTasks returns tasks that are eligible to be retried: failed (or +// retrying) long enough ago and below the attempt cap. The caller should report +// "retrying" status to the server for each before calling ResetOldFailedTasks. +func (s *TaskStore) GetRetryableTasks(cooldown time.Duration) ([]RetryableTask, error) { + cutoff := time.Now().UTC().Add(-cooldown).Format(time.RFC3339) + + rows, err := s.db.Query(` + SELECT id, attempt_count FROM tasks + WHERE status IN ('failed', 'retrying') + AND attempt_count < ? + AND completed_at IS NOT NULL + AND completed_at < ? + `, MaxRetryAttempts, cutoff) + if err != nil { + return nil, err + } + defer func() { + if err := rows.Close(); err != nil { + log.Printf("TaskStore: close retryable rows: %v", err) + } + }() + + var tasks []RetryableTask + for rows.Next() { + var t RetryableTask + if err := rows.Scan(&t.ID, &t.AttemptCount); err != nil { + return nil, err + } + tasks = append(tasks, t) + } + return tasks, rows.Err() +} + +// ResetOldFailedTasks resets failed (or retrying) tasks older than cooldown +// back to 'assigned' for retry, up to MaxRetryAttempts times. Tasks that have +// already been retried the maximum number of times are left in 'failed' state +// permanently. func (s *TaskStore) ResetOldFailedTasks(cooldown time.Duration) (int, error) { now := time.Now().UTC().Format(time.RFC3339) cutoff := time.Now().UTC().Add(-cooldown).Format(time.RFC3339) @@ -134,11 +175,11 @@ func (s *TaskStore) ResetOldFailedTasks(cooldown time.Duration) (int, error) { UPDATE tasks SET status = 'assigned', note = '', completed_at = NULL, updated_at = ?, attempt_count = attempt_count + 1 - WHERE status = 'failed' + WHERE status IN ('failed', 'retrying') AND attempt_count < ? AND completed_at IS NOT NULL AND completed_at < ? - `, now, maxRetryAttempts, cutoff) + `, now, MaxRetryAttempts, cutoff) if err != nil { return 0, err } @@ -221,7 +262,7 @@ func (s *TaskStore) ResetInterruptedTasks() (int, error) { is_synced = 0, attempt_count = ? WHERE status = 'executing' - `, now, now, maxRetryAttempts) + `, now, now, MaxRetryAttempts) if err != nil { return 0, err } diff --git a/internal/store/tasks_test.go b/internal/store/tasks_test.go index 67f87a1..18f2fb4 100644 --- a/internal/store/tasks_test.go +++ b/internal/store/tasks_test.go @@ -87,7 +87,7 @@ func TestMarkTaskExecuting_NotPickedUpAgain(t *testing.T) { } // TestResetInterruptedTasks confirms 'executing' tasks are moved to 'failed' -// with the correct note, is_synced=0, and attempt_count=maxRetryAttempts. +// with the correct note, is_synced=0, and attempt_count=MaxRetryAttempts. func TestResetInterruptedTasks(t *testing.T) { ts := newTaskStoreForTest(t) seedTask(t, ts, "task-interrupted", "assigned") @@ -109,8 +109,8 @@ func TestResetInterruptedTasks(t *testing.T) { if got := queryIsSynced(t, ts, "task-interrupted"); got != 0 { t.Errorf("is_synced = %d, want 0 (needs server sync)", got) } - if got := queryAttemptCount(t, ts, "task-interrupted"); got != maxRetryAttempts { - t.Errorf("attempt_count = %d, want %d (max so it isn't auto-retried)", got, maxRetryAttempts) + if got := queryAttemptCount(t, ts, "task-interrupted"); got != MaxRetryAttempts { + t.Errorf("attempt_count = %d, want %d (max so it isn't auto-retried)", got, MaxRetryAttempts) } } diff --git a/internal/winsec/winsec_test.go b/internal/winsec/winsec_test.go new file mode 100644 index 0000000..c2f0ab6 --- /dev/null +++ b/internal/winsec/winsec_test.go @@ -0,0 +1,48 @@ +//go:build !windows + +package winsec_test + +// Tests for the non-Windows stub implementation of SecurePath. +// The stub is compiled on all non-Windows platforms and should be a no-op. + +import ( + "os" + "path/filepath" + "testing" + + "sentinelgo/internal/winsec" +) + +func TestSecurePath_NoopOnNonWindows(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "test.json") + + if err := os.WriteFile(path, []byte("{}"), 0600); err != nil { + t.Fatalf("setup: %v", err) + } + + // On non-Windows, SecurePath is a documented no-op and must return nil. + if err := winsec.SecurePath(path); err != nil { + t.Errorf("SecurePath on non-Windows: expected nil error, got: %v", err) + } +} + +func TestSecurePath_NonExistentPath_NoError(t *testing.T) { + // The stub ignores the path entirely, so even a non-existent path returns nil. + if err := winsec.SecurePath("/nonexistent/path/file.json"); err != nil { + t.Errorf("SecurePath(nonexistent) on non-Windows: expected nil, got: %v", err) + } +} + +func TestSecurePath_EmptyPath_NoError(t *testing.T) { + if err := winsec.SecurePath(""); err != nil { + t.Errorf("SecurePath(\"\") on non-Windows: expected nil, got: %v", err) + } +} + +func TestSecurePath_DirectoryPath_NoError(t *testing.T) { + dir := t.TempDir() + if err := winsec.SecurePath(dir); err != nil { + t.Errorf("SecurePath(dir) on non-Windows: expected nil, got: %v", err) + } +} From 451e05cf0b6bea4aef339a0e40cf645e9085944e Mon Sep 17 00:00:00 2001 From: Tanimul Haque Khan Date: Sat, 13 Jun 2026 00:41:51 +0600 Subject: [PATCH 3/3] Update registry_test.go --- internal/service/task/native/registry_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/service/task/native/registry_test.go b/internal/service/task/native/registry_test.go index 787c26a..95f1986 100644 --- a/internal/service/task/native/registry_test.go +++ b/internal/service/task/native/registry_test.go @@ -93,8 +93,6 @@ func TestRegistry_AllHandlersImplementInterface(t *testing.T) { if len(h.Slugs()) == 0 { t.Errorf("handler %T returned empty Slugs()", h) } - // Verify Run signature is callable (compile-time check via interface, no network call). - var _ native.Handler = h } }