diff --git a/api/persistence/v1/executions.pb.go b/api/persistence/v1/executions.pb.go index d42bdb3ce8..03a1da28ea 100644 --- a/api/persistence/v1/executions.pb.go +++ b/api/persistence/v1/executions.pb.go @@ -1170,11 +1170,18 @@ type TimeSkippingInfo struct { // Current time-skipping configuration applied to the workflow. Config *v13.TimeSkippingConfig `protobuf:"bytes,1,opt,name=config,proto3" json:"config,omitempty"` // Total skipped duration for the current workflow execution run, including any + // inherited skipped duration carried over from a preceding execution that started this run. AccumulatedSkippedDuration *durationpb.Duration `protobuf:"bytes,2,opt,name=accumulated_skipped_duration,json=accumulatedSkippedDuration,proto3" json:"accumulated_skipped_duration,omitempty"` // The current fast-forward info for time skipping. FastForwardInfo *FastForwardInfo `protobuf:"bytes,4,opt,name=fast_forward_info,json=fastForwardInfo,proto3" json:"fast_forward_info,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // Versioned transition at which this TimeSkippingInfo was last modified (i.e. when a + // skip transition changed accumulated_skipped_duration). Used by PartialRefresh to detect + // that pending timer tasks must be re-stamped against the new accumulated skip, since a + // skip mutates this workflow-level field without bumping any per-timer + // last_update_versioned_transition. Mirrors the per-entity stamps on TimerInfo/ActivityInfo. 
+ LastUpdateVersionedTransition *VersionedTransition `protobuf:"bytes,5,opt,name=last_update_versioned_transition,json=lastUpdateVersionedTransition,proto3" json:"last_update_versioned_transition,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *TimeSkippingInfo) Reset() { @@ -1228,6 +1235,13 @@ func (x *TimeSkippingInfo) GetFastForwardInfo() *FastForwardInfo { return nil } +func (x *TimeSkippingInfo) GetLastUpdateVersionedTransition() *VersionedTransition { + if x != nil { + return x.LastUpdateVersionedTransition + } + return nil +} + type FastForwardInfo struct { state protoimpl.MessageState `protogen:"open.v1"` // Target time for the fast-forward, expressed in virtual time. @@ -5040,11 +5054,12 @@ const file_temporal_server_api_persistence_v1_executions_proto_rawDesc = "" + "&ChildrenInitializedPostResetPointEntry\x12\x10\n" + "\x03key\x18\x01 \x01(\tR\x03key\x12H\n" + "\x05value\x18\x02 \x01(\v22.temporal.server.api.persistence.v1.ResetChildInfoR\x05value:\x028\x01B\x1c\n" + - "\x1alast_workflow_task_failureJ\x04\b\b\x10\tJ\x04\b\x0e\x10\x0fJ\x04\b\x0f\x10\x10J\x04\b\x10\x10\x11J\x04\bp\x10qJ\x04\b,\x10-J\x04\b-\x10.J\x04\b/\x100J\x04\b0\x101J\x04\b1\x102J\x04\b2\x103\"\xba\x02\n" + + "\x1alast_workflow_task_failureJ\x04\b\b\x10\tJ\x04\b\x0e\x10\x0fJ\x04\b\x0f\x10\x10J\x04\b\x10\x10\x11J\x04\bp\x10qJ\x04\b,\x10-J\x04\b-\x10.J\x04\b/\x100J\x04\b0\x101J\x04\b1\x102J\x04\b2\x103\"\xbd\x03\n" + "\x10TimeSkippingInfo\x12B\n" + "\x06config\x18\x01 \x01(\v2*.temporal.api.common.v1.TimeSkippingConfigR\x06config\x12[\n" + "\x1caccumulated_skipped_duration\x18\x02 \x01(\v2\x19.google.protobuf.DurationR\x1aaccumulatedSkippedDuration\x12_\n" + - "\x11fast_forward_info\x18\x04 \x01(\v23.temporal.server.api.persistence.v1.FastForwardInfoR\x0ffastForwardInfoJ\x04\b\x03\x10\x04R\x1ecurrent_elapsed_duration_bound\"\x97\x01\n" + + "\x11fast_forward_info\x18\x04 
\x01(\v23.temporal.server.api.persistence.v1.FastForwardInfoR\x0ffastForwardInfo\x12\x80\x01\n" + + " last_update_versioned_transition\x18\x05 \x01(\v27.temporal.server.api.persistence.v1.VersionedTransitionR\x1dlastUpdateVersionedTransitionJ\x04\b\x03\x10\x04R\x1ecurrent_elapsed_duration_bound\"\x97\x01\n" + "\x0fFastForwardInfo\x12;\n" + "\vtarget_time\x18\x01 \x01(\v2\x1a.google.protobuf.TimestampR\n" + "targetTime\x12\x1f\n" + @@ -5544,117 +5559,118 @@ var file_temporal_server_api_persistence_v1_executions_proto_depIdxs = []int32{ 64, // 48: temporal.server.api.persistence.v1.TimeSkippingInfo.config:type_name -> temporal.api.common.v1.TimeSkippingConfig 48, // 49: temporal.server.api.persistence.v1.TimeSkippingInfo.accumulated_skipped_duration:type_name -> google.protobuf.Duration 3, // 50: temporal.server.api.persistence.v1.TimeSkippingInfo.fast_forward_info:type_name -> temporal.server.api.persistence.v1.FastForwardInfo - 47, // 51: temporal.server.api.persistence.v1.FastForwardInfo.target_time:type_name -> google.protobuf.Timestamp - 65, // 52: temporal.server.api.persistence.v1.LastNotifiedTargetVersion.deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion - 66, // 53: temporal.server.api.persistence.v1.WorkflowExecutionState.state:type_name -> temporal.server.api.enums.v1.WorkflowExecutionState - 67, // 54: temporal.server.api.persistence.v1.WorkflowExecutionState.status:type_name -> temporal.api.enums.v1.WorkflowExecutionStatus - 56, // 55: temporal.server.api.persistence.v1.WorkflowExecutionState.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition - 47, // 56: temporal.server.api.persistence.v1.WorkflowExecutionState.start_time:type_name -> google.protobuf.Timestamp - 37, // 57: temporal.server.api.persistence.v1.WorkflowExecutionState.request_ids:type_name -> temporal.server.api.persistence.v1.WorkflowExecutionState.RequestIdsEntry - 68, // 58: 
temporal.server.api.persistence.v1.RequestIDInfo.event_type:type_name -> temporal.api.enums.v1.EventType - 69, // 59: temporal.server.api.persistence.v1.TransferTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType - 47, // 60: temporal.server.api.persistence.v1.TransferTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp - 38, // 61: temporal.server.api.persistence.v1.TransferTaskInfo.close_execution_task_details:type_name -> temporal.server.api.persistence.v1.TransferTaskInfo.CloseExecutionTaskDetails - 70, // 62: temporal.server.api.persistence.v1.TransferTaskInfo.chasm_task_info:type_name -> temporal.server.api.persistence.v1.ChasmTaskInfo - 69, // 63: temporal.server.api.persistence.v1.ReplicationTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType - 47, // 64: temporal.server.api.persistence.v1.ReplicationTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp - 71, // 65: temporal.server.api.persistence.v1.ReplicationTaskInfo.priority:type_name -> temporal.server.api.enums.v1.TaskPriority - 56, // 66: temporal.server.api.persistence.v1.ReplicationTaskInfo.versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition - 9, // 67: temporal.server.api.persistence.v1.ReplicationTaskInfo.task_equivalents:type_name -> temporal.server.api.persistence.v1.ReplicationTaskInfo - 72, // 68: temporal.server.api.persistence.v1.ReplicationTaskInfo.last_version_history_item:type_name -> temporal.server.api.history.v1.VersionHistoryItem - 69, // 69: temporal.server.api.persistence.v1.VisibilityTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType - 47, // 70: temporal.server.api.persistence.v1.VisibilityTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp - 47, // 71: temporal.server.api.persistence.v1.VisibilityTaskInfo.close_time:type_name -> google.protobuf.Timestamp - 47, // 72: temporal.server.api.persistence.v1.VisibilityTaskInfo.start_time:type_name -> 
google.protobuf.Timestamp - 70, // 73: temporal.server.api.persistence.v1.VisibilityTaskInfo.chasm_task_info:type_name -> temporal.server.api.persistence.v1.ChasmTaskInfo - 69, // 74: temporal.server.api.persistence.v1.TimerTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType - 62, // 75: temporal.server.api.persistence.v1.TimerTaskInfo.timeout_type:type_name -> temporal.api.enums.v1.TimeoutType - 73, // 76: temporal.server.api.persistence.v1.TimerTaskInfo.workflow_backoff_type:type_name -> temporal.server.api.enums.v1.WorkflowBackoffType - 47, // 77: temporal.server.api.persistence.v1.TimerTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp - 70, // 78: temporal.server.api.persistence.v1.TimerTaskInfo.chasm_task_info:type_name -> temporal.server.api.persistence.v1.ChasmTaskInfo - 69, // 79: temporal.server.api.persistence.v1.ArchivalTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType - 47, // 80: temporal.server.api.persistence.v1.ArchivalTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp - 69, // 81: temporal.server.api.persistence.v1.OutboundTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType - 47, // 82: temporal.server.api.persistence.v1.OutboundTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp - 74, // 83: temporal.server.api.persistence.v1.OutboundTaskInfo.state_machine_info:type_name -> temporal.server.api.persistence.v1.StateMachineTaskInfo - 70, // 84: temporal.server.api.persistence.v1.OutboundTaskInfo.chasm_task_info:type_name -> temporal.server.api.persistence.v1.ChasmTaskInfo - 14, // 85: temporal.server.api.persistence.v1.OutboundTaskInfo.worker_commands_task:type_name -> temporal.server.api.persistence.v1.WorkerCommandsTask - 75, // 86: temporal.server.api.persistence.v1.WorkerCommandsTask.commands:type_name -> temporal.api.worker.v1.WorkerCommand - 47, // 87: temporal.server.api.persistence.v1.ActivityInfo.scheduled_time:type_name -> 
google.protobuf.Timestamp - 47, // 88: temporal.server.api.persistence.v1.ActivityInfo.started_time:type_name -> google.protobuf.Timestamp - 48, // 89: temporal.server.api.persistence.v1.ActivityInfo.schedule_to_start_timeout:type_name -> google.protobuf.Duration - 48, // 90: temporal.server.api.persistence.v1.ActivityInfo.schedule_to_close_timeout:type_name -> google.protobuf.Duration - 48, // 91: temporal.server.api.persistence.v1.ActivityInfo.start_to_close_timeout:type_name -> google.protobuf.Duration - 48, // 92: temporal.server.api.persistence.v1.ActivityInfo.heartbeat_timeout:type_name -> google.protobuf.Duration - 48, // 93: temporal.server.api.persistence.v1.ActivityInfo.retry_initial_interval:type_name -> google.protobuf.Duration - 48, // 94: temporal.server.api.persistence.v1.ActivityInfo.retry_maximum_interval:type_name -> google.protobuf.Duration - 47, // 95: temporal.server.api.persistence.v1.ActivityInfo.retry_expiration_time:type_name -> google.protobuf.Timestamp - 76, // 96: temporal.server.api.persistence.v1.ActivityInfo.retry_last_failure:type_name -> temporal.api.failure.v1.Failure - 77, // 97: temporal.server.api.persistence.v1.ActivityInfo.last_heartbeat_details:type_name -> temporal.api.common.v1.Payloads - 47, // 98: temporal.server.api.persistence.v1.ActivityInfo.last_heartbeat_update_time:type_name -> google.protobuf.Timestamp - 78, // 99: temporal.server.api.persistence.v1.ActivityInfo.activity_type:type_name -> temporal.api.common.v1.ActivityType - 39, // 100: temporal.server.api.persistence.v1.ActivityInfo.use_workflow_build_id_info:type_name -> temporal.server.api.persistence.v1.ActivityInfo.UseWorkflowBuildIdInfo - 55, // 101: temporal.server.api.persistence.v1.ActivityInfo.last_worker_version_stamp:type_name -> temporal.api.common.v1.WorkerVersionStamp - 56, // 102: temporal.server.api.persistence.v1.ActivityInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition - 47, // 103: 
temporal.server.api.persistence.v1.ActivityInfo.first_scheduled_time:type_name -> google.protobuf.Timestamp - 47, // 104: temporal.server.api.persistence.v1.ActivityInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp - 79, // 105: temporal.server.api.persistence.v1.ActivityInfo.last_started_deployment:type_name -> temporal.api.deployment.v1.Deployment - 65, // 106: temporal.server.api.persistence.v1.ActivityInfo.last_deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion - 60, // 107: temporal.server.api.persistence.v1.ActivityInfo.priority:type_name -> temporal.api.common.v1.Priority - 40, // 108: temporal.server.api.persistence.v1.ActivityInfo.pause_info:type_name -> temporal.server.api.persistence.v1.ActivityInfo.PauseInfo - 53, // 109: temporal.server.api.persistence.v1.ActivityInfo.started_clock:type_name -> temporal.server.api.clock.v1.VectorClock - 47, // 110: temporal.server.api.persistence.v1.TimerInfo.expiry_time:type_name -> google.protobuf.Timestamp - 56, // 111: temporal.server.api.persistence.v1.TimerInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition - 80, // 112: temporal.server.api.persistence.v1.ChildExecutionInfo.parent_close_policy:type_name -> temporal.api.enums.v1.ParentClosePolicy - 53, // 113: temporal.server.api.persistence.v1.ChildExecutionInfo.clock:type_name -> temporal.server.api.clock.v1.VectorClock - 56, // 114: temporal.server.api.persistence.v1.ChildExecutionInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition - 60, // 115: temporal.server.api.persistence.v1.ChildExecutionInfo.priority:type_name -> temporal.api.common.v1.Priority - 56, // 116: temporal.server.api.persistence.v1.RequestCancelInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition - 56, // 117: 
temporal.server.api.persistence.v1.SignalInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition - 81, // 118: temporal.server.api.persistence.v1.Checksum.flavor:type_name -> temporal.server.api.enums.v1.ChecksumFlavor - 42, // 119: temporal.server.api.persistence.v1.Callback.nexus:type_name -> temporal.server.api.persistence.v1.Callback.Nexus - 43, // 120: temporal.server.api.persistence.v1.Callback.hsm:type_name -> temporal.server.api.persistence.v1.Callback.HSM - 82, // 121: temporal.server.api.persistence.v1.Callback.links:type_name -> temporal.api.common.v1.Link - 83, // 122: temporal.server.api.persistence.v1.HSMCompletionCallbackArg.last_event:type_name -> temporal.api.history.v1.HistoryEvent - 23, // 123: temporal.server.api.persistence.v1.CallbackInfo.callback:type_name -> temporal.server.api.persistence.v1.Callback - 46, // 124: temporal.server.api.persistence.v1.CallbackInfo.trigger:type_name -> temporal.server.api.persistence.v1.CallbackInfo.Trigger - 47, // 125: temporal.server.api.persistence.v1.CallbackInfo.registration_time:type_name -> google.protobuf.Timestamp - 84, // 126: temporal.server.api.persistence.v1.CallbackInfo.state:type_name -> temporal.server.api.enums.v1.CallbackState - 47, // 127: temporal.server.api.persistence.v1.CallbackInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp - 76, // 128: temporal.server.api.persistence.v1.CallbackInfo.last_attempt_failure:type_name -> temporal.api.failure.v1.Failure - 47, // 129: temporal.server.api.persistence.v1.CallbackInfo.next_attempt_schedule_time:type_name -> google.protobuf.Timestamp - 48, // 130: temporal.server.api.persistence.v1.NexusOperationInfo.schedule_to_close_timeout:type_name -> google.protobuf.Duration - 47, // 131: temporal.server.api.persistence.v1.NexusOperationInfo.scheduled_time:type_name -> google.protobuf.Timestamp - 85, // 132: temporal.server.api.persistence.v1.NexusOperationInfo.state:type_name -> 
temporal.server.api.enums.v1.NexusOperationState - 47, // 133: temporal.server.api.persistence.v1.NexusOperationInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp - 76, // 134: temporal.server.api.persistence.v1.NexusOperationInfo.last_attempt_failure:type_name -> temporal.api.failure.v1.Failure - 47, // 135: temporal.server.api.persistence.v1.NexusOperationInfo.next_attempt_schedule_time:type_name -> google.protobuf.Timestamp - 48, // 136: temporal.server.api.persistence.v1.NexusOperationInfo.schedule_to_start_timeout:type_name -> google.protobuf.Duration - 48, // 137: temporal.server.api.persistence.v1.NexusOperationInfo.start_to_close_timeout:type_name -> google.protobuf.Duration - 47, // 138: temporal.server.api.persistence.v1.NexusOperationInfo.started_time:type_name -> google.protobuf.Timestamp - 47, // 139: temporal.server.api.persistence.v1.NexusOperationCancellationInfo.requested_time:type_name -> google.protobuf.Timestamp - 86, // 140: temporal.server.api.persistence.v1.NexusOperationCancellationInfo.state:type_name -> temporal.api.enums.v1.NexusOperationCancellationState - 47, // 141: temporal.server.api.persistence.v1.NexusOperationCancellationInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp - 76, // 142: temporal.server.api.persistence.v1.NexusOperationCancellationInfo.last_attempt_failure:type_name -> temporal.api.failure.v1.Failure - 47, // 143: temporal.server.api.persistence.v1.NexusOperationCancellationInfo.next_attempt_schedule_time:type_name -> google.protobuf.Timestamp - 47, // 144: temporal.server.api.persistence.v1.WorkflowPauseInfo.pause_time:type_name -> google.protobuf.Timestamp - 87, // 145: temporal.server.api.persistence.v1.ShardInfo.QueueStatesEntry.value:type_name -> temporal.server.api.persistence.v1.QueueState - 88, // 146: temporal.server.api.persistence.v1.WorkflowExecutionInfo.SearchAttributesEntry.value:type_name -> temporal.api.common.v1.Payload - 88, // 147: 
temporal.server.api.persistence.v1.WorkflowExecutionInfo.MemoEntry.value:type_name -> temporal.api.common.v1.Payload - 89, // 148: temporal.server.api.persistence.v1.WorkflowExecutionInfo.UpdateInfosEntry.value:type_name -> temporal.server.api.persistence.v1.UpdateInfo - 90, // 149: temporal.server.api.persistence.v1.WorkflowExecutionInfo.SubStateMachinesByTypeEntry.value:type_name -> temporal.server.api.persistence.v1.StateMachineMap - 28, // 150: temporal.server.api.persistence.v1.WorkflowExecutionInfo.ChildrenInitializedPostResetPointEntry.value:type_name -> temporal.server.api.persistence.v1.ResetChildInfo - 7, // 151: temporal.server.api.persistence.v1.WorkflowExecutionState.RequestIdsEntry.value:type_name -> temporal.server.api.persistence.v1.RequestIDInfo - 47, // 152: temporal.server.api.persistence.v1.ActivityInfo.PauseInfo.pause_time:type_name -> google.protobuf.Timestamp - 41, // 153: temporal.server.api.persistence.v1.ActivityInfo.PauseInfo.manual:type_name -> temporal.server.api.persistence.v1.ActivityInfo.PauseInfo.Manual - 44, // 154: temporal.server.api.persistence.v1.Callback.Nexus.header:type_name -> temporal.server.api.persistence.v1.Callback.Nexus.HeaderEntry - 91, // 155: temporal.server.api.persistence.v1.Callback.HSM.ref:type_name -> temporal.server.api.persistence.v1.StateMachineRef - 45, // 156: temporal.server.api.persistence.v1.CallbackInfo.Trigger.workflow_closed:type_name -> temporal.server.api.persistence.v1.CallbackInfo.WorkflowClosed - 157, // [157:157] is the sub-list for method output_type - 157, // [157:157] is the sub-list for method input_type - 157, // [157:157] is the sub-list for extension type_name - 157, // [157:157] is the sub-list for extension extendee - 0, // [0:157] is the sub-list for field type_name + 56, // 51: temporal.server.api.persistence.v1.TimeSkippingInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 47, // 52: 
temporal.server.api.persistence.v1.FastForwardInfo.target_time:type_name -> google.protobuf.Timestamp + 65, // 53: temporal.server.api.persistence.v1.LastNotifiedTargetVersion.deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion + 66, // 54: temporal.server.api.persistence.v1.WorkflowExecutionState.state:type_name -> temporal.server.api.enums.v1.WorkflowExecutionState + 67, // 55: temporal.server.api.persistence.v1.WorkflowExecutionState.status:type_name -> temporal.api.enums.v1.WorkflowExecutionStatus + 56, // 56: temporal.server.api.persistence.v1.WorkflowExecutionState.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 47, // 57: temporal.server.api.persistence.v1.WorkflowExecutionState.start_time:type_name -> google.protobuf.Timestamp + 37, // 58: temporal.server.api.persistence.v1.WorkflowExecutionState.request_ids:type_name -> temporal.server.api.persistence.v1.WorkflowExecutionState.RequestIdsEntry + 68, // 59: temporal.server.api.persistence.v1.RequestIDInfo.event_type:type_name -> temporal.api.enums.v1.EventType + 69, // 60: temporal.server.api.persistence.v1.TransferTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType + 47, // 61: temporal.server.api.persistence.v1.TransferTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp + 38, // 62: temporal.server.api.persistence.v1.TransferTaskInfo.close_execution_task_details:type_name -> temporal.server.api.persistence.v1.TransferTaskInfo.CloseExecutionTaskDetails + 70, // 63: temporal.server.api.persistence.v1.TransferTaskInfo.chasm_task_info:type_name -> temporal.server.api.persistence.v1.ChasmTaskInfo + 69, // 64: temporal.server.api.persistence.v1.ReplicationTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType + 47, // 65: temporal.server.api.persistence.v1.ReplicationTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp + 71, // 66: 
temporal.server.api.persistence.v1.ReplicationTaskInfo.priority:type_name -> temporal.server.api.enums.v1.TaskPriority + 56, // 67: temporal.server.api.persistence.v1.ReplicationTaskInfo.versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 9, // 68: temporal.server.api.persistence.v1.ReplicationTaskInfo.task_equivalents:type_name -> temporal.server.api.persistence.v1.ReplicationTaskInfo + 72, // 69: temporal.server.api.persistence.v1.ReplicationTaskInfo.last_version_history_item:type_name -> temporal.server.api.history.v1.VersionHistoryItem + 69, // 70: temporal.server.api.persistence.v1.VisibilityTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType + 47, // 71: temporal.server.api.persistence.v1.VisibilityTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp + 47, // 72: temporal.server.api.persistence.v1.VisibilityTaskInfo.close_time:type_name -> google.protobuf.Timestamp + 47, // 73: temporal.server.api.persistence.v1.VisibilityTaskInfo.start_time:type_name -> google.protobuf.Timestamp + 70, // 74: temporal.server.api.persistence.v1.VisibilityTaskInfo.chasm_task_info:type_name -> temporal.server.api.persistence.v1.ChasmTaskInfo + 69, // 75: temporal.server.api.persistence.v1.TimerTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType + 62, // 76: temporal.server.api.persistence.v1.TimerTaskInfo.timeout_type:type_name -> temporal.api.enums.v1.TimeoutType + 73, // 77: temporal.server.api.persistence.v1.TimerTaskInfo.workflow_backoff_type:type_name -> temporal.server.api.enums.v1.WorkflowBackoffType + 47, // 78: temporal.server.api.persistence.v1.TimerTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp + 70, // 79: temporal.server.api.persistence.v1.TimerTaskInfo.chasm_task_info:type_name -> temporal.server.api.persistence.v1.ChasmTaskInfo + 69, // 80: temporal.server.api.persistence.v1.ArchivalTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType + 47, // 
81: temporal.server.api.persistence.v1.ArchivalTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp + 69, // 82: temporal.server.api.persistence.v1.OutboundTaskInfo.task_type:type_name -> temporal.server.api.enums.v1.TaskType + 47, // 83: temporal.server.api.persistence.v1.OutboundTaskInfo.visibility_time:type_name -> google.protobuf.Timestamp + 74, // 84: temporal.server.api.persistence.v1.OutboundTaskInfo.state_machine_info:type_name -> temporal.server.api.persistence.v1.StateMachineTaskInfo + 70, // 85: temporal.server.api.persistence.v1.OutboundTaskInfo.chasm_task_info:type_name -> temporal.server.api.persistence.v1.ChasmTaskInfo + 14, // 86: temporal.server.api.persistence.v1.OutboundTaskInfo.worker_commands_task:type_name -> temporal.server.api.persistence.v1.WorkerCommandsTask + 75, // 87: temporal.server.api.persistence.v1.WorkerCommandsTask.commands:type_name -> temporal.api.worker.v1.WorkerCommand + 47, // 88: temporal.server.api.persistence.v1.ActivityInfo.scheduled_time:type_name -> google.protobuf.Timestamp + 47, // 89: temporal.server.api.persistence.v1.ActivityInfo.started_time:type_name -> google.protobuf.Timestamp + 48, // 90: temporal.server.api.persistence.v1.ActivityInfo.schedule_to_start_timeout:type_name -> google.protobuf.Duration + 48, // 91: temporal.server.api.persistence.v1.ActivityInfo.schedule_to_close_timeout:type_name -> google.protobuf.Duration + 48, // 92: temporal.server.api.persistence.v1.ActivityInfo.start_to_close_timeout:type_name -> google.protobuf.Duration + 48, // 93: temporal.server.api.persistence.v1.ActivityInfo.heartbeat_timeout:type_name -> google.protobuf.Duration + 48, // 94: temporal.server.api.persistence.v1.ActivityInfo.retry_initial_interval:type_name -> google.protobuf.Duration + 48, // 95: temporal.server.api.persistence.v1.ActivityInfo.retry_maximum_interval:type_name -> google.protobuf.Duration + 47, // 96: temporal.server.api.persistence.v1.ActivityInfo.retry_expiration_time:type_name -> 
google.protobuf.Timestamp + 76, // 97: temporal.server.api.persistence.v1.ActivityInfo.retry_last_failure:type_name -> temporal.api.failure.v1.Failure + 77, // 98: temporal.server.api.persistence.v1.ActivityInfo.last_heartbeat_details:type_name -> temporal.api.common.v1.Payloads + 47, // 99: temporal.server.api.persistence.v1.ActivityInfo.last_heartbeat_update_time:type_name -> google.protobuf.Timestamp + 78, // 100: temporal.server.api.persistence.v1.ActivityInfo.activity_type:type_name -> temporal.api.common.v1.ActivityType + 39, // 101: temporal.server.api.persistence.v1.ActivityInfo.use_workflow_build_id_info:type_name -> temporal.server.api.persistence.v1.ActivityInfo.UseWorkflowBuildIdInfo + 55, // 102: temporal.server.api.persistence.v1.ActivityInfo.last_worker_version_stamp:type_name -> temporal.api.common.v1.WorkerVersionStamp + 56, // 103: temporal.server.api.persistence.v1.ActivityInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 47, // 104: temporal.server.api.persistence.v1.ActivityInfo.first_scheduled_time:type_name -> google.protobuf.Timestamp + 47, // 105: temporal.server.api.persistence.v1.ActivityInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp + 79, // 106: temporal.server.api.persistence.v1.ActivityInfo.last_started_deployment:type_name -> temporal.api.deployment.v1.Deployment + 65, // 107: temporal.server.api.persistence.v1.ActivityInfo.last_deployment_version:type_name -> temporal.api.deployment.v1.WorkerDeploymentVersion + 60, // 108: temporal.server.api.persistence.v1.ActivityInfo.priority:type_name -> temporal.api.common.v1.Priority + 40, // 109: temporal.server.api.persistence.v1.ActivityInfo.pause_info:type_name -> temporal.server.api.persistence.v1.ActivityInfo.PauseInfo + 53, // 110: temporal.server.api.persistence.v1.ActivityInfo.started_clock:type_name -> temporal.server.api.clock.v1.VectorClock + 47, // 111: 
temporal.server.api.persistence.v1.TimerInfo.expiry_time:type_name -> google.protobuf.Timestamp + 56, // 112: temporal.server.api.persistence.v1.TimerInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 80, // 113: temporal.server.api.persistence.v1.ChildExecutionInfo.parent_close_policy:type_name -> temporal.api.enums.v1.ParentClosePolicy + 53, // 114: temporal.server.api.persistence.v1.ChildExecutionInfo.clock:type_name -> temporal.server.api.clock.v1.VectorClock + 56, // 115: temporal.server.api.persistence.v1.ChildExecutionInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 60, // 116: temporal.server.api.persistence.v1.ChildExecutionInfo.priority:type_name -> temporal.api.common.v1.Priority + 56, // 117: temporal.server.api.persistence.v1.RequestCancelInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 56, // 118: temporal.server.api.persistence.v1.SignalInfo.last_update_versioned_transition:type_name -> temporal.server.api.persistence.v1.VersionedTransition + 81, // 119: temporal.server.api.persistence.v1.Checksum.flavor:type_name -> temporal.server.api.enums.v1.ChecksumFlavor + 42, // 120: temporal.server.api.persistence.v1.Callback.nexus:type_name -> temporal.server.api.persistence.v1.Callback.Nexus + 43, // 121: temporal.server.api.persistence.v1.Callback.hsm:type_name -> temporal.server.api.persistence.v1.Callback.HSM + 82, // 122: temporal.server.api.persistence.v1.Callback.links:type_name -> temporal.api.common.v1.Link + 83, // 123: temporal.server.api.persistence.v1.HSMCompletionCallbackArg.last_event:type_name -> temporal.api.history.v1.HistoryEvent + 23, // 124: temporal.server.api.persistence.v1.CallbackInfo.callback:type_name -> temporal.server.api.persistence.v1.Callback + 46, // 125: temporal.server.api.persistence.v1.CallbackInfo.trigger:type_name -> 
temporal.server.api.persistence.v1.CallbackInfo.Trigger + 47, // 126: temporal.server.api.persistence.v1.CallbackInfo.registration_time:type_name -> google.protobuf.Timestamp + 84, // 127: temporal.server.api.persistence.v1.CallbackInfo.state:type_name -> temporal.server.api.enums.v1.CallbackState + 47, // 128: temporal.server.api.persistence.v1.CallbackInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp + 76, // 129: temporal.server.api.persistence.v1.CallbackInfo.last_attempt_failure:type_name -> temporal.api.failure.v1.Failure + 47, // 130: temporal.server.api.persistence.v1.CallbackInfo.next_attempt_schedule_time:type_name -> google.protobuf.Timestamp + 48, // 131: temporal.server.api.persistence.v1.NexusOperationInfo.schedule_to_close_timeout:type_name -> google.protobuf.Duration + 47, // 132: temporal.server.api.persistence.v1.NexusOperationInfo.scheduled_time:type_name -> google.protobuf.Timestamp + 85, // 133: temporal.server.api.persistence.v1.NexusOperationInfo.state:type_name -> temporal.server.api.enums.v1.NexusOperationState + 47, // 134: temporal.server.api.persistence.v1.NexusOperationInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp + 76, // 135: temporal.server.api.persistence.v1.NexusOperationInfo.last_attempt_failure:type_name -> temporal.api.failure.v1.Failure + 47, // 136: temporal.server.api.persistence.v1.NexusOperationInfo.next_attempt_schedule_time:type_name -> google.protobuf.Timestamp + 48, // 137: temporal.server.api.persistence.v1.NexusOperationInfo.schedule_to_start_timeout:type_name -> google.protobuf.Duration + 48, // 138: temporal.server.api.persistence.v1.NexusOperationInfo.start_to_close_timeout:type_name -> google.protobuf.Duration + 47, // 139: temporal.server.api.persistence.v1.NexusOperationInfo.started_time:type_name -> google.protobuf.Timestamp + 47, // 140: temporal.server.api.persistence.v1.NexusOperationCancellationInfo.requested_time:type_name -> google.protobuf.Timestamp + 86, // 
141: temporal.server.api.persistence.v1.NexusOperationCancellationInfo.state:type_name -> temporal.api.enums.v1.NexusOperationCancellationState + 47, // 142: temporal.server.api.persistence.v1.NexusOperationCancellationInfo.last_attempt_complete_time:type_name -> google.protobuf.Timestamp + 76, // 143: temporal.server.api.persistence.v1.NexusOperationCancellationInfo.last_attempt_failure:type_name -> temporal.api.failure.v1.Failure + 47, // 144: temporal.server.api.persistence.v1.NexusOperationCancellationInfo.next_attempt_schedule_time:type_name -> google.protobuf.Timestamp + 47, // 145: temporal.server.api.persistence.v1.WorkflowPauseInfo.pause_time:type_name -> google.protobuf.Timestamp + 87, // 146: temporal.server.api.persistence.v1.ShardInfo.QueueStatesEntry.value:type_name -> temporal.server.api.persistence.v1.QueueState + 88, // 147: temporal.server.api.persistence.v1.WorkflowExecutionInfo.SearchAttributesEntry.value:type_name -> temporal.api.common.v1.Payload + 88, // 148: temporal.server.api.persistence.v1.WorkflowExecutionInfo.MemoEntry.value:type_name -> temporal.api.common.v1.Payload + 89, // 149: temporal.server.api.persistence.v1.WorkflowExecutionInfo.UpdateInfosEntry.value:type_name -> temporal.server.api.persistence.v1.UpdateInfo + 90, // 150: temporal.server.api.persistence.v1.WorkflowExecutionInfo.SubStateMachinesByTypeEntry.value:type_name -> temporal.server.api.persistence.v1.StateMachineMap + 28, // 151: temporal.server.api.persistence.v1.WorkflowExecutionInfo.ChildrenInitializedPostResetPointEntry.value:type_name -> temporal.server.api.persistence.v1.ResetChildInfo + 7, // 152: temporal.server.api.persistence.v1.WorkflowExecutionState.RequestIdsEntry.value:type_name -> temporal.server.api.persistence.v1.RequestIDInfo + 47, // 153: temporal.server.api.persistence.v1.ActivityInfo.PauseInfo.pause_time:type_name -> google.protobuf.Timestamp + 41, // 154: temporal.server.api.persistence.v1.ActivityInfo.PauseInfo.manual:type_name -> 
temporal.server.api.persistence.v1.ActivityInfo.PauseInfo.Manual + 44, // 155: temporal.server.api.persistence.v1.Callback.Nexus.header:type_name -> temporal.server.api.persistence.v1.Callback.Nexus.HeaderEntry + 91, // 156: temporal.server.api.persistence.v1.Callback.HSM.ref:type_name -> temporal.server.api.persistence.v1.StateMachineRef + 45, // 157: temporal.server.api.persistence.v1.CallbackInfo.Trigger.workflow_closed:type_name -> temporal.server.api.persistence.v1.CallbackInfo.WorkflowClosed + 158, // [158:158] is the sub-list for method output_type + 158, // [158:158] is the sub-list for method input_type + 158, // [158:158] is the sub-list for extension type_name + 158, // [158:158] is the sub-list for extension extendee + 0, // [0:158] is the sub-list for field type_name } func init() { file_temporal_server_api_persistence_v1_executions_proto_init() } diff --git a/proto/internal/temporal/server/api/persistence/v1/executions.proto b/proto/internal/temporal/server/api/persistence/v1/executions.proto index a729bafa62..753331192e 100644 --- a/proto/internal/temporal/server/api/persistence/v1/executions.proto +++ b/proto/internal/temporal/server/api/persistence/v1/executions.proto @@ -328,10 +328,18 @@ message TimeSkippingInfo { temporal.api.common.v1.TimeSkippingConfig config = 1; // Total skipped duration for the current workflow execution run, including any - ./* inherited skipped duration carried over from a preceding execution that started this run. */google.protobuf.Duration accumulated_skipped_duration = 2; + // inherited skipped duration carried over from a preceding execution that started this run. + google.protobuf.Duration accumulated_skipped_duration = 2; // The current fast-forward info for time skipping. FastForwardInfo fast_forward_info = 4; + + // Versioned transition at which this TimeSkippingInfo was last modified (i.e. when a + // skip transition changed accumulated_skipped_duration). 
Used by PartialRefresh to detect + // that pending timer tasks must be re-stamped against the new accumulated skip, since a + // skip mutates this workflow-level field without bumping any per-timer + // last_update_versioned_transition. Mirrors the per-entity stamps on TimerInfo/ActivityInfo. + VersionedTransition last_update_versioned_transition = 5; } message FastForwardInfo { diff --git a/service/history/timer_queue_standby_task_executor.go b/service/history/timer_queue_standby_task_executor.go index 560e0c2d6f..0aec256735 100644 --- a/service/history/timer_queue_standby_task_executor.go +++ b/service/history/timer_queue_standby_task_executor.go @@ -110,9 +110,7 @@ func (t *timerQueueStandbyTaskExecutor) Execute( case *tasks.ChasmTask: err = t.executeChasmSideEffectTimerTask(ctx, task) case *tasks.TimeSkippingTimerTask: - // todo@time-skipping: replication. The disable-after-fast-forward transition is emitted - // on the active side and will replicate; standby drops the local task. - err = nil + err = t.executeTimeSkippingTimerTask(ctx, task) default: err = queueserrors.NewUnprocessableTaskError("unknown task type") } @@ -231,17 +229,53 @@ func (t *timerQueueStandbyTaskExecutor) discardChasmTask( ) } +// executeTimeSkippingTimerTask waits on the standby until the active cluster +// replicates the fast-forward transition. If the fast-forward this task was +// generated for is still pending (same source event and not yet reached), the +// task is retried until the discard delay elapses; otherwise it is acked. 
+func (t *timerQueueStandbyTaskExecutor) executeTimeSkippingTimerTask( + ctx context.Context, + timerTask *tasks.TimeSkippingTimerTask, +) error { + actionFn := func(_ context.Context, wfContext historyi.WorkflowContext, mutableState historyi.MutableState, _ historyi.ReleaseWorkflowContextFunc) (any, error) { + if !mutableState.IsWorkflowExecutionRunning() { + return nil, nil + } + tsi := mutableState.GetExecutionInfo().GetTimeSkippingInfo() + ffi := tsi.GetFastForwardInfo() + + // the fast-forward this timer task is associated with is still valid and has not been reached so keep waiting + if ffi != nil && ffi.GetSourceEventId() == timerTask.EventID && !ffi.GetHasReached() { + return &struct{}{}, nil + } + return nil, nil + } + + return t.processTimer( + ctx, + timerTask, + actionFn, + getStandbyPostActionFn( + timerTask, + t.getCurrentTime, + t.config.StandbyTaskMissingEventsDiscardDelay(timerTask.GetType()), + t.checkExecutionStillExistsOnSourceBeforeDiscard, + ), + ) +} + func (t *timerQueueStandbyTaskExecutor) executeUserTimerTimeoutTask( ctx context.Context, timerTask *tasks.UserTimerTask, ) error { - referenceTime := t.Now() actionFn := func(_ context.Context, wfContext historyi.WorkflowContext, mutableState historyi.MutableState, _ historyi.ReleaseWorkflowContextFunc) (any, error) { if !mutableState.IsWorkflowExecutionRunning() { // workflow already finished, no need to process the timer return nil, nil } + referenceTime := mutableState.Now() + timerSequence := t.getTimerSequence(mutableState) timerSequenceIDs := timerSequence.LoadAndSortUserTimers() if len(timerSequenceIDs) > 0 { @@ -253,6 +287,10 @@ func (t *timerQueueStandbyTaskExecutor) executeUserTimerTimeoutTask( return nil, serviceerror.NewInternal(errString) } + // Use mutableState.Now() as reference time as a mutable state may use virtual time + // which can skip duration and be before the wall clock time. 
+ // And when this happens the timerSequenceID.Timestamp is also virtual time and before the wall clock time, + // while the timerTask.VisibilityTimestamp uses the wall clock time that maps to the virtual time. if queues.IsTimeExpired( timerTask, referenceTime, @@ -295,13 +333,14 @@ func (t *timerQueueStandbyTaskExecutor) executeActivityTimeoutTask( // // the overall solution is to attempt to generate a new activity timer task whenever the // task passed in is safe to be throw away. - referenceTime := t.Now() actionFn := func(ctx context.Context, wfContext historyi.WorkflowContext, mutableState historyi.MutableState, _ historyi.ReleaseWorkflowContextFunc) (any, error) { if !mutableState.IsWorkflowExecutionRunning() { // workflow already finished, no need to process the timer return nil, nil } + referenceTime := mutableState.Now() + timerSequence := t.getTimerSequence(mutableState) updateMutableState := false timerSequenceIDs := timerSequence.LoadAndSortActivityTimers() @@ -314,6 +353,10 @@ func (t *timerQueueStandbyTaskExecutor) executeActivityTimeoutTask( return nil, serviceerror.NewInternal(errString) } + // Use mutableState.Now() as reference time as a mutable state may use virtual time + // which can skip duration and be before the wall clock time. + // And when this happens the timerSequenceID.Timestamp is also virtual time and before the wall clock time, + // while the timerTask.VisibilityTimestamp uses the wall clock time that maps to the virtual time. if queues.IsTimeExpired( timerTask, referenceTime, @@ -336,6 +379,7 @@ func (t *timerQueueStandbyTaskExecutor) executeActivityTimeoutTask( // created. 
isHeartBeatTask := timerTask.TimeoutType == enumspb.TIMEOUT_TYPE_HEARTBEAT ai, heartbeatTimeoutVis, ok := mutableState.GetActivityInfoWithTimerHeartbeat(timerTask.EventID) + if isHeartBeatTask && ok && queues.IsTimeExpired(timerTask, timerTask.GetVisibilityTime(), mutableState.ToRealTime(heartbeatTimeoutVis)) { if err := mutableState.UpdateActivityTaskStatusWithTimerHeartbeat(ai.ScheduledEventId, ai.TimerTaskStatus&^workflow.TimerTaskStatusCreatedHeartbeat, nil); err != nil { return nil, err @@ -774,6 +818,12 @@ func (t *timerQueueStandbyTaskExecutor) pushActivity( ) } +// getCurrentTime returns the shard's wall-clock view of "now" for t.clusterName. +// Must stay wall-clock: it gates standby task-retry timing against VisibilityTime +// (also wall-clock); mutableState.Now() is virtual time and would force-discard +// time-skipping workflows. actionFn closures compare against virtual timestamps, +// so they use mutableState.Now() instead. +// // TODO: deprecate this function and always use t.Now() // Only test code sets t.clusterName to be non-current cluster name // and advance the time by setting calling shardContext.SetCurrentTime. diff --git a/service/history/timer_queue_standby_task_executor_test.go b/service/history/timer_queue_standby_task_executor_test.go index 18239bdfdc..ebc43f7c20 100644 --- a/service/history/timer_queue_standby_task_executor_test.go +++ b/service/history/timer_queue_standby_task_executor_test.go @@ -2459,6 +2459,109 @@ func (s *timerQueueStandbyTaskExecutorSuite) TestExecuteChasmPureTimerTask_Valid s.ErrorIs(expectedErr, resp.ExecutionErr) } +// makeTimeSkippingMS builds a running mutable state, snapshots it to a persistence proto, +// and returns the persistence proto plus the workflow key. The caller can mutate the returned +// ExecutionInfo (e.g. set TimeSkippingInfo) before programming GetWorkflowExecution. 
+func (s *timerQueueStandbyTaskExecutorSuite) makeTimeSkippingMS() (*persistencespb.WorkflowMutableState, definition.WorkflowKey) { + execution := &commonpb.WorkflowExecution{ + WorkflowId: "ts-bound-wf-" + uuid.NewString(), + RunId: uuid.NewString(), + } + workflowKey := definition.NewWorkflowKey(s.namespaceID.String(), execution.GetWorkflowId(), execution.GetRunId()) + + mutableState := workflow.TestGlobalMutableState( + s.mockShard, s.mockShard.GetEventsCache(), s.logger, s.version, execution.GetWorkflowId(), execution.GetRunId()) + event, err := mutableState.AddWorkflowExecutionStartedEvent( + execution, + &historyservice.StartWorkflowExecutionRequest{ + Attempt: 1, + NamespaceId: s.namespaceID.String(), + StartRequest: &workflowservice.StartWorkflowExecutionRequest{ + WorkflowType: &commonpb.WorkflowType{Name: "test-wf-type"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: "test-tq"}, + WorkflowRunTimeout: durationpb.New(200 * time.Second), + WorkflowTaskTimeout: durationpb.New(1 * time.Second), + }, + }, + ) + s.NoError(err) + + pms := s.createPersistenceMutableState(mutableState, event.GetEventId(), event.GetVersion()) + return pms, workflowKey +} + +// makeTimeSkippingPendingMS builds an MS that puts the standby's action function on +// the "still waiting" path: fast-forward matches the task's source event and HasReached=false. 
+func (s *timerQueueStandbyTaskExecutorSuite) makeTimeSkippingPendingMS() (*persistencespb.WorkflowMutableState, definition.WorkflowKey) { + pms, workflowKey := s.makeTimeSkippingMS() + pms.ExecutionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(time.Hour), + }, + FastForwardInfo: &persistencespb.FastForwardInfo{ + TargetTime: timestamppb.New(s.now.Add(time.Hour)), + SourceEventId: 1, + }, + } + return pms, workflowKey +} + +func (s *timerQueueStandbyTaskExecutorSuite) TestExecuteTimeSkippingTimerTask_Wait() { + pms, workflowKey := s.makeTimeSkippingPendingMS() + + timerTask := &tasks.TimeSkippingTimerTask{ + WorkflowKey: workflowKey, + TaskID: s.mustGenerateTaskID(), + VisibilityTimestamp: s.now, + EventID: 1, + } + s.mockExecutionMgr.EXPECT().GetWorkflowExecution(gomock.Any(), gomock.Any()). + Return(&persistence.GetWorkflowExecutionResponse{State: pms}, nil) + + s.mockShard.SetCurrentTime(s.clusterName, s.now) + resp := s.timerQueueStandbyTaskExecutor.Execute(context.Background(), s.newTaskExecutable(timerTask)) + s.Equal(consts.ErrTaskRetry, resp.ExecutionErr) +} + +func (s *timerQueueStandbyTaskExecutorSuite) TestExecuteTimeSkippingTimerTask_Ack() { + // HasReached=true: active side already replicated the disable transition, + // so the standby's action function returns nil and the task is acked. + pms, workflowKey := s.makeTimeSkippingPendingMS() + pms.ExecutionInfo.TimeSkippingInfo.FastForwardInfo.HasReached = true + + timerTask := &tasks.TimeSkippingTimerTask{ + WorkflowKey: workflowKey, + TaskID: s.mustGenerateTaskID(), + VisibilityTimestamp: s.now.Add(time.Hour), + EventID: 1, + } + s.mockExecutionMgr.EXPECT().GetWorkflowExecution(gomock.Any(), gomock.Any()). 
+ Return(&persistence.GetWorkflowExecutionResponse{State: pms}, nil) + + s.mockShard.SetCurrentTime(s.clusterName, s.now) + resp := s.timerQueueStandbyTaskExecutor.Execute(context.Background(), s.newTaskExecutable(timerTask)) + s.NoError(resp.ExecutionErr) +} + +func (s *timerQueueStandbyTaskExecutorSuite) TestExecuteTimeSkippingTimerTask_Discard() { + pms, workflowKey := s.makeTimeSkippingPendingMS() + + timerTask := &tasks.TimeSkippingTimerTask{ + WorkflowKey: workflowKey, + TaskID: s.mustGenerateTaskID(), + VisibilityTimestamp: s.now, + EventID: 1, + } + s.mockExecutionMgr.EXPECT().GetWorkflowExecution(gomock.Any(), gomock.Any()). + Return(&persistence.GetWorkflowExecutionResponse{State: pms}, nil) + + // Past VisibilityTime + discardDelay: ErrTaskDiscarded. + s.mockShard.SetCurrentTime(s.clusterName, s.now.Add(s.discardDuration)) + resp := s.timerQueueStandbyTaskExecutor.Execute(context.Background(), s.newTaskExecutable(timerTask)) + s.Equal(consts.ErrTaskDiscarded, resp.ExecutionErr) +} + func (s *timerQueueStandbyTaskExecutorSuite) createPersistenceMutableState( ms historyi.MutableState, lastEventID int64, diff --git a/service/history/workflow/mutable_state_impl.go b/service/history/workflow/mutable_state_impl.go index 997b33bb39..a5edbb60c2 100644 --- a/service/history/workflow/mutable_state_impl.go +++ b/service/history/workflow/mutable_state_impl.go @@ -65,7 +65,6 @@ import ( "go.temporal.io/server/common/util" "go.temporal.io/server/common/worker_versioning" "go.temporal.io/server/components/callbacks" - "go.temporal.io/server/components/nexusoperations" "go.temporal.io/server/service/history/configs" "go.temporal.io/server/service/history/consts" "go.temporal.io/server/service/history/events" @@ -4164,27 +4163,24 @@ func (ms *MutableStateImpl) ApplyWorkflowExecutionTimeSkippingTransitionedEvent( attr := event.GetWorkflowExecutionTimeSkippingTransitionedEventAttributes() tsi := ms.executionInfo.GetTimeSkippingInfo() + opTag := 
tag.WorkflowActionWorkflowExecutionTimeSkippingTransitioned + invalidTransitionError := serviceerror.NewInternal("TimeSkippingTransitionedEvent failed to apply") if tsi == nil { - return serviceerror.NewInternal( - "TimeSkippingInfo is not set when applying WorkflowExecutionTimeSkippingTransitionedEvent, mutable state is corrupted", - ) + ms.logError("TimeSkippingTransitionedEvent failed to apply: TimeSkippingInfo is nil", opTag) + return invalidTransitionError } if attr.TargetTime == nil && !attr.GetDisabledAfterFastForward() { - return serviceerror.NewInternal( - "empty WorkflowExecutionTimeSkippingTransitionedEvent found, event is corrupted", - ) + ms.logError("TimeSkippingTransitionedEvent failed to apply: TargetTime is nil and disabled after fast-forward is false", opTag) + return invalidTransitionError } - if tsi.GetAccumulatedSkippedDuration() == nil { - tsi.AccumulatedSkippedDuration = durationpb.New(0) - } - accumulatedSkippedDuration := tsi.GetAccumulatedSkippedDuration().AsDuration() + // update time if !timeNotSet(attr.TargetTime) { - accumulatedSkippedDuration += attr.TargetTime.AsTime().Sub(event.GetEventTime().AsTime()) + asd := ms.accumulatedSkippedDuration() + attr.TargetTime.AsTime().Sub(event.GetEventTime().AsTime()) + tsi.AccumulatedSkippedDuration = durationpb.New(asd) } - tsi.AccumulatedSkippedDuration = durationpb.New(accumulatedSkippedDuration) + // update enabled state tsi.Config.Enabled = !attr.GetDisabledAfterFastForward() - if attr.GetDisabledAfterFastForward() && tsi.GetFastForwardInfo() != nil { tsi.FastForwardInfo.HasReached = true } @@ -7925,6 +7921,10 @@ func (ms *MutableStateImpl) closeTransactionTrackLastUpdateVersionedTransition( ms.executionState.LastUpdateVersionedTransition = currentVersionedTransition } + if ms.timeSkippingInfoUpdated && ms.executionInfo.TimeSkippingInfo != nil { + ms.executionInfo.TimeSkippingInfo.LastUpdateVersionedTransition = currentVersionedTransition + } + // LastUpdateVersionTransition for HSM nodes 
already updated when transitioning the nodes. // LastUpdateVersionTransition for CHASM nodes already updated when closing the chasm tree transaction. } @@ -8898,61 +8898,6 @@ func (ms *MutableStateImpl) closeTransactionHandleActivityUserTimerTasks( } } -func (ms *MutableStateImpl) closeTransactionHandleTimeSkipping( - ctx context.Context, - transactionPolicy historyi.TransactionPolicy, -) (regenTimerTasksForTimeSkipping bool) { - switch transactionPolicy { - case historyi.TransactionPolicyActive: - if !ms.IsWorkflowExecutionRunning() { - return false - } - if shouldExecute, transition := ms.shouldExecuteTimeSkipping(); shouldExecute { - _, err := ms.AddWorkflowExecutionTimeSkippingTransitionedEvent( - ctx, transition.targetTime, transition.disabledAfterFastForward) - if err != nil { - ms.metricsHandler.Counter(metrics.ExecutionTimeSkippingTransitionedErrorCounter.Name()).Record(1) - ms.logger.Error( - "failed to add workflow execution time skipping transitioned event, and ignore this error and continue", - tag.WorkflowID(ms.GetExecutionInfo().WorkflowId), - tag.WorkflowRunID(ms.GetExecutionState().RunId), - tag.Error(err), - ) - return false - } - if transition.targetTime.IsZero() { - return false - } - return true - } - return false - case historyi.TransactionPolicyPassive: - return false - default: - ms.logger.Error(fmt.Sprintf("closeTransactionHandleTimeSkipping: unknown transaction policy: %v", transactionPolicy), - tag.WorkflowID(ms.GetExecutionInfo().WorkflowId), - tag.WorkflowRunID(ms.GetExecutionState().RunId), - ) - return false - } -} - -func (ms *MutableStateImpl) closeTransactionRegenerateTimerTasksForTimeSkipping( - transactionPolicy historyi.TransactionPolicy, -) error { - switch transactionPolicy { - case historyi.TransactionPolicyActive: - if !ms.IsWorkflowExecutionRunning() { - return nil - } - return ms.taskGenerator.RegenerateTimerTasksForTimeSkipping() - case historyi.TransactionPolicyPassive: - return nil - default: - return 
serviceerror.NewInternalf("unknown transaction policy: %v", transactionPolicy) - } -} - // Visibility tasks are collapsed into a single one: START < UPSERT < CLOSE < DELETE // Their enum values are already in order, so using them to make the code simpler. // Any other task type is preserved in order. @@ -9972,276 +9917,6 @@ func logError( logger.Error(msg, tags...) } -func (ms *MutableStateImpl) shiftWorkflowTimes(initialSkippedDuration *durationpb.Duration) { - if initialSkippedDuration == nil || initialSkippedDuration.AsDuration() == 0 { - return - } - accum := initialSkippedDuration.AsDuration() - if !timeNotSet(ms.executionState.StartTime) { - ms.executionState.StartTime = timestamppb.New(ms.executionState.StartTime.AsTime().Add(accum)) - } - if !timeNotSet(ms.executionInfo.StartTime) { - ms.executionInfo.StartTime = timestamppb.New(ms.executionInfo.StartTime.AsTime().Add(accum)) - } - if !timeNotSet(ms.executionInfo.ExecutionTime) { - ms.executionInfo.ExecutionTime = timestamppb.New(ms.executionInfo.ExecutionTime.AsTime().Add(accum)) - } - if !timeNotSet(ms.executionInfo.WorkflowRunExpirationTime) { - ms.executionInfo.WorkflowRunExpirationTime = timestamppb.New(ms.executionInfo.WorkflowRunExpirationTime.AsTime().Add(accum)) - } - if !timeNotSet(ms.executionInfo.WorkflowExecutionExpirationTime) { - ms.executionInfo.WorkflowExecutionExpirationTime = timestamppb.New(ms.executionInfo.WorkflowExecutionExpirationTime.AsTime().Add(accum)) - } -} - -func (ms *MutableStateImpl) initTimeSkippingInfo( - config *commonpb.TimeSkippingConfig, - timeSkippingStatePropagation *commonpb.TimeSkippingStatePropagation, - currentEventID int64, -) { - // we only need to init time skipping info if - // either config is not nil or it has initial skip - initialSkip := timeSkippingStatePropagation.GetInitialSkippedDuration() - if config == nil && initialSkip == nil { - return - } - ms.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: config, - 
AccumulatedSkippedDuration: initialSkip, - } - ms.wrapTimeSourceWithTimeSkipping() - ms.shiftWorkflowTimes(initialSkip) - ms.applyFastForward(currentEventID, timeSkippingStatePropagation.GetFastForwardTargetTime()) - ms.timeSkippingInfoUpdated = true -} - -// updateTimeSkippingInfo updates the time skipping info with -// with new config and the event ID that updates the config -// we allow updating the config to nil when users want to remove the TSC -func (ms *MutableStateImpl) updateTimeSkippingInfo( - config *commonpb.TimeSkippingConfig, - currentEventID int64, -) { - ms.executionInfo.TimeSkippingInfo.Config = config - // Options update: the new ff duration is a fresh budget measured from now. - ms.applyFastForward(currentEventID, nil) - ms.timeSkippingInfoUpdated = true -} - -// applyFastForward (re)computes the FastForwardInfo using the new TimeSkippingConfig (TSC) and propagated time-skippingstates. -// This method should be called whenever the TimeSkippingConfig is initialized or updated. -// An invariant of the FastForwardInfo is that after this method is called, if the current TSC has a FastForward value, -// the FastForwardInfo should never be nil. -func (ms *MutableStateImpl) applyFastForward(currentEventID int64, propagatedTargetTime *timestamppb.Timestamp) { - - tsc := ms.GetExecutionInfo().GetTimeSkippingInfo().GetConfig() - tsi := ms.executionInfo.TimeSkippingInfo - - // clear fast forward if disabled or zero max_elapsed_duration - if !tsc.GetEnabled() || tsc.GetFastForward().AsDuration() <= 0 { - if tsi.FastForwardInfo != nil { - tsi.FastForwardInfo = nil - } - return - } - - var targetTime time.Time - if propagatedTargetTime != nil { - targetTime = propagatedTargetTime.AsTime() - } else { - // if there is no propagated target time, - // fast-forward refers to a new duration from now. 
- targetTime = ms.Now().Add(tsc.GetFastForward().AsDuration()) - } - - // always install a fresh fast-forward bound - tsi.FastForwardInfo = &persistencespb.FastForwardInfo{ - TargetTime: timestamppb.New(targetTime), - SourceEventId: currentEventID, - HasReached: false, - } - ms.AddTasks(&tasks.TimeSkippingTimerTask{ - WorkflowKey: ms.GetWorkflowKey(), - VisibilityTimestamp: targetTime, - EventID: currentEventID, - }) -} - -// wrapTimeSourceWithTimeSkipping wraps ms.timeSource (and the hBuilder's copy) with a time-skipping -// wrapper. The closure captures ms so the offset tracks ms.executionInfo.TimeSkippingInfo as it -// evolves — no need to re-wrap when TimeSkippingInfo is created or replaced. Called once per MS -// lifetime from the constructors; the type-assertion guard makes any repeat call a no-op. -func (ms *MutableStateImpl) wrapTimeSourceWithTimeSkipping() { - if _, ok := ms.timeSource.(*clock.TimeSkippingTimeSourceWrapper); ok { - return - } - ms.timeSource = clock.WrapTimeSourceWithTimeSkipping( - ms.timeSource, ms.accumulatedSkippedDuration) - ms.hBuilder.SetTimeSource(ms.timeSource) -} - -func (ms *MutableStateImpl) hasInflightWorkToPreventTimeSkipping() (bool, string) { - // HasPendingWorkflowTask covers both normal and speculative workflow tasks - if ms.HasPendingWorkflowTask() { - return true, "has pending workflow task" - } - // A pending activity blocks time skipping unless it has failed and is still - // waiting out its retry backoff (next attempt strictly in the future) — that one - // is a skip target, not in-flight work (see calculateTimeSkippingTransition). The - // strict future check is what keeps a just-scheduled or already-due activity (next - // attempt <= now) blocking. 
- for _, ai := range ms.GetPendingActivityInfos() { - // if this activity is just a retry with backoff scheduled in the future - if activityPendingRetry(ai) && ms.Now().Before(ai.GetScheduledTime().AsTime()) { - continue - } - return true, "has pending activity" - } - if nexusoperations.MachineCollection(ms.HSM()).Size() > 0 { - return true, "has pending nexus operations" - } - if len(ms.GetPendingChildExecutionInfos()) > 0 { - return true, "has pending child execution" - } - if len(ms.GetPendingSignalExternalInfos()) > 0 { - return true, "has pending signal external" - } - if len(ms.GetPendingRequestCancelExternalInfos()) > 0 { - return true, "has pending request cancel external" - } - return false, "" -} - -// ShouldExecuteTimeSkipping checks if one mutable state should execute time skipping, -// i.e. there is no in-flight work and there is a time point to skip to. -func (ms *MutableStateImpl) shouldExecuteTimeSkipping() (bool, *timeSkippingTransition) { - // configuration check - tsi := ms.GetExecutionInfo().GetTimeSkippingInfo() - if tsi == nil { - return false, nil - } - config := tsi.GetConfig() - if config == nil || !config.Enabled { - return false, nil - } - - // runtime check - noSkippingReason := "" - defer func() { - if noSkippingReason != "" { - ms.logger.Debug(fmt.Sprintf("time skipping skipped for: %s", noSkippingReason), - tag.WorkflowID(ms.GetExecutionInfo().WorkflowId), - tag.WorkflowRunID(ms.GetExecutionState().RunId), - ) - } - }() - if !ms.IsWorkflowExecutionRunning() { - noSkippingReason = "workflow is not running" - return false, nil - } - if ms.IsWorkflowExecutionStatusPaused() { - noSkippingReason = "workflow is paused" - return false, nil - } - if hasPendingWork, detailedReason := ms.hasInflightWorkToPreventTimeSkipping(); hasPendingWork { - noSkippingReason = fmt.Sprintf("pending work: %s", detailedReason) - return false, nil - } - - // Compute the transition early so we can short-circuit before allocating an event. 
- // todo(@time-skipping): replace error with nil - transition, err := ms.calculateTimeSkippingTransition() - if err != nil { - noSkippingReason = fmt.Sprintf("error calculating time skipping decision: %v", err) - ms.logger.Error( - "error calculating time skipping decision, and ignore this error and continue", - tag.WorkflowID(ms.GetExecutionInfo().WorkflowId), - tag.WorkflowRunID(ms.GetExecutionState().RunId), - tag.Error(err), - ) - return false, nil - } - if !transition.isValid() { - noSkippingReason = "time skipping has no candidate target time nor disabled after fast-forward flag" - return false, nil - } - return true, &transition -} - -type timeSkippingTransition struct { - targetTime time.Time - disabledAfterFastForward bool -} - -func (d timeSkippingTransition) isValid() bool { - return !d.targetTime.IsZero() || d.disabledAfterFastForward -} - -// calculateTimeSkippingTransition determines the next skip target. -// Candidates (in collection order): pending user timers, activity retry backoffs, -// workflow start-with-delay/CaN/retry backoff, and the fast-forward. -// The run/execution timeout is NOT a standalone candidate — it only applies as -// a cap: if any candidate wins, the skip target is clamped to min(target, -// runExpiry, execExpiry). This ensures we never advance virtual time past the -// workflow timeout, even when a user timer or fast-forward would otherwise overshoot. 
-func (ms *MutableStateImpl) calculateTimeSkippingTransition() (timeSkippingTransition, error) { - var transition timeSkippingTransition - advance := func(candidate time.Time, dueToFastForward bool) { - if transition.targetTime.IsZero() || candidate.Before(transition.targetTime) { - transition.targetTime = candidate - transition.disabledAfterFastForward = dueToFastForward - } - } - - for _, timerInfo := range ms.GetPendingTimerInfos() { - advance(timerInfo.ExpiryTime.AsTime(), false) - } - - // Activities waiting out a retry backoff are skip targets: advance to the earliest - // next-attempt time. No clock comparison is needed here — the idle check already - // guarantees each pending activity's next attempt is in the future when we get here. - for _, ai := range ms.GetPendingActivityInfos() { - if activityPendingRetry(ai) && ms.Now().Before(ai.GetScheduledTime().AsTime()) { - advance(ai.ScheduledTime.AsTime(), false) - } - } - - if !ms.HadOrHasWorkflowTask() { - // Support start-with-delay, cron, retry, and CaN-with-backoff: the workflow is - // waiting on a WorkflowBackoffTimerTask. Two extra checks are needed: - // - ExecutionTime > StartTime: a backoff is actually configured (FirstWorkflowTaskBackoff > 0). - // For child workflows, !HadOrHasWorkflowTask is also true between "start event applied" - // and "ScheduleWorkflowTask API call" but no backoff exists, so ExecutionTime == StartTime. - // - ExecutionTime > ms.Now(): the candidate is in the (virtual) future. Defends against - // CaN-with-backoff that inherits accumulated > backoff — past candidates would produce - // a negative delta in ApplyWorkflowExecutionTimeSkippingTransitionedEvent and decrement accumulated. 
- executionTime := ms.executionInfo.GetExecutionTime().AsTime() - startTime := ms.executionInfo.GetStartTime().AsTime() - if executionTime.After(startTime) && executionTime.After(ms.Now()) { - advance(executionTime, false) - } - } - - tsi := ms.GetExecutionInfo().GetTimeSkippingInfo() - if !tsi.GetFastForwardInfo().GetHasReached() && tsi.GetFastForwardInfo().GetTargetTime() != nil { - advance(tsi.GetFastForwardInfo().GetTargetTime().AsTime(), true) - } - - // Cap any skip target at the run/execution timeout: never advance virtual time past - // them. Timeouts alone do not create a skip target — only existing candidates - // (timers, backoffs, fast-forward) do. This also handles the case where a user - // timer fires past the workflow timeout: we cap the skip so the timeout fires on schedule. - if !transition.targetTime.IsZero() { - if t := ms.executionInfo.GetWorkflowRunExpirationTime(); t != nil && !t.AsTime().IsZero() { - advance(t.AsTime(), false) - } - if t := ms.executionInfo.GetWorkflowExecutionExpirationTime(); t != nil && !t.AsTime().IsZero() { - advance(t.AsTime(), false) - } - } - return transition, nil -} - func (ms *MutableStateImpl) ToRealTime(virtualTime time.Time) time.Time { if virtualTime.IsZero() { return virtualTime diff --git a/service/history/workflow/mutable_state_impl_test.go b/service/history/workflow/mutable_state_impl_test.go index 103ab1400c..e88fc87bfc 100644 --- a/service/history/workflow/mutable_state_impl_test.go +++ b/service/history/workflow/mutable_state_impl_test.go @@ -6582,487 +6582,6 @@ func (s *mutableStateSuite) TestCloseTransaction_PrincipalPreserved() { s.Equal("bob", principalBySignalName["signal-from-bob"], "bob's signal should retain his principal") } -func (s *mutableStateSuite) TestHasInflightWorkToPreventTimeSkipping() { - // Each s.Run() gets a fresh mutable state via SetupSubTest(). 
- - s.Run("FalseWhenNoPendingWork", func() { - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.False(hasPendingWork) - s.Empty(reason) - }) - - s.Run("TrueWhenPendingWorkflowTask", func() { - s.mutableState.executionInfo.WorkflowTaskScheduledEventId = 1 - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending workflow task", reason) - }) - - s.Run("TrueWhenPendingActivity", func() { - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{} - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending activity", reason) - }) - - s.Run("FalseWhenPendingActivityInRetryBackoff", func() { - now := s.mutableState.Now() - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ - ScheduledEventId: 1, - HasRetryPolicy: true, - Attempt: 2, - ScheduledTime: timestamppb.New(now.Add(time.Hour)), - } - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.False(hasPendingWork) - s.Empty(reason) - }) - - s.Run("TrueWhenActivityStarted", func() { - now := s.mutableState.Now() - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ - ScheduledEventId: 1, - HasRetryPolicy: true, - Attempt: 2, - ScheduledTime: timestamppb.New(now.Add(time.Hour)), - StartedEventId: 10, - } - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending activity", reason) - }) - - // A running activity that cannot be retried (no retry policy) must still block: - // the STARTED state short-circuits before the retry-policy check. 
- s.Run("TrueWhenActivityStartedNotRetryable", func() { - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ - ScheduledEventId: 1, - HasRetryPolicy: false, - Attempt: 1, - StartedEventId: 10, - } - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending activity", reason) - }) - - // A first-attempt scheduled activity that has not failed yet (attempt 1) is not - // in backoff and must block even if it has a retry policy. - s.Run("TrueWhenActivityFirstAttemptScheduled", func() { - now := s.mutableState.Now() - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ - ScheduledEventId: 1, - HasRetryPolicy: true, - Attempt: 1, - ScheduledTime: timestamppb.New(now.Add(time.Hour)), - } - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending activity", reason) - }) - - s.Run("TrueWhenActivityPausedInBackoff", func() { - now := s.mutableState.Now() - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ - ScheduledEventId: 1, - HasRetryPolicy: true, - Attempt: 2, - ScheduledTime: timestamppb.New(now.Add(time.Hour)), - Paused: true, - } - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending activity", reason) - }) - - s.Run("TrueWhenActivityScheduledNow", func() { - now := s.mutableState.Now() - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ - ScheduledEventId: 1, - HasRetryPolicy: true, - ScheduledTime: timestamppb.New(now.Add(-time.Hour)), - } - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending activity", reason) - }) - - s.Run("TrueWhenPendingChildExecution", func() { - s.mutableState.pendingChildExecutionInfoIDs[1] = &persistencespb.ChildExecutionInfo{} - hasPendingWork, reason := 
s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending child execution", reason) - }) - - s.Run("TrueWhenPendingNexusOperation", func() { - _, err := nexusoperations.AddChild(s.mutableState.HSM(), "op-1", &historypb.HistoryEvent{ - EventTime: timestamppb.Now(), - Attributes: &historypb.HistoryEvent_NexusOperationScheduledEventAttributes{ - NexusOperationScheduledEventAttributes: &historypb.NexusOperationScheduledEventAttributes{}, - }, - }, []byte("token")) - s.Require().NoError(err) - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending nexus operations", reason) - }) - - s.Run("TrueWhenPendingSignalExternal", func() { - s.mutableState.pendingSignalInfoIDs[1] = &persistencespb.SignalInfo{} - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending signal external", reason) - }) - - s.Run("TrueWhenPendingRequestCancelExternal", func() { - s.mutableState.pendingRequestCancelInfoIDs[1] = &persistencespb.RequestCancelInfo{} - hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() - s.True(hasPendingWork) - s.Equal("has pending request cancel external", reason) - }) - -} - -func (s *mutableStateSuite) TestShouldExecuteTimeSkipping() { - // Each s.Run() gets a fresh mutable state via SetupSubTest(). - // The default state is RUNNING with no pending work. 
- - s.Run("FalseWhenTimeSkippingInfoNil", func() { - s.mutableState.executionInfo.TimeSkippingInfo = nil - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - s.False(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("FalseWhenConfigNil", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{Config: nil} - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - s.False(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("FalseWhenConfigDisabled", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: false}, - } - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - s.False(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("FalseWhenWorkflowNotRunning", func() { - s.mutableState.executionState.State = enumsspb.WORKFLOW_EXECUTION_STATE_COMPLETED - s.mutableState.executionState.Status = enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - s.False(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("FalseWhenPendingWorkflowTask", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - s.mutableState.executionInfo.WorkflowTaskScheduledEventId = 1 - s.True(s.mutableState.HasPendingWorkflowTask()) - s.False(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("FalseWhenPendingActivity", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: 
&commonpb.TimeSkippingConfig{Enabled: true}, - } - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{} - s.False(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("FalseWhenPendingChildExecution", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - s.mutableState.pendingChildExecutionInfoIDs[1] = &persistencespb.ChildExecutionInfo{} - s.False(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("FalseWhenPendingNexusOperation", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - _, err := nexusoperations.AddChild(s.mutableState.HSM(), "op-1", &historypb.HistoryEvent{ - EventTime: timestamppb.Now(), - Attributes: &historypb.HistoryEvent_NexusOperationScheduledEventAttributes{ - NexusOperationScheduledEventAttributes: &historypb.NexusOperationScheduledEventAttributes{}, - }, - }, []byte("token")) - s.Require().NoError(err) - s.False(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("FalseWhenNoPendingTimersAndNoFastForward", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - s.False(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("TrueWhenPendingTimerAndNoFastForward", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - s.True(s.mutableState.shouldExecuteTimeSkipping()) - 
}) - - s.Run("TrueWhenFastForwardAndNoPendingTimer", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{ - Enabled: true, - FastForward: durationpb.New(time.Hour), - }, - FastForwardInfo: &persistencespb.FastForwardInfo{ - TargetTime: timestamppb.New(s.mutableState.Now().Add(time.Hour)), - }, - } - s.True(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("TrueWhenFastForwardAndPendingTimer", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{ - Enabled: true, - FastForward: durationpb.New(time.Hour), - }, - FastForwardInfo: &persistencespb.FastForwardInfo{ - TargetTime: timestamppb.New(s.mutableState.Now().Add(time.Hour)), - }, - } - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - s.True(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("FalseWhenPaused", func() { - s.mutableState.executionState.Status = enumspb.WORKFLOW_EXECUTION_STATUS_PAUSED - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} - s.False(s.mutableState.shouldExecuteTimeSkipping()) - }) - - s.Run("TrueWhenOnlyActivityInRetryBackoff", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - now := s.mutableState.Now() - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ - ScheduledEventId: 1, - HasRetryPolicy: true, - Attempt: 2, - ScheduledTime: timestamppb.New(now.Add(time.Hour)), - } - s.True(s.mutableState.shouldExecuteTimeSkipping()) - }) -} - -func (s *mutableStateSuite) TestApplyWorkflowExecutionTimeSkippingTransitionedEvent() { - // Use fixed UTC times so duration arithmetic is exact. 
- baseTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) - - makeEvent := func(eventTime time.Time, targetTime *time.Time, disabledAfterBound bool) *historypb.HistoryEvent { - attr := &historypb.WorkflowExecutionTimeSkippingTransitionedEventAttributes{ - DisabledAfterFastForward: disabledAfterBound, - } - if targetTime != nil { - attr.TargetTime = timestamppb.New(*targetTime) - } - return &historypb.HistoryEvent{ - EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_TIME_SKIPPING_TRANSITIONED, - EventTime: timestamppb.New(eventTime), - Attributes: &historypb.HistoryEvent_WorkflowExecutionTimeSkippingTransitionedEventAttributes{ - WorkflowExecutionTimeSkippingTransitionedEventAttributes: attr, - }, - } - } - - s.Run("ErrorWhenTimeSkippingInfoNil", func() { - s.mutableState.executionInfo.TimeSkippingInfo = nil - targetTime := baseTime.Add(2 * time.Hour) - err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( - context.Background(), - makeEvent(baseTime, &targetTime, false), - ) - s.Require().Error(err) - }) - - s.Run("AccumulatesDuration", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - s.mutableState.timeSkippingInfoUpdated = false - targetTime := baseTime.Add(2 * time.Hour) - - err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( - context.Background(), - makeEvent(baseTime, &targetTime, false), - ) - s.Require().NoError(err) - - accumulated := s.mutableState.GetExecutionInfo().TimeSkippingInfo.AccumulatedSkippedDuration - s.Require().Equal(2*time.Hour, accumulated.AsDuration()) - s.Require().True(s.mutableState.timeSkippingInfoUpdated) - }) - - s.Run("AccumulatesDurationAdditively", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - AccumulatedSkippedDuration: durationpb.New(time.Hour), - } - targetTime := baseTime.Add(2 
* time.Hour) - - err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( - context.Background(), - makeEvent(baseTime, &targetTime, false), - ) - s.Require().NoError(err) - - accumulated := s.mutableState.GetExecutionInfo().TimeSkippingInfo.AccumulatedSkippedDuration - s.Require().Equal(3*time.Hour, accumulated.AsDuration()) // 1h pre-existing + 2h new - }) - - s.Run("ErrorWhenNilTargetTimeAndNotDisabledAfterBound", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - s.mutableState.timeSkippingInfoUpdated = false - err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( - context.Background(), - makeEvent(baseTime, nil, false), - ) - s.Require().Error(err) - }) - - s.Run("NilTargetTimeWithDisabledAfterBoundDoesNotAccumulateDuration", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - AccumulatedSkippedDuration: durationpb.New(time.Hour), - } - s.mutableState.timeSkippingInfoUpdated = false - err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( - context.Background(), - makeEvent(baseTime, nil, true), - ) - s.Require().NoError(err) - - accumulated := s.mutableState.GetExecutionInfo().TimeSkippingInfo.AccumulatedSkippedDuration - s.Require().Equal(time.Hour, accumulated.AsDuration()) - s.Require().False(s.mutableState.GetExecutionInfo().TimeSkippingInfo.Config.Enabled) - s.Require().True(s.mutableState.timeSkippingInfoUpdated) - }) - - s.Run("DisabledAfterBoundDisablesConfigAndAccumulatesDuration", func() { - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - targetTime := baseTime.Add(2 * time.Hour) - - err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( - context.Background(), - 
makeEvent(baseTime, &targetTime, true), - ) - s.Require().NoError(err) - - s.Require().False(s.mutableState.GetExecutionInfo().TimeSkippingInfo.Config.Enabled) - accumulated := s.mutableState.GetExecutionInfo().TimeSkippingInfo.AccumulatedSkippedDuration - s.Require().Equal(2*time.Hour, accumulated.AsDuration()) - }) -} - -func (s *mutableStateSuite) TestWrapTimeSourceWithTimeSkipping() { - const skipped = 2 * time.Hour - fixedBase := time.Date(2024, 6, 1, 10, 0, 0, 0, time.UTC) - - // fixedTimeSource returns fixedBase and is used as the base time source for subtests - // that need deterministic virtual-time assertions. - fixedTimeSource := func() *clock.EventTimeSource { - ts := clock.NewEventTimeSource() - ts.Update(fixedBase) - return ts - } - - s.Run("ZeroOffsetWhenTimeSkippingInfoNil", func() { - s.mutableState.timeSource = fixedTimeSource() - s.mutableState.executionInfo.TimeSkippingInfo = nil - - s.mutableState.wrapTimeSourceWithTimeSkipping() - - _, isWrapper := s.mutableState.timeSource.(*clock.TimeSkippingTimeSourceWrapper) - s.True(isWrapper) - // With nil TimeSkippingInfo the wrapper is present but applies a zero offset. - s.Equal(fixedBase, s.mutableState.timeSource.Now()) - }) - - s.Run("OffsetTracksAccumulatedDuration", func() { - s.mutableState.timeSource = fixedTimeSource() - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - AccumulatedSkippedDuration: durationpb.New(skipped), - } - - s.mutableState.wrapTimeSourceWithTimeSkipping() - - _, isWrapper := s.mutableState.timeSource.(*clock.TimeSkippingTimeSourceWrapper) - s.True(isWrapper) - s.Equal(fixedBase.Add(skipped), s.mutableState.timeSource.Now()) - }) - - s.Run("OffsetFollowsLateTimeSkippingInfoAssignment", func() { - // Wrap first with nil TimeSkippingInfo, then assign it — the closure must - // pick up the new accumulated duration without a re-wrap. 
- s.mutableState.timeSource = fixedTimeSource() - s.mutableState.executionInfo.TimeSkippingInfo = nil - s.mutableState.wrapTimeSourceWithTimeSkipping() - - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - AccumulatedSkippedDuration: durationpb.New(skipped), - } - - s.Equal(fixedBase.Add(skipped), s.mutableState.timeSource.Now()) - }) - - s.Run("IdempotentWhenAlreadyWrapped", func() { - s.mutableState.timeSource = fixedTimeSource() - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - AccumulatedSkippedDuration: durationpb.New(skipped), - } - s.mutableState.wrapTimeSourceWithTimeSkipping() - wrappedOnce := s.mutableState.timeSource - - s.mutableState.wrapTimeSourceWithTimeSkipping() - - s.Equal(wrappedOnce, s.mutableState.timeSource, "second call must not double-wrap") - }) - - s.Run("HBuilderUsesVirtualTime", func() { - s.mutableState.timeSource = fixedTimeSource() - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - AccumulatedSkippedDuration: durationpb.New(skipped), - } - - s.mutableState.wrapTimeSourceWithTimeSkipping() - - event := s.mutableState.hBuilder.AddHistoryEvent( - enumspb.EVENT_TYPE_TIMER_FIRED, - func(e *historypb.HistoryEvent) { - e.Attributes = &historypb.HistoryEvent_TimerFiredEventAttributes{ - TimerFiredEventAttributes: &historypb.TimerFiredEventAttributes{TimerId: "t1"}, - } - }, - ) - s.Equal(fixedBase.Add(skipped), event.GetEventTime().AsTime()) - }) -} - func (s *mutableStateSuite) TestAddTasks_SubtractsSkipFromTimerTasks() { skipped := 30 * time.Minute virtualTime := s.mockShard.GetTimeSource().Now().Add(2 * time.Hour) @@ -7260,6 +6779,13 @@ func (s *mutableStateSuite) TestSetSpeculativeWorkflowTaskTimeoutTask_SubtractsS // SetSpeculativeWorkflowTaskTimeoutTask, and ToRealTime all rely on this; if it panicked or // returned non-zero on a nil chain, every non-time-skipping workflow would break. 
func (s *mutableStateSuite) TestAccumulatedSkippedDuration_NilSafety() { + s.Run("ExecutionInfoNil", func() { + s.mutableState.executionInfo = nil + var got time.Duration + s.NotPanics(func() { got = s.mutableState.accumulatedSkippedDuration() }) + s.Equal(time.Duration(0), got) + }) + s.Run("TimeSkippingInfoNil", func() { s.mutableState.executionInfo.TimeSkippingInfo = nil var got time.Duration @@ -8205,514 +7731,6 @@ func TestGenerateActivityCancelCommandsForClose(t *testing.T) { } } -// TestApplyFastForward covers the full branch table of applyFastForward: -// MaxElapsedDuration set / nil duration / nil fast-forward / nil config / Enabled=false. -// The first-init virtual-time path is covered separately in -// TestInitTimeSkippingInfo_VirtualTime. -func (s *mutableStateSuite) TestApplyFastForward() { - s.Run("FastForward_WithNoPropagatedTargetTime", func() { - - fastForwardDuration := 3 * time.Hour - eventID := int64(1) - s.mutableState.timeSource = clock.NewEventTimeSource() - baseTime := s.mutableState.timeSource.Now() - - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{ - Enabled: true, - FastForward: durationpb.New(fastForwardDuration)}, - AccumulatedSkippedDuration: durationpb.New(time.Hour), - } - s.mutableState.applyFastForward(eventID, nil) - fastForward := s.mutableState.executionInfo.TimeSkippingInfo.GetFastForwardInfo() - s.Require().NotNil(fastForward) - s.Equal(eventID, fastForward.GetSourceEventId()) - s.False(fastForward.GetHasReached()) - s.WithinDuration(fastForward.GetTargetTime().AsTime(), baseTime.Add(fastForwardDuration), 1*time.Second) - }) - - s.Run("FastForward_WithPropagatedTargetTime", func() { - fixed := time.Date(2026, 1, 1, 12, 0, 0, 0, time.UTC) - eventID := int64(1) - propagatedTarget := fixed.Add(2 * time.Hour) - s.mutableState.timeSource = clock.NewEventTimeSource().Update(fixed) - s.mutableState.executionInfo.TimeSkippingInfo = 
&persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{ - Enabled: true, - FastForward: durationpb.New(3 * time.Hour)}, - AccumulatedSkippedDuration: durationpb.New(time.Hour), - } - s.mutableState.applyFastForward(eventID, timestamppb.New(propagatedTarget)) - fastForward := s.mutableState.executionInfo.TimeSkippingInfo.GetFastForwardInfo() - s.Require().NotNil(fastForward) - s.Equal(propagatedTarget, fastForward.GetTargetTime().AsTime(), - "propagated target used directly, not recomputed from ff duration") - }) - - s.Run("MaxElapsedDuration_NilDuration_NoOp", func() { - eventID := int64(1) - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{ - Enabled: true}, - } - s.mutableState.applyFastForward(eventID, nil) - s.Nil(s.mutableState.executionInfo.TimeSkippingInfo.GetFastForwardInfo()) - }) - - s.Run("FastForward_ClearsExistingInfo", func() { - eventID := int64(1) - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: false}, - FastForwardInfo: &persistencespb.FastForwardInfo{ - TargetTime: timestamppb.New(s.mutableState.Now().Add(time.Hour)), - SourceEventId: 7, - }, - } - s.mutableState.applyFastForward(eventID, nil) - s.Nil(s.mutableState.executionInfo.TimeSkippingInfo.GetFastForwardInfo()) - }) - - s.Run("FastForward_OverridesExistingInfo", func() { - eventID := int64(1) - fixed := time.Date(2026, 1, 1, 12, 0, 0, 0, time.UTC) - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true, FastForward: durationpb.New(time.Hour)}, - FastForwardInfo: &persistencespb.FastForwardInfo{ - TargetTime: timestamppb.New(fixed), - SourceEventId: 7, - }, - } - newTarget := s.mutableState.Now().Add(time.Hour) - s.mutableState.applyFastForward(eventID, nil) - 
s.WithinDuration(s.mutableState.executionInfo.TimeSkippingInfo.GetFastForwardInfo().GetTargetTime().AsTime(), newTarget, 1*time.Second) - }) -} - -// TestInitTimeSkippingInfo covers 3 basic scenarios this function is called. -func (s *mutableStateSuite) TestInitTimeSkippingInfo() { - - // if the inputs are nil, the caller doesn't need to call the TSI - // yet we still add this test to ensure the function is safe with an noop implementation - // to call with nil inputs - s.Run("SafeInitWithNil_ForExecutionsWithoutTS", func() { - s.mutableState.timeSource = clock.NewEventTimeSource() - baseTime := s.mutableState.timeSource.Now() - s.NotPanics(func() { - s.mutableState.initTimeSkippingInfo(nil, nil, 0) - }) - s.Nil(s.mutableState.executionInfo.TimeSkippingInfo) - s.Equal(baseTime, s.mutableState.Now()) - }) - - s.Run("InitWithConfigOnly_ForExecutionsWithTSStartedByUser", func() { - eventID := int64(1) - s.mutableState.timeSource = clock.NewEventTimeSource() - baseTime := s.mutableState.timeSource.Now() - cfg := &commonpb.TimeSkippingConfig{ - Enabled: true, - FastForward: durationpb.New(3 * time.Hour)} - - s.mutableState.initTimeSkippingInfo(cfg, nil, eventID) - s.Equal(baseTime, s.mutableState.Now()) - tsi := s.mutableState.executionInfo.GetTimeSkippingInfo() - s.Require().NotNil(tsi) - s.True(proto.Equal(cfg, tsi.GetConfig())) - s.Require().NotNil(tsi.GetFastForwardInfo()) - s.Require().Nil(tsi.GetAccumulatedSkippedDuration()) - // timestamppb translates to UTC time - s.Require().Equal(baseTime.Add(3*time.Hour).UTC(), - tsi.GetFastForwardInfo().GetTargetTime().AsTime()) - }) - - s.Run("InitWithPropagation_ForExecutionsWithTSStartedByPropagation", func() { - s.mutableState.timeSource = clock.NewEventTimeSource() - - // all local time - baseTime := s.mutableState.timeSource.Now() - targetTime := baseTime.Add(3 * time.Hour) - - hasSkipped := 2 * time.Hour - fastForward := 3 * time.Hour - eventID := int64(1) - - cfg := &commonpb.TimeSkippingConfig{ - Enabled: true, - 
FastForward: durationpb.New(fastForward), - DisableChildPropagation: true, - } - propagation := &commonpb.TimeSkippingStatePropagation{ - InitialSkippedDuration: durationpb.New(hasSkipped), - FastForwardTargetTime: timestamppb.New(targetTime), - } - s.mutableState.initTimeSkippingInfo(cfg, propagation, eventID) - - tsi := s.mutableState.executionInfo.GetTimeSkippingInfo() - s.Require().NotNil(tsi) - s.Equal(baseTime.Add(hasSkipped), s.mutableState.Now()) - s.Equal(hasSkipped, tsi.GetAccumulatedSkippedDuration().AsDuration()) - s.Equal(targetTime.UTC(), - tsi.GetFastForwardInfo().GetTargetTime().AsTime()) - }) -} - -func (s *mutableStateSuite) TestUpdateTimeSkippingInfo() { - - s.Run("UpdateTimeSkippingInfo_UpdateWithNil", func() { - s.mutableState.timeSource = clock.NewEventTimeSource() - baseTime := s.mutableState.timeSource.Now() - currentTSI := &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{ - Enabled: true, - FastForward: durationpb.New(time.Hour), - }, - AccumulatedSkippedDuration: durationpb.New(time.Hour), - FastForwardInfo: &persistencespb.FastForwardInfo{ - TargetTime: timestamppb.New(baseTime.Add(time.Hour)), - SourceEventId: 7, - HasReached: false, - }, - } - s.mutableState.executionInfo.TimeSkippingInfo = currentTSI - s.mutableState.timeSkippingInfoUpdated = false - newEventID := int64(8) - s.mutableState.updateTimeSkippingInfo(nil, newEventID) - newTSI := s.mutableState.executionInfo.GetTimeSkippingInfo() - s.Require().NotNil(newTSI) - s.Nil(newTSI.GetConfig()) - s.Nil(newTSI.GetFastForwardInfo()) - s.Equal(currentTSI.GetAccumulatedSkippedDuration(), newTSI.GetAccumulatedSkippedDuration()) - s.True(s.mutableState.timeSkippingInfoUpdated) - }) - - s.Run("UpdateTimeSkippingInfo_EnableTS", func() { - s.mutableState.timeSource = clock.NewEventTimeSource() - baseTime := s.mutableState.timeSource.Now() - currentTSI := persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{ - Enabled: false, - }, - 
AccumulatedSkippedDuration: durationpb.New(time.Hour), - } - s.mutableState.executionInfo.TimeSkippingInfo = &currentTSI - - // new config - newConfig := &commonpb.TimeSkippingConfig{ - Enabled: true, - FastForward: durationpb.New(2 * time.Hour), - DisableChildPropagation: true, - } - newEventID := int64(8) - s.mutableState.updateTimeSkippingInfo(newConfig, newEventID) - newTSI := s.mutableState.executionInfo.GetTimeSkippingInfo() - - s.Require().NotNil(newTSI) - s.True(proto.Equal(newConfig, newTSI.GetConfig())) - s.Require().NotNil(newTSI.GetFastForwardInfo()) - s.Equal(newEventID, newTSI.GetFastForwardInfo().GetSourceEventId()) - s.Equal(baseTime.Add(2*time.Hour).UTC(), newTSI.GetFastForwardInfo().GetTargetTime().AsTime()) - s.False(newTSI.GetFastForwardInfo().GetHasReached()) - s.Equal(time.Hour, newTSI.GetAccumulatedSkippedDuration().AsDuration()) - }) - - s.Run("UpdateTimeSkippingInfo_OverrideFFThenTurnOff", func() { - s.mutableState.timeSource = clock.NewEventTimeSource() - baseTime := s.mutableState.timeSource.Now() - currentTSI := persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{ - Enabled: true, - FastForward: durationpb.New(time.Hour), - }, - AccumulatedSkippedDuration: durationpb.New(time.Hour), - FastForwardInfo: &persistencespb.FastForwardInfo{ - TargetTime: timestamppb.New(baseTime.Add(time.Hour)), - SourceEventId: 7, - HasReached: false, - }, - } - s.mutableState.executionInfo.TimeSkippingInfo = &currentTSI - - // update with new config with a new FF - tsc2 := &commonpb.TimeSkippingConfig{ - Enabled: true, - FastForward: durationpb.New(2 * time.Hour), - } - newEventID := int64(8) - s.mutableState.updateTimeSkippingInfo(tsc2, newEventID) - tsc2TSI := s.mutableState.executionInfo.GetTimeSkippingInfo() - - s.Require().NotNil(tsc2TSI) - s.True(proto.Equal(tsc2, tsc2TSI.GetConfig())) - s.Require().NotNil(tsc2TSI.GetFastForwardInfo()) - s.Equal(newEventID, tsc2TSI.GetFastForwardInfo().GetSourceEventId()) -
s.Equal(baseTime.Add(2*time.Hour).UTC(), tsc2TSI.GetFastForwardInfo().GetTargetTime().AsTime()) - s.False(tsc2TSI.GetFastForwardInfo().GetHasReached()) - s.Equal(time.Hour, tsc2TSI.GetAccumulatedSkippedDuration().AsDuration()) - - // disable the time skipping - tsc3 := &commonpb.TimeSkippingConfig{ - Enabled: false, - } - s.mutableState.updateTimeSkippingInfo(tsc3, newEventID) - tsc3TSI := s.mutableState.executionInfo.GetTimeSkippingInfo() - s.Require().NotNil(tsc3TSI) - s.True(proto.Equal(tsc3, tsc3TSI.GetConfig())) - s.Nil(tsc3TSI.GetFastForwardInfo()) - s.Equal(time.Hour, tsc3TSI.GetAccumulatedSkippedDuration().AsDuration()) - - }) - -} - -// TestCalculateTimeSkippingTransition exercises the full candidate-selection -// branch table. -func (s *mutableStateSuite) TestCalculateTimeSkippingTransition() { - baseTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) - - addTimer := func(id string, expiry time.Time) { - s.mutableState.pendingTimerInfoIDs[id] = &persistencespb.TimerInfo{ - TimerId: id, - ExpiryTime: timestamppb.New(expiry), - } - } - - // resetMS gives each subtest a fresh, deterministic MS. - resetMS := func() { - ts := clock.NewEventTimeSource().Update(baseTime) - s.mutableState.timeSource = ts - s.mutableState.pendingTimerInfoIDs = make(map[string]*persistencespb.TimerInfo) - s.mutableState.pendingActivityInfoIDs = make(map[int64]*persistencespb.ActivityInfo) - s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ - Config: &commonpb.TimeSkippingConfig{Enabled: true}, - } - // Default: workflow has had a workflow task, so the backoff branch is skipped. 
- s.mutableState.executionInfo.StartTime = timestamppb.New(baseTime) - s.mutableState.executionInfo.ExecutionTime = timestamppb.New(baseTime) - s.mutableState.executionInfo.LastCompletedWorkflowTaskStartedEventId = 1 - } - - s.Run("NoCandidates_InvalidTransition", func() { - resetMS() - t, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.False(t.isValid()) - }) - - s.Run("OneUserTimer_TargetIsTimer", func() { - resetMS() - t1 := baseTime.Add(2 * time.Hour) - addTimer("t1", t1) - - t, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(t1, t.targetTime) - s.False(t.disabledAfterFastForward) - }) - - s.Run("TwoUserTimers_TargetIsEarliest", func() { - resetMS() - t1 := baseTime.Add(2 * time.Hour) - t2 := baseTime.Add(3 * time.Hour) - addTimer("t1", t1) - addTimer("t2", t2) - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(t1, tr.targetTime) - s.False(tr.disabledAfterFastForward) - }) - - s.Run("UserTimer_PlusEarlierFastForward_TargetIsFastForward", func() { - resetMS() - t1 := baseTime.Add(3 * time.Hour) - fastForwardTarget := baseTime.Add(time.Hour) - addTimer("t1", t1) - s.mutableState.executionInfo.TimeSkippingInfo.Config.FastForward = durationpb.New(time.Hour) - s.mutableState.executionInfo.TimeSkippingInfo.FastForwardInfo = - &persistencespb.FastForwardInfo{TargetTime: timestamppb.New(fastForwardTarget)} - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(fastForwardTarget, tr.targetTime) - s.True(tr.disabledAfterFastForward) - }) - - s.Run("ZeroFastForward_NilTarget_NoErrorNoCandidate", func() { - resetMS() - // A zero max_elapsed_duration is treated as no fast-forward: it contributes no candidate - // and must NOT trip the nil-fast-forward corruption check. 
- s.mutableState.executionInfo.TimeSkippingInfo.Config.FastForward = durationpb.New(0) - // FastForward persistence deliberately not set. - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.False(tr.isValid()) - }) - - s.Run("Backoff_NotChildAndExecutionTimeFuture_IsCandidate", func() { - resetMS() - // !HadOrHasWorkflowTask: clear the last completed WT. - s.mutableState.executionInfo.LastCompletedWorkflowTaskStartedEventId = common.EmptyEventID - s.mutableState.executionInfo.WorkflowTaskScheduledEventId = common.EmptyEventID - // ExecutionTime > StartTime: a real backoff configured. - execTime := baseTime.Add(time.Hour) - s.mutableState.executionInfo.ExecutionTime = timestamppb.New(execTime) - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(execTime, tr.targetTime) - s.False(tr.disabledAfterFastForward) - }) - - s.Run("Backoff_ChildWFCase_NotCandidate", func() { - resetMS() - s.mutableState.executionInfo.LastCompletedWorkflowTaskStartedEventId = common.EmptyEventID - s.mutableState.executionInfo.WorkflowTaskScheduledEventId = common.EmptyEventID - // ExecutionTime == StartTime: child WF "no first WT scheduled yet" case. - s.mutableState.executionInfo.ExecutionTime = timestamppb.New(baseTime) - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.False(tr.isValid(), - "child WF without backoff and no other candidate must yield invalid transition") - }) - - s.Run("Backoff_CaNWithInheritedSkip_PastExecutionTime_NotCandidate", func() { - resetMS() - // Inherited accumulated such that Now() > ExecutionTime: the backoff candidate - // is in the past relative to virtual now and must not be picked. 
- s.mutableState.executionInfo.LastCompletedWorkflowTaskStartedEventId = common.EmptyEventID - s.mutableState.executionInfo.WorkflowTaskScheduledEventId = common.EmptyEventID - execTime := baseTime.Add(10 * time.Minute) - s.mutableState.executionInfo.ExecutionTime = timestamppb.New(execTime) - // Wrap the time source so virtual Now is baseTime + accum (= baseTime + 1h). - s.mutableState.timeSource = clock.WrapTimeSourceWithTimeSkipping( - clock.NewEventTimeSource().Update(baseTime), - func() time.Duration { return time.Hour }, - ) - s.mutableState.executionInfo.TimeSkippingInfo.AccumulatedSkippedDuration = durationpb.New(time.Hour) - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.False(tr.isValid(), - "backoff in the virtual past must not produce a transition candidate") - }) - - s.Run("ActivityInRetryBackoff_IsCandidate", func() { - resetMS() - schedTime := baseTime.Add(30 * time.Minute) - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ - ScheduledEventId: 1, - HasRetryPolicy: true, - Attempt: 2, - ScheduledTime: timestamppb.New(schedTime), - } - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(schedTime, tr.targetTime) - s.False(tr.disabledAfterFastForward) - }) - - s.Run("TwoActivitiesInBackoff_TargetIsEarliest", func() { - resetMS() - early := baseTime.Add(30 * time.Minute) - late := baseTime.Add(2 * time.Hour) - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ - ScheduledEventId: 1, HasRetryPolicy: true, Attempt: 2, - ScheduledTime: timestamppb.New(late), - } - s.mutableState.pendingActivityInfoIDs[2] = &persistencespb.ActivityInfo{ - ScheduledEventId: 2, HasRetryPolicy: true, Attempt: 2, - ScheduledTime: timestamppb.New(early), - } - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(early, tr.targetTime) - }) - - s.Run("ActivityBackoff_PlusEarlierTimer_TargetIsTimer", 
func() { - resetMS() - schedTime := baseTime.Add(2 * time.Hour) - timerTime := baseTime.Add(time.Hour) - addTimer("t1", timerTime) - s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ - ScheduledEventId: 1, HasRetryPolicy: true, Attempt: 2, - ScheduledTime: timestamppb.New(schedTime), - } - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(timerTime, tr.targetTime) - }) - - // Universal cap: skip target must not exceed the run/execution timeout. - // MaxElapsedDuration's value is irrelevant to calculateTimeSkippingTransition; - // only FastForward.TargetTime is read. We use a large dummy - // duration solely to configure the fast-forward. - const largeFastForward = 24 * time.Hour - setFastForwardAt := func(target time.Time) { - s.mutableState.executionInfo.TimeSkippingInfo.Config.FastForward = durationpb.New(largeFastForward) - s.mutableState.executionInfo.TimeSkippingInfo.FastForwardInfo = - &persistencespb.FastForwardInfo{TargetTime: timestamppb.New(target)} - } - - s.Run("FastForward_LargerThanRunTimeout_CappedAtRunTimeout", func() { - resetMS() - runExpiry := baseTime.Add(30 * time.Minute) - fastForwardTarget := baseTime.Add(2 * time.Hour) // fast-forward > run timeout - setFastForwardAt(fastForwardTarget) - s.mutableState.executionInfo.WorkflowRunExpirationTime = timestamppb.New(runExpiry) - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(runExpiry, tr.targetTime, "skip must be capped at run timeout") - s.False(tr.disabledAfterFastForward, "cap fires before fast-forward; fast-forward must not be marked reached") - }) - - s.Run("FastForward_SmallerThanRunTimeout_NoCap_TargetIsFastForward", func() { - resetMS() - fastForwardTarget := baseTime.Add(30 * time.Minute) - runExpiry := baseTime.Add(2 * time.Hour) // run timeout > fast-forward, no cap needed - setFastForwardAt(fastForwardTarget) - 
s.mutableState.executionInfo.WorkflowRunExpirationTime = timestamppb.New(runExpiry) - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(fastForwardTarget, tr.targetTime, "fast-forward is minimum; no cap applies") - s.True(tr.disabledAfterFastForward, "fast-forward fires before run timeout; fast-forward must be marked reached") - }) - - s.Run("FastForward_LargerThanExecTimeout_NoRunTimeout_CappedAtExecTimeout", func() { - resetMS() - execExpiry := baseTime.Add(30 * time.Minute) - fastForwardTarget := baseTime.Add(2 * time.Hour) // fast-forward > execution timeout - setFastForwardAt(fastForwardTarget) - // No WorkflowRunExpirationTime; only execution timeout. - s.mutableState.executionInfo.WorkflowExecutionExpirationTime = timestamppb.New(execExpiry) - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(execExpiry, tr.targetTime, "skip must be capped at execution timeout") - s.False(tr.disabledAfterFastForward, "cap fires before fast-forward; fast-forward must not be marked reached") - }) - - s.Run("FastForward_ZeroRunTimeout_TreatedAsNoTimeout_NoCap", func() { - // A zero-value timestamp means "no timeout configured". The cap must not - // fire: the fast-forward target should be the skip destination unchanged. - resetMS() - fastForwardTarget := baseTime.Add(time.Hour) - setFastForwardAt(fastForwardTarget) - s.mutableState.executionInfo.WorkflowRunExpirationTime = timestamppb.New(time.Time{}) - s.mutableState.executionInfo.WorkflowExecutionExpirationTime = timestamppb.New(time.Time{}) - - tr, err := s.mutableState.calculateTimeSkippingTransition() - s.Require().NoError(err) - s.Equal(fastForwardTarget, tr.targetTime, "zero timeout must not cap the skip") - s.True(tr.disabledAfterFastForward) - }) -} - // TestToRealTime tests ms.ToRealTime() exhaustively as this function is also used by executions that don't // use time skipping and need to be tested thoroughly. 
This function converts virtual time to wall-clock time. func (s *mutableStateSuite) TestToRealTime() { diff --git a/service/history/workflow/task_generator.go b/service/history/workflow/task_generator.go index f30c96a7c0..dd96433dc6 100644 --- a/service/history/workflow/task_generator.go +++ b/service/history/workflow/task_generator.go @@ -1032,17 +1032,17 @@ func isPathAffectedByDelete(deletePath []hsm.Key, timerPath []*persistencespb.St return true } -// RegenerateTimerTasksForTimeSkipping regenerates the timer tasks for time skipping. -// This function is not idempotent, but when called twice, logically the timerTasks regenerated will have the same contents, -// and the only difference is the TaskID. -// TODO@time-skipping: currently not safe to call in replication context +// RegenerateTimerTasksForTimeSkipping force re-stamps every pending timer task against the +// current accumulated skip. +// +// It needs no per-task dedup status of its own. Callers gate it on whether a skip actually +// happened: the active close transaction only invokes it when a skip transition was emitted +// this transaction (regenerateTimerTasksForTimeSkipping), and PartialRefresh only invokes it +// when TimeSkippingInfo.LastUpdateVersionedTransition falls within the replicated delta (see +// refreshTasksForTimeSkipping). 
func (r *TaskGeneratorImpl) RegenerateTimerTasksForTimeSkipping() error { - if r.mutableState.GetExecutionInfo().TimeSkippingInfo == nil { - return nil - } - accumulatedSkippedDuration := r.mutableState.GetExecutionInfo().TimeSkippingInfo.AccumulatedSkippedDuration.AsDuration() - if accumulatedSkippedDuration <= 0 { + if accumulatedSkippedDuration(r.mutableState.GetExecutionInfo()) <= 0 { return nil } diff --git a/service/history/workflow/task_generator_test.go b/service/history/workflow/task_generator_test.go index ea1152eec3..b3bc76ff54 100644 --- a/service/history/workflow/task_generator_test.go +++ b/service/history/workflow/task_generator_test.go @@ -1245,6 +1245,47 @@ func TestTaskGeneratorImpl_RegenerateTimerTasksForTimeSkipping(t *testing.T) { require.Equal(t, timer2ExpiryTime, byEventID[2].VisibilityTimestamp) } +// TestTaskGeneratorImpl_RegenerateTimerTasksForTimeSkipping_ForceRegenerates asserts the +// contract: regen carries no per-task status of its own and force re-stamps on every call +// (callers gate it — the active boolean and PartialRefresh's LastUpdateVersionedTransition +// check). So a second back-to-back call emits the same tasks again, not a no-op. Content is +// identical; only the shard-assigned TaskID would differ. 
+func TestTaskGeneratorImpl_RegenerateTimerTasksForTimeSkipping_ForceRegenerates(t *testing.T) { + t.Parallel() + + tsi := &persistencespb.TimeSkippingInfo{ + AccumulatedSkippedDuration: durationpb.New(time.Hour), + } + + ctrl := gomock.NewController(t) + mutableState := historyi.NewMockMutableState(ctrl) + mutableState.EXPECT().GetExecutionInfo().Return(&persistencespb.WorkflowExecutionInfo{ + TimeSkippingInfo: tsi, + }).AnyTimes() + mutableState.EXPECT().GetPendingTimerInfos().Return(map[string]*persistencespb.TimerInfo{ + "timer-1": {StartedEventId: 1, ExpiryTime: timestamppb.New(time.Now().Add(time.Hour))}, + }).AnyTimes() + mutableState.EXPECT().GetWorkflowKey().Return(tests.WorkflowKey).AnyTimes() + mutableState.EXPECT().HadOrHasWorkflowTask().Return(true).AnyTimes() + mutableState.EXPECT().GetPendingActivityInfos().Return(map[int64]*persistencespb.ActivityInfo{}).AnyTimes() + + emitCount := 0 + mutableState.EXPECT().AddTasks(gomock.Any()).Do(func(ts ...tasks.Task) { + emitCount += len(ts) + }).AnyTimes() + + taskGenerator := NewTaskGenerator(nil, mutableState, &configs.Config{}, nil, log.NewTestLogger()) + + // First call emits. + require.NoError(t, taskGenerator.RegenerateTimerTasksForTimeSkipping()) + require.Positive(t, emitCount, "first call must emit at least one task") + + // Second call force-regenerates: emits again (no status latch suppresses it). 
+ emitsAfterFirst := emitCount + require.NoError(t, taskGenerator.RegenerateTimerTasksForTimeSkipping()) + require.Equal(t, 2*emitsAfterFirst, emitCount, "second call must re-emit the same tasks") +} + func TestTaskGeneratorImpl_RegenerateTimerTasksForTimeSkipping_EdgeCases(t *testing.T) { t.Parallel() diff --git a/service/history/workflow/task_refresher.go b/service/history/workflow/task_refresher.go index 078a695077..2c64d9f58d 100644 --- a/service/history/workflow/task_refresher.go +++ b/service/history/workflow/task_refresher.go @@ -198,9 +198,17 @@ func (r *TaskRefresherImpl) PartialRefresh( return err } - return r.refreshTasksForSubStateMachines( + if err := r.refreshTasksForSubStateMachines( mutableState, minVersionedTransition, + ); err != nil { + return err + } + + return r.refreshTasksForTimeSkipping( + mutableState, + taskGenerator, + minVersionedTransition, ) } @@ -439,8 +447,6 @@ func (r *TaskRefresherImpl) refreshTasksForTimer( return nil } - // if mutableState.ExecutionInfo.TimeSkippingInfo changed, - // we need to pendingTimerInfos := mutableState.GetPendingTimerInfos() for _, timerInfo := range pendingTimerInfos { @@ -465,6 +471,37 @@ func (r *TaskRefresherImpl) refreshTasksForTimer( return err } +// refreshTasksForTimeSkipping re-stamps pending timer tasks against the current accumulated +// skip when a time-skipping transition happened within the replicated delta. A skip mutates +// only executionInfo.TimeSkippingInfo, never a per-timer entity, so refreshTasksForTimer's +// per-timer versioned-transition gate cannot see it. We gate instead on TimeSkippingInfo's own +// LastUpdateVersionedTransition: if it advanced at or after minVersionedTransition, the skip is +// new to this peer and every pending timer task must be regenerated with the shifted wall-clock +// VisibilityTimestamp. 
+func (r *TaskRefresherImpl) refreshTasksForTimeSkipping( + mutableState historyi.MutableState, + taskGenerator TaskGenerator, + minVersionedTransition *persistencespb.VersionedTransition, +) error { + executionState := mutableState.GetExecutionState() + if executionState.Status != enumspb.WORKFLOW_EXECUTION_STATUS_RUNNING { + return nil + } + + tsi := mutableState.GetExecutionInfo().GetTimeSkippingInfo() + if tsi == nil { + return nil + } + if transitionhistory.Compare( + tsi.GetLastUpdateVersionedTransition(), + minVersionedTransition, + ) < 0 { + return nil + } + + return taskGenerator.RegenerateTimerTasksForTimeSkipping() +} + func (r *TaskRefresherImpl) refreshTasksForChildWorkflow( mutableState historyi.MutableState, taskGenerator TaskGenerator, diff --git a/service/history/workflow/task_refresher_test.go b/service/history/workflow/task_refresher_test.go index 11bdc928c7..f3bab190f0 100644 --- a/service/history/workflow/task_refresher_test.go +++ b/service/history/workflow/task_refresher_test.go @@ -1434,6 +1434,86 @@ func (s *taskRefresherSuite) TestRefreshSubStateMachineTasks() { s.False(hsmRoot.Dirty()) } +// TestRefreshTasksForTimeSkipping gates the standby's time-skipping timer re-stamp on +// TimeSkippingInfo.LastUpdateVersionedTransition. It covers both invariants in one place: +// regen runs iff a skip happened at/after the watermark (don't miss), and is bounded — +// it does NOT run when the skip predates the delta or there's no TimeSkippingInfo (don't +// re-stamp on every unrelated replication delta). 
+func (s *taskRefresherSuite) TestRefreshTasksForTimeSkipping() { + tsiAt := func(tc int64) *persistencespb.TimeSkippingInfo { + return &persistencespb.TimeSkippingInfo{ + AccumulatedSkippedDuration: durationpb.New(time.Hour), + LastUpdateVersionedTransition: &persistencespb.VersionedTransition{ + NamespaceFailoverVersion: common.EmptyVersion, + TransitionCount: tc, + }, + } + } + + for _, tc := range []struct { + name string + tsi *persistencespb.TimeSkippingInfo + minVersionedTransition *persistencespb.VersionedTransition + wantRegen bool + }{ + { + // Skip at transition 5, watermark 3 → new to this peer → re-stamp. + name: "SkipWithinDelta/Regenerates", + tsi: tsiAt(5), + minVersionedTransition: &persistencespb.VersionedTransition{NamespaceFailoverVersion: common.EmptyVersion, TransitionCount: 3}, + wantRegen: true, + }, + { + // Skip at transition 2, watermark 4 → predates the delta → bounded, no re-stamp. + name: "SkipBeforeDelta/DoesNotRegenerate", + tsi: tsiAt(2), + minVersionedTransition: &persistencespb.VersionedTransition{NamespaceFailoverVersion: common.EmptyVersion, TransitionCount: 4}, + wantRegen: false, + }, + { + // Workflow never enabled time-skipping → never re-stamp. 
+ name: "NoTimeSkippingInfo/DoesNotRegenerate", + tsi: nil, + minVersionedTransition: &persistencespb.VersionedTransition{NamespaceFailoverVersion: common.EmptyVersion, TransitionCount: 4}, + wantRegen: false, + }, + } { + s.Run(tc.name, func() { + mutableState, err := NewMutableStateFromDB( + s.mockShard, + s.mockShard.GetEventsCache(), + log.NewTestLogger(), + tests.LocalNamespaceEntry, + &persistencespb.WorkflowMutableState{ + ExecutionInfo: &persistencespb.WorkflowExecutionInfo{ + NamespaceId: tests.NamespaceID.String(), + WorkflowId: tests.WorkflowID, + TimeSkippingInfo: tc.tsi, + }, + ExecutionState: &persistencespb.WorkflowExecutionState{ + RunId: tests.RunID, + State: enumsspb.WORKFLOW_EXECUTION_STATE_RUNNING, + Status: enumspb.WORKFLOW_EXECUTION_STATUS_RUNNING, + }, + NextEventId: int64(20), + }, + 10, + ) + s.NoError(err) + + // Times asserts the bound directly: 0 means the mock fails if regen is called. + times := 0 + if tc.wantRegen { + times = 1 + } + s.mockTaskGenerator.EXPECT().RegenerateTimerTasksForTimeSkipping().Return(nil).Times(times) + + err = s.taskRefresher.refreshTasksForTimeSkipping(mutableState, s.mockTaskGenerator, tc.minVersionedTransition) + s.NoError(err) + }) + } +} + type mockTaskGeneratorProvider struct { mockTaskGenerator *MockTaskGenerator } diff --git a/service/history/workflow/timeskipping.go b/service/history/workflow/timeskipping.go index a99248fd01..1a5ada09b3 100644 --- a/service/history/workflow/timeskipping.go +++ b/service/history/workflow/timeskipping.go @@ -1,12 +1,22 @@ package workflow import ( + "context" + "fmt" "time" commonpb "go.temporal.io/api/common/v1" + "go.temporal.io/api/serviceerror" persistencespb "go.temporal.io/server/api/persistence/v1" "go.temporal.io/server/common" + "go.temporal.io/server/common/clock" + "go.temporal.io/server/common/log/tag" + "go.temporal.io/server/common/metrics" + "go.temporal.io/server/components/nexusoperations" + historyi "go.temporal.io/server/service/history/interfaces" + 
"go.temporal.io/server/service/history/tasks" "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/timestamppb" ) // propagateTimeSkippingToNextRun propagates both time skipping config and state to the next run in @@ -60,3 +70,328 @@ func propagateTimeSkippingToChild( func accumulatedSkippedDuration(source *persistencespb.WorkflowExecutionInfo) time.Duration { return source.GetTimeSkippingInfo().GetAccumulatedSkippedDuration().AsDuration() } + +func (ms *MutableStateImpl) initTimeSkippingInfo( + config *commonpb.TimeSkippingConfig, + timeSkippingStatePropagation *commonpb.TimeSkippingStatePropagation, + currentEventID int64, +) { + // we only need to init time skipping info if + // either config is not nil or it has initial skip + initialSkip := timeSkippingStatePropagation.GetInitialSkippedDuration() + if config == nil && initialSkip == nil { + return + } + ms.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: config, + AccumulatedSkippedDuration: initialSkip, + } + ms.wrapTimeSourceWithTimeSkipping() + ms.shiftWorkflowTimes(initialSkip) + ms.applyFastForward(currentEventID, timeSkippingStatePropagation.GetFastForwardTargetTime()) + ms.timeSkippingInfoUpdated = true +} + +// updateTimeSkippingInfo updates the time skipping info +// with the new config and the event ID that updates the config. +// We allow updating the config to nil when users want to remove the TSC. +func (ms *MutableStateImpl) updateTimeSkippingInfo( + config *commonpb.TimeSkippingConfig, + currentEventID int64, +) { + ms.executionInfo.TimeSkippingInfo.Config = config + // Options update: the new ff duration is a fresh budget measured from now. + ms.applyFastForward(currentEventID, nil) + ms.timeSkippingInfoUpdated = true +} + +// applyFastForward (re)computes the FastForwardInfo using the new TimeSkippingConfig (TSC) and propagated time-skipping states.
+// This method should be called whenever the TimeSkippingConfig is initialized or updated. +// An invariant of the FastForwardInfo is that after this method is called, if the current TSC has a FastForward value, +// the FastForwardInfo should never be nil. +func (ms *MutableStateImpl) applyFastForward(currentEventID int64, propagatedTargetTime *timestamppb.Timestamp) { + + tsc := ms.GetExecutionInfo().GetTimeSkippingInfo().GetConfig() + tsi := ms.executionInfo.TimeSkippingInfo + + // clear fast forward if disabled or zero max_elapsed_duration + if !tsc.GetEnabled() || tsc.GetFastForward().AsDuration() <= 0 { + if tsi.FastForwardInfo != nil { + tsi.FastForwardInfo = nil + } + return + } + + var targetTime time.Time + if propagatedTargetTime != nil { + targetTime = propagatedTargetTime.AsTime() + } else { + // if there is no propagated target time, + // fast-forward refers to a new duration from now. + targetTime = ms.Now().Add(tsc.GetFastForward().AsDuration()) + } + + // always install a fresh fast-forward bound + tsi.FastForwardInfo = &persistencespb.FastForwardInfo{ + TargetTime: timestamppb.New(targetTime), + SourceEventId: currentEventID, + HasReached: false, + } + ms.AddTasks(&tasks.TimeSkippingTimerTask{ + WorkflowKey: ms.GetWorkflowKey(), + VisibilityTimestamp: targetTime, + EventID: currentEventID, + }) +} + +// wrapTimeSourceWithTimeSkipping wraps ms.timeSource (and the hBuilder's copy) with a time-skipping +// wrapper. The closure captures ms so the offset tracks ms.executionInfo.TimeSkippingInfo as it +// evolves — no need to re-wrap when TimeSkippingInfo is created or replaced. Called once per MS +// lifetime from the constructors; the type-assertion guard makes any repeat call a no-op. 
+func (ms *MutableStateImpl) wrapTimeSourceWithTimeSkipping() { + if _, ok := ms.timeSource.(*clock.TimeSkippingTimeSourceWrapper); ok { + return + } + ms.timeSource = clock.WrapTimeSourceWithTimeSkipping( + ms.timeSource, ms.accumulatedSkippedDuration) + ms.hBuilder.SetTimeSource(ms.timeSource) +} + +func (ms *MutableStateImpl) hasInflightWorkToPreventTimeSkipping() (bool, string) { + // HasPendingWorkflowTask covers both normal and speculative workflow tasks + if ms.HasPendingWorkflowTask() { + return true, "has pending workflow task" + } + // A pending activity blocks time skipping unless it has failed and is still + // waiting out its retry backoff (next attempt strictly in the future) — that one + // is a skip target, not in-flight work (see calculateTimeSkippingTransition). The + // strict future check is what keeps a just-scheduled or already-due activity (next + // attempt <= now) blocking. + for _, ai := range ms.GetPendingActivityInfos() { + // if this activity is just a retry with backoff scheduled in the future + if activityPendingRetry(ai) && ms.Now().Before(ai.GetScheduledTime().AsTime()) { + continue + } + return true, "has pending activity" + } + if nexusoperations.MachineCollection(ms.HSM()).Size() > 0 { + return true, "has pending nexus operations" + } + if len(ms.GetPendingChildExecutionInfos()) > 0 { + return true, "has pending child execution" + } + if len(ms.GetPendingSignalExternalInfos()) > 0 { + return true, "has pending signal external" + } + if len(ms.GetPendingRequestCancelExternalInfos()) > 0 { + return true, "has pending request cancel external" + } + return false, "" +} + +// shouldExecuteTimeSkipping checks whether this mutable state should execute time skipping, +// i.e. there is no in-flight work and there is a time point to skip to.
+func (ms *MutableStateImpl) shouldExecuteTimeSkipping() (bool, *timeSkippingTransition) { + // configuration check + tsi := ms.GetExecutionInfo().GetTimeSkippingInfo() + if tsi == nil { + return false, nil + } + config := tsi.GetConfig() + if config == nil || !config.Enabled { + return false, nil + } + + // runtime check + noSkippingReason := "" + defer func() { + if noSkippingReason != "" { + ms.logger.Debug(fmt.Sprintf("time skipping skipped for: %s", noSkippingReason), + tag.WorkflowID(ms.GetExecutionInfo().WorkflowId), + tag.WorkflowRunID(ms.GetExecutionState().RunId), + ) + } + }() + if !ms.IsWorkflowExecutionRunning() { + noSkippingReason = "workflow is not running" + return false, nil + } + if ms.IsWorkflowExecutionStatusPaused() { + noSkippingReason = "workflow is paused" + return false, nil + } + if hasPendingWork, detailedReason := ms.hasInflightWorkToPreventTimeSkipping(); hasPendingWork { + noSkippingReason = fmt.Sprintf("pending work: %s", detailedReason) + return false, nil + } + + // Compute the transition early so we can short-circuit before allocating an event. 
+ // todo(@time-skipping): replace error with nil + transition, err := ms.calculateTimeSkippingTransition() + if err != nil { + noSkippingReason = fmt.Sprintf("error calculating time skipping decision: %v", err) + ms.logger.Error( + "error calculating time skipping decision, and ignore this error and continue", + tag.WorkflowID(ms.GetExecutionInfo().WorkflowId), + tag.WorkflowRunID(ms.GetExecutionState().RunId), + tag.Error(err), + ) + return false, nil + } + if !transition.isValid() { + noSkippingReason = "time skipping has no candidate target time nor disabled after fast-forward flag" + return false, nil + } + return true, &transition +} + +type timeSkippingTransition struct { + targetTime time.Time + disabledAfterFastForward bool +} + +func (d timeSkippingTransition) isValid() bool { + return !d.targetTime.IsZero() || d.disabledAfterFastForward +} + +// calculateTimeSkippingTransition determines the next skip target. +// Candidates (in collection order): pending user timers, activity retry backoffs, +// workflow start-with-delay/CaN/retry backoff, and the fast-forward. +// The run/execution timeout is NOT a standalone candidate — it only applies as +// a cap: if any candidate wins, the skip target is clamped to min(target, +// runExpiry, execExpiry). This ensures we never advance virtual time past the +// workflow timeout, even when a user timer or fast-forward would otherwise overshoot. 
+func (ms *MutableStateImpl) calculateTimeSkippingTransition() (timeSkippingTransition, error) { + var transition timeSkippingTransition + advance := func(candidate time.Time, dueToFastForward bool) { + if transition.targetTime.IsZero() || candidate.Before(transition.targetTime) { + transition.targetTime = candidate + transition.disabledAfterFastForward = dueToFastForward + } + } + + for _, timerInfo := range ms.GetPendingTimerInfos() { + advance(timerInfo.ExpiryTime.AsTime(), false) + } + + // Activities waiting out a retry backoff are skip targets: advance to the earliest + // next-attempt time. The clock comparison repeats the idle check's condition (see + // hasInflightWorkToPreventTimeSkipping), so an already-due attempt is never picked as a target. + for _, ai := range ms.GetPendingActivityInfos() { + if activityPendingRetry(ai) && ms.Now().Before(ai.GetScheduledTime().AsTime()) { + advance(ai.ScheduledTime.AsTime(), false) + } + } + + if !ms.HadOrHasWorkflowTask() { + // Support start-with-delay, cron, retry, and CaN-with-backoff: the workflow is + // waiting on a WorkflowBackoffTimerTask. Two extra checks are needed: + // - ExecutionTime > StartTime: a backoff is actually configured (FirstWorkflowTaskBackoff > 0). + // For child workflows, !HadOrHasWorkflowTask is also true between "start event applied" + // and "ScheduleWorkflowTask API call" but no backoff exists, so ExecutionTime == StartTime. + // - ExecutionTime > ms.Now(): the candidate is in the (virtual) future. Defends against + // CaN-with-backoff that inherits accumulated > backoff — past candidates would produce + // a negative delta in ApplyWorkflowExecutionTimeSkippingTransitionedEvent and decrement accumulated.
+ executionTime := ms.executionInfo.GetExecutionTime().AsTime() + startTime := ms.executionInfo.GetStartTime().AsTime() + if executionTime.After(startTime) && executionTime.After(ms.Now()) { + advance(executionTime, false) + } + } + + tsi := ms.GetExecutionInfo().GetTimeSkippingInfo() + if !tsi.GetFastForwardInfo().GetHasReached() && tsi.GetFastForwardInfo().GetTargetTime() != nil { + advance(tsi.GetFastForwardInfo().GetTargetTime().AsTime(), true) + } + + // Cap any skip target at the run/execution timeout: never advance virtual time past + // them. Timeouts alone do not create a skip target — only existing candidates + // (timers, backoffs, fast-forward) do. This also handles the case where a user + // timer fires past the workflow timeout: we cap the skip so the timeout fires on schedule. + if !transition.targetTime.IsZero() { + if t := ms.executionInfo.GetWorkflowRunExpirationTime(); t != nil && !t.AsTime().IsZero() { + advance(t.AsTime(), false) + } + if t := ms.executionInfo.GetWorkflowExecutionExpirationTime(); t != nil && !t.AsTime().IsZero() { + advance(t.AsTime(), false) + } + } + return transition, nil +} + +func (ms *MutableStateImpl) shiftWorkflowTimes(initialSkippedDuration *durationpb.Duration) { + if initialSkippedDuration == nil || initialSkippedDuration.AsDuration() == 0 { + return + } + accum := initialSkippedDuration.AsDuration() + if !timeNotSet(ms.executionState.StartTime) { + ms.executionState.StartTime = timestamppb.New(ms.executionState.StartTime.AsTime().Add(accum)) + } + if !timeNotSet(ms.executionInfo.StartTime) { + ms.executionInfo.StartTime = timestamppb.New(ms.executionInfo.StartTime.AsTime().Add(accum)) + } + if !timeNotSet(ms.executionInfo.ExecutionTime) { + ms.executionInfo.ExecutionTime = timestamppb.New(ms.executionInfo.ExecutionTime.AsTime().Add(accum)) + } + if !timeNotSet(ms.executionInfo.WorkflowRunExpirationTime) { + ms.executionInfo.WorkflowRunExpirationTime = 
timestamppb.New(ms.executionInfo.WorkflowRunExpirationTime.AsTime().Add(accum)) + } + if !timeNotSet(ms.executionInfo.WorkflowExecutionExpirationTime) { + ms.executionInfo.WorkflowExecutionExpirationTime = timestamppb.New(ms.executionInfo.WorkflowExecutionExpirationTime.AsTime().Add(accum)) + } +} + +func (ms *MutableStateImpl) closeTransactionHandleTimeSkipping( + ctx context.Context, + transactionPolicy historyi.TransactionPolicy, +) (regenTimerTasksForTimeSkipping bool) { + switch transactionPolicy { + case historyi.TransactionPolicyActive: + if !ms.IsWorkflowExecutionRunning() { + return false + } + if shouldExecute, transition := ms.shouldExecuteTimeSkipping(); shouldExecute { + _, err := ms.AddWorkflowExecutionTimeSkippingTransitionedEvent( + ctx, transition.targetTime, transition.disabledAfterFastForward) + if err != nil { + ms.metricsHandler.Counter(metrics.ExecutionTimeSkippingTransitionedErrorCounter.Name()).Record(1) + ms.logger.Error( + "failed to add workflow execution time skipping transitioned event, and ignore this error and continue", + tag.WorkflowID(ms.GetExecutionInfo().WorkflowId), + tag.WorkflowRunID(ms.GetExecutionState().RunId), + tag.Error(err), + ) + return false + } + if transition.targetTime.IsZero() { + return false + } + return true + } + return false + case historyi.TransactionPolicyPassive: + return false + default: + ms.logger.Error(fmt.Sprintf("closeTransactionHandleTimeSkipping: unknown transaction policy: %v", transactionPolicy), + tag.WorkflowID(ms.GetExecutionInfo().WorkflowId), + tag.WorkflowRunID(ms.GetExecutionState().RunId), + ) + return false + } +} + +func (ms *MutableStateImpl) closeTransactionRegenerateTimerTasksForTimeSkipping( + transactionPolicy historyi.TransactionPolicy, +) error { + switch transactionPolicy { + case historyi.TransactionPolicyActive: + if !ms.IsWorkflowExecutionRunning() { + return nil + } + return ms.taskGenerator.RegenerateTimerTasksForTimeSkipping() + case historyi.TransactionPolicyPassive: + 
return nil + default: + return serviceerror.NewInternalf("unknown transaction policy: %v", transactionPolicy) + } +} diff --git a/service/history/workflow/timeskipping_test.go b/service/history/workflow/timeskipping_test.go index 9e1c8efc4c..a8d8fa94e6 100644 --- a/service/history/workflow/timeskipping_test.go +++ b/service/history/workflow/timeskipping_test.go @@ -1,11 +1,17 @@ package workflow import ( + "context" "time" commonpb "go.temporal.io/api/common/v1" + enumspb "go.temporal.io/api/enums/v1" + historypb "go.temporal.io/api/history/v1" + enumsspb "go.temporal.io/server/api/enums/v1" persistencespb "go.temporal.io/server/api/persistence/v1" + "go.temporal.io/server/common" "go.temporal.io/server/common/clock" + "go.temporal.io/server/components/nexusoperations" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" @@ -152,3 +158,992 @@ func (s *mutableStateSuite) TestSnapshotTimeSkippingInfo_ForChildWorkflows() { }) } + +func (s *mutableStateSuite) TestHasInflightWorkToPreventTimeSkipping() { + // Each s.Run() gets a fresh mutable state via SetupSubTest(). 
+ + s.Run("FalseWhenNoPendingWork", func() { + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.False(hasPendingWork) + s.Empty(reason) + }) + + s.Run("TrueWhenPendingWorkflowTask", func() { + s.mutableState.executionInfo.WorkflowTaskScheduledEventId = 1 + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending workflow task", reason) + }) + + s.Run("TrueWhenPendingActivity", func() { + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{} + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending activity", reason) + }) + + s.Run("FalseWhenPendingActivityInRetryBackoff", func() { + now := s.mutableState.Now() + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ + ScheduledEventId: 1, + HasRetryPolicy: true, + Attempt: 2, + ScheduledTime: timestamppb.New(now.Add(time.Hour)), + } + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.False(hasPendingWork) + s.Empty(reason) + }) + + s.Run("TrueWhenActivityStarted", func() { + now := s.mutableState.Now() + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ + ScheduledEventId: 1, + HasRetryPolicy: true, + Attempt: 2, + ScheduledTime: timestamppb.New(now.Add(time.Hour)), + StartedEventId: 10, + } + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending activity", reason) + }) + + // A running activity that cannot be retried (no retry policy) must still block: + // the STARTED state short-circuits before the retry-policy check. 
+ s.Run("TrueWhenActivityStartedNotRetryable", func() { + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ + ScheduledEventId: 1, + HasRetryPolicy: false, + Attempt: 1, + StartedEventId: 10, + } + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending activity", reason) + }) + + // A first-attempt scheduled activity that has not failed yet (attempt 1) is not + // in backoff and must block even if it has a retry policy. + s.Run("TrueWhenActivityFirstAttemptScheduled", func() { + now := s.mutableState.Now() + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ + ScheduledEventId: 1, + HasRetryPolicy: true, + Attempt: 1, + ScheduledTime: timestamppb.New(now.Add(time.Hour)), + } + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending activity", reason) + }) + + s.Run("TrueWhenActivityPausedInBackoff", func() { + now := s.mutableState.Now() + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ + ScheduledEventId: 1, + HasRetryPolicy: true, + Attempt: 2, + ScheduledTime: timestamppb.New(now.Add(time.Hour)), + Paused: true, + } + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending activity", reason) + }) + + s.Run("TrueWhenActivityScheduledNow", func() { + now := s.mutableState.Now() + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ + ScheduledEventId: 1, + HasRetryPolicy: true, + ScheduledTime: timestamppb.New(now.Add(-time.Hour)), + } + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending activity", reason) + }) + + s.Run("TrueWhenPendingChildExecution", func() { + s.mutableState.pendingChildExecutionInfoIDs[1] = &persistencespb.ChildExecutionInfo{} + hasPendingWork, reason := 
s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending child execution", reason) + }) + + s.Run("TrueWhenPendingNexusOperation", func() { + _, err := nexusoperations.AddChild(s.mutableState.HSM(), "op-1", &historypb.HistoryEvent{ + EventTime: timestamppb.Now(), + Attributes: &historypb.HistoryEvent_NexusOperationScheduledEventAttributes{ + NexusOperationScheduledEventAttributes: &historypb.NexusOperationScheduledEventAttributes{}, + }, + }, []byte("token")) + s.Require().NoError(err) + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending nexus operations", reason) + }) + + s.Run("TrueWhenPendingSignalExternal", func() { + s.mutableState.pendingSignalInfoIDs[1] = &persistencespb.SignalInfo{} + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending signal external", reason) + }) + + s.Run("TrueWhenPendingRequestCancelExternal", func() { + s.mutableState.pendingRequestCancelInfoIDs[1] = &persistencespb.RequestCancelInfo{} + hasPendingWork, reason := s.mutableState.hasInflightWorkToPreventTimeSkipping() + s.True(hasPendingWork) + s.Equal("has pending request cancel external", reason) + }) + +} + +func (s *mutableStateSuite) TestShouldExecuteTimeSkipping() { + // Each s.Run() gets a fresh mutable state via SetupSubTest(). + // The default state is RUNNING with no pending work. 
+ + s.Run("FalseWhenTimeSkippingInfoNil", func() { + s.mutableState.executionInfo.TimeSkippingInfo = nil + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + s.False(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("FalseWhenConfigNil", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{Config: nil} + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + s.False(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("FalseWhenConfigDisabled", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: false}, + } + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + s.False(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("FalseWhenWorkflowNotRunning", func() { + s.mutableState.executionState.State = enumsspb.WORKFLOW_EXECUTION_STATE_COMPLETED + s.mutableState.executionState.Status = enumspb.WORKFLOW_EXECUTION_STATUS_COMPLETED + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + s.False(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("FalseWhenPendingWorkflowTask", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + s.mutableState.executionInfo.WorkflowTaskScheduledEventId = 1 + s.True(s.mutableState.HasPendingWorkflowTask()) + s.False(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("FalseWhenPendingActivity", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: 
&commonpb.TimeSkippingConfig{Enabled: true}, + } + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{} + s.False(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("FalseWhenPendingChildExecution", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + s.mutableState.pendingChildExecutionInfoIDs[1] = &persistencespb.ChildExecutionInfo{} + s.False(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("FalseWhenPendingNexusOperation", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + _, err := nexusoperations.AddChild(s.mutableState.HSM(), "op-1", &historypb.HistoryEvent{ + EventTime: timestamppb.Now(), + Attributes: &historypb.HistoryEvent_NexusOperationScheduledEventAttributes{ + NexusOperationScheduledEventAttributes: &historypb.NexusOperationScheduledEventAttributes{}, + }, + }, []byte("token")) + s.Require().NoError(err) + s.False(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("FalseWhenNoPendingTimersAndNoFastForward", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + s.False(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("TrueWhenPendingTimerAndNoFastForward", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + s.True(s.mutableState.shouldExecuteTimeSkipping()) + 
}) + + s.Run("TrueWhenFastForwardAndNoPendingTimer", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(time.Hour), + }, + FastForwardInfo: &persistencespb.FastForwardInfo{ + TargetTime: timestamppb.New(s.mutableState.Now().Add(time.Hour)), + }, + } + s.True(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("TrueWhenFastForwardAndPendingTimer", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(time.Hour), + }, + FastForwardInfo: &persistencespb.FastForwardInfo{ + TargetTime: timestamppb.New(s.mutableState.Now().Add(time.Hour)), + }, + } + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + s.True(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("FalseWhenPaused", func() { + s.mutableState.executionState.Status = enumspb.WORKFLOW_EXECUTION_STATUS_PAUSED + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + s.mutableState.pendingTimerInfoIDs["t1"] = &persistencespb.TimerInfo{TimerId: "t1"} + s.False(s.mutableState.shouldExecuteTimeSkipping()) + }) + + s.Run("TrueWhenOnlyActivityInRetryBackoff", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + now := s.mutableState.Now() + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ + ScheduledEventId: 1, + HasRetryPolicy: true, + Attempt: 2, + ScheduledTime: timestamppb.New(now.Add(time.Hour)), + } + s.True(s.mutableState.shouldExecuteTimeSkipping()) + }) +} + +func (s *mutableStateSuite) TestApplyWorkflowExecutionTimeSkippingTransitionedEvent() { + // Use fixed UTC times so duration arithmetic is exact. 
+ baseTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) + + makeEvent := func(eventTime time.Time, targetTime *time.Time, disabledAfterBound bool) *historypb.HistoryEvent { + attr := &historypb.WorkflowExecutionTimeSkippingTransitionedEventAttributes{ + DisabledAfterFastForward: disabledAfterBound, + } + if targetTime != nil { + attr.TargetTime = timestamppb.New(*targetTime) + } + return &historypb.HistoryEvent{ + EventType: enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_TIME_SKIPPING_TRANSITIONED, + EventTime: timestamppb.New(eventTime), + Attributes: &historypb.HistoryEvent_WorkflowExecutionTimeSkippingTransitionedEventAttributes{ + WorkflowExecutionTimeSkippingTransitionedEventAttributes: attr, + }, + } + } + + s.Run("ErrorWhenTimeSkippingInfoNil", func() { + s.mutableState.executionInfo.TimeSkippingInfo = nil + targetTime := baseTime.Add(2 * time.Hour) + err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( + context.Background(), + makeEvent(baseTime, &targetTime, false), + ) + s.Require().Error(err) + }) + + s.Run("AccumulatesDuration", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + s.mutableState.timeSkippingInfoUpdated = false + targetTime := baseTime.Add(2 * time.Hour) + + err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( + context.Background(), + makeEvent(baseTime, &targetTime, false), + ) + s.Require().NoError(err) + + accumulated := s.mutableState.GetExecutionInfo().TimeSkippingInfo.AccumulatedSkippedDuration + s.Require().Equal(2*time.Hour, accumulated.AsDuration()) + s.Require().True(s.mutableState.timeSkippingInfoUpdated) + }) + + s.Run("AccumulatesDurationAdditively", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + AccumulatedSkippedDuration: durationpb.New(time.Hour), + } + targetTime := baseTime.Add(2 
* time.Hour) + + err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( + context.Background(), + makeEvent(baseTime, &targetTime, false), + ) + s.Require().NoError(err) + + accumulated := s.mutableState.GetExecutionInfo().TimeSkippingInfo.AccumulatedSkippedDuration + s.Require().Equal(3*time.Hour, accumulated.AsDuration()) // 1h pre-existing + 2h new + }) + + s.Run("ErrorWhenNilTargetTimeAndNotDisabledAfterBound", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + s.mutableState.timeSkippingInfoUpdated = false + err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( + context.Background(), + makeEvent(baseTime, nil, false), + ) + s.Require().Error(err) + }) + + s.Run("NilTargetTimeWithDisabledAfterBoundDoesNotAccumulateDuration", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + AccumulatedSkippedDuration: durationpb.New(time.Hour), + } + s.mutableState.timeSkippingInfoUpdated = false + err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( + context.Background(), + makeEvent(baseTime, nil, true), + ) + s.Require().NoError(err) + + accumulated := s.mutableState.GetExecutionInfo().TimeSkippingInfo.AccumulatedSkippedDuration + s.Require().Equal(time.Hour, accumulated.AsDuration()) + s.Require().False(s.mutableState.GetExecutionInfo().TimeSkippingInfo.Config.Enabled) + s.Require().True(s.mutableState.timeSkippingInfoUpdated) + }) + + s.Run("DisabledAfterBoundDisablesConfigAndAccumulatesDuration", func() { + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + targetTime := baseTime.Add(2 * time.Hour) + + err := s.mutableState.ApplyWorkflowExecutionTimeSkippingTransitionedEvent( + context.Background(), + 
makeEvent(baseTime, &targetTime, true), + ) + s.Require().NoError(err) + + s.Require().False(s.mutableState.GetExecutionInfo().TimeSkippingInfo.Config.Enabled) + accumulated := s.mutableState.GetExecutionInfo().TimeSkippingInfo.AccumulatedSkippedDuration + s.Require().Equal(2*time.Hour, accumulated.AsDuration()) + }) +} + +func (s *mutableStateSuite) TestWrapTimeSourceWithTimeSkipping() { + const skipped = 2 * time.Hour + fixedBase := time.Date(2024, 6, 1, 10, 0, 0, 0, time.UTC) + + // fixedTimeSource returns fixedBase and is used as the base time source for subtests + // that need deterministic virtual-time assertions. + fixedTimeSource := func() *clock.EventTimeSource { + ts := clock.NewEventTimeSource() + ts.Update(fixedBase) + return ts + } + + s.Run("ZeroOffsetWhenTimeSkippingInfoNil", func() { + s.mutableState.timeSource = fixedTimeSource() + s.mutableState.executionInfo.TimeSkippingInfo = nil + + s.mutableState.wrapTimeSourceWithTimeSkipping() + + _, isWrapper := s.mutableState.timeSource.(*clock.TimeSkippingTimeSourceWrapper) + s.True(isWrapper) + // With nil TimeSkippingInfo the wrapper is present but applies a zero offset. + s.Equal(fixedBase, s.mutableState.timeSource.Now()) + }) + + s.Run("OffsetTracksAccumulatedDuration", func() { + s.mutableState.timeSource = fixedTimeSource() + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + AccumulatedSkippedDuration: durationpb.New(skipped), + } + + s.mutableState.wrapTimeSourceWithTimeSkipping() + + _, isWrapper := s.mutableState.timeSource.(*clock.TimeSkippingTimeSourceWrapper) + s.True(isWrapper) + s.Equal(fixedBase.Add(skipped), s.mutableState.timeSource.Now()) + }) + + s.Run("OffsetFollowsLateTimeSkippingInfoAssignment", func() { + // Wrap first with nil TimeSkippingInfo, then assign it — the closure must + // pick up the new accumulated duration without a re-wrap. 
+ s.mutableState.timeSource = fixedTimeSource() + s.mutableState.executionInfo.TimeSkippingInfo = nil + s.mutableState.wrapTimeSourceWithTimeSkipping() + + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + AccumulatedSkippedDuration: durationpb.New(skipped), + } + + s.Equal(fixedBase.Add(skipped), s.mutableState.timeSource.Now()) + }) + + s.Run("IdempotentWhenAlreadyWrapped", func() { + s.mutableState.timeSource = fixedTimeSource() + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + AccumulatedSkippedDuration: durationpb.New(skipped), + } + s.mutableState.wrapTimeSourceWithTimeSkipping() + wrappedOnce := s.mutableState.timeSource + + s.mutableState.wrapTimeSourceWithTimeSkipping() + + s.Equal(wrappedOnce, s.mutableState.timeSource, "second call must not double-wrap") + }) + + s.Run("HBuilderUsesVirtualTime", func() { + s.mutableState.timeSource = fixedTimeSource() + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + AccumulatedSkippedDuration: durationpb.New(skipped), + } + + s.mutableState.wrapTimeSourceWithTimeSkipping() + + event := s.mutableState.hBuilder.AddHistoryEvent( + enumspb.EVENT_TYPE_TIMER_FIRED, + func(e *historypb.HistoryEvent) { + e.Attributes = &historypb.HistoryEvent_TimerFiredEventAttributes{ + TimerFiredEventAttributes: &historypb.TimerFiredEventAttributes{TimerId: "t1"}, + } + }, + ) + s.Equal(fixedBase.Add(skipped), event.GetEventTime().AsTime()) + }) +} + +// TestApplyFastForward exercises applyFastForward: a fast-forward duration without and +// with a propagated target time, no fast-forward duration (no-op), Enabled=false clearing +// an existing FastForwardInfo, and an enabled config overriding an existing FastForwardInfo. +// The first-init virtual-time path is covered separately in TestInitTimeSkippingInfo_VirtualTime. 
+func (s *mutableStateSuite) TestApplyFastForward() { + s.Run("FastForward_WithNoPropagatedTargetTime", func() { + + fastForwardDuration := 3 * time.Hour + eventID := int64(1) + s.mutableState.timeSource = clock.NewEventTimeSource() + baseTime := s.mutableState.timeSource.Now() + + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(fastForwardDuration)}, + AccumulatedSkippedDuration: durationpb.New(time.Hour), + } + s.mutableState.applyFastForward(eventID, nil) + fastForward := s.mutableState.executionInfo.TimeSkippingInfo.GetFastForwardInfo() + s.Require().NotNil(fastForward) + s.Equal(eventID, fastForward.GetSourceEventId()) + s.False(fastForward.GetHasReached()) + s.WithinDuration(fastForward.GetTargetTime().AsTime(), baseTime.Add(fastForwardDuration), 1*time.Second) + }) + + s.Run("FastForward_WithPropagatedTargetTime", func() { + fixed := time.Date(2026, 1, 1, 12, 0, 0, 0, time.UTC) + eventID := int64(1) + propagatedTarget := fixed.Add(2 * time.Hour) + s.mutableState.timeSource = clock.NewEventTimeSource().Update(fixed) + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(3 * time.Hour)}, + AccumulatedSkippedDuration: durationpb.New(time.Hour), + } + s.mutableState.applyFastForward(eventID, timestamppb.New(propagatedTarget)) + fastForward := s.mutableState.executionInfo.TimeSkippingInfo.GetFastForwardInfo() + s.Require().NotNil(fastForward) + s.Equal(propagatedTarget, fastForward.GetTargetTime().AsTime(), + "propagated target used directly, not recomputed from ff duration") + }) + + s.Run("MaxElapsedDuration_NilDuration_NoOp", func() { + eventID := int64(1) + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{ + Enabled: true}, + } + 
s.mutableState.applyFastForward(eventID, nil) + s.Nil(s.mutableState.executionInfo.TimeSkippingInfo.GetFastForwardInfo()) + }) + + s.Run("FastForward_ClearsExistingInfo", func() { + eventID := int64(1) + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: false}, + FastForwardInfo: &persistencespb.FastForwardInfo{ + TargetTime: timestamppb.New(s.mutableState.Now().Add(time.Hour)), + SourceEventId: 7, + }, + } + s.mutableState.applyFastForward(eventID, nil) + s.Nil(s.mutableState.executionInfo.TimeSkippingInfo.GetFastForwardInfo()) + }) + + s.Run("FastForward_OverridesExistingInfo", func() { + eventID := int64(1) + fixed := time.Date(2026, 1, 1, 12, 0, 0, 0, time.UTC) + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true, FastForward: durationpb.New(time.Hour)}, + FastForwardInfo: &persistencespb.FastForwardInfo{ + TargetTime: timestamppb.New(fixed), + SourceEventId: 7, + }, + } + newTarget := s.mutableState.Now().Add(time.Hour) + s.mutableState.applyFastForward(eventID, nil) + s.WithinDuration(s.mutableState.executionInfo.TimeSkippingInfo.GetFastForwardInfo().GetTargetTime().AsTime(), newTarget, 1*time.Second) + }) +} + +// TestInitTimeSkippingInfo covers the three basic scenarios in which initTimeSkippingInfo is called. 
+func (s *mutableStateSuite) TestInitTimeSkippingInfo() { + + // if the inputs are nil, the caller doesn't need to call the TSI + // yet we still add this test to ensure the function is safe with an noop implementation + // to call with nil inputs + s.Run("SafeInitWithNil_ForExecutionsWithoutTS", func() { + s.mutableState.timeSource = clock.NewEventTimeSource() + baseTime := s.mutableState.timeSource.Now() + s.NotPanics(func() { + s.mutableState.initTimeSkippingInfo(nil, nil, 0) + }) + s.Nil(s.mutableState.executionInfo.TimeSkippingInfo) + s.Equal(baseTime, s.mutableState.Now()) + }) + + s.Run("InitWithConfigOnly_ForExecutionsWithTSStartedByUser", func() { + eventID := int64(1) + s.mutableState.timeSource = clock.NewEventTimeSource() + baseTime := s.mutableState.timeSource.Now() + cfg := &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(3 * time.Hour)} + + s.mutableState.initTimeSkippingInfo(cfg, nil, eventID) + s.Equal(baseTime, s.mutableState.Now()) + tsi := s.mutableState.executionInfo.GetTimeSkippingInfo() + s.Require().NotNil(tsi) + s.True(proto.Equal(cfg, tsi.GetConfig())) + s.Require().NotNil(tsi.GetFastForwardInfo()) + s.Require().Nil(tsi.GetAccumulatedSkippedDuration()) + // timestamppb translates to UTC time + s.Require().Equal(baseTime.Add(3*time.Hour).UTC(), + tsi.GetFastForwardInfo().GetTargetTime().AsTime()) + }) + + s.Run("InitWithPropagation_ForExecutionsWithTSStartedByPropagation", func() { + s.mutableState.timeSource = clock.NewEventTimeSource() + + // all local time + baseTime := s.mutableState.timeSource.Now() + targetTime := baseTime.Add(3 * time.Hour) + + hasSkipped := 2 * time.Hour + fastForward := 3 * time.Hour + eventID := int64(1) + + cfg := &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(fastForward), + DisableChildPropagation: true, + } + propagation := &commonpb.TimeSkippingStatePropagation{ + InitialSkippedDuration: durationpb.New(hasSkipped), + FastForwardTargetTime: 
timestamppb.New(targetTime), + } + s.mutableState.initTimeSkippingInfo(cfg, propagation, eventID) + + tsi := s.mutableState.executionInfo.GetTimeSkippingInfo() + s.Require().NotNil(tsi) + s.Equal(baseTime.Add(hasSkipped), s.mutableState.Now()) + s.Equal(hasSkipped, tsi.GetAccumulatedSkippedDuration().AsDuration()) + s.Equal(targetTime.UTC(), + tsi.GetFastForwardInfo().GetTargetTime().AsTime()) + }) +} + +func (s *mutableStateSuite) TestUpdateTimeSkippingInfo() { + + s.Run("UpdateTimeSkippingInfo_UpdateWithNil", func() { + s.mutableState.timeSource = clock.NewEventTimeSource() + baseTime := s.mutableState.timeSource.Now() + currentTSI := &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(time.Hour), + }, + AccumulatedSkippedDuration: durationpb.New(time.Hour), + FastForwardInfo: &persistencespb.FastForwardInfo{ + TargetTime: timestamppb.New(baseTime.Add(time.Hour)), + SourceEventId: 7, + HasReached: false, + }, + } + s.mutableState.executionInfo.TimeSkippingInfo = currentTSI + s.mutableState.timeSkippingInfoUpdated = false + newEventID := int64(8) + s.mutableState.updateTimeSkippingInfo(nil, newEventID) + newTSI := s.mutableState.executionInfo.GetTimeSkippingInfo() + s.Require().NotNil(newTSI) + s.Nil(newTSI.GetConfig()) + s.Nil(newTSI.GetFastForwardInfo()) + s.Equal(currentTSI.GetAccumulatedSkippedDuration(), newTSI.GetAccumulatedSkippedDuration()) + s.True(s.mutableState.timeSkippingInfoUpdated) + }) + + s.Run("UpdateTimeSkippingInfo_EnableTS", func() { + s.mutableState.timeSource = clock.NewEventTimeSource() + baseTime := s.mutableState.timeSource.Now() + currentTSI := persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{ + Enabled: false, + }, + AccumulatedSkippedDuration: durationpb.New(time.Hour), + } + s.mutableState.executionInfo.TimeSkippingInfo = ¤tTSI + + // new config + newConfig := &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(2 * 
time.Hour), + DisableChildPropagation: true, + } + newEventID := int64(8) + s.mutableState.updateTimeSkippingInfo(newConfig, newEventID) + newTSI := s.mutableState.executionInfo.GetTimeSkippingInfo() + + s.Require().NotNil(newTSI) + s.True(proto.Equal(newConfig, newTSI.GetConfig())) + s.Require().NotNil(newTSI.GetFastForwardInfo()) + s.Equal(newEventID, newTSI.GetFastForwardInfo().GetSourceEventId()) + s.Equal(baseTime.Add(2*time.Hour).UTC(), newTSI.GetFastForwardInfo().GetTargetTime().AsTime()) + s.False(newTSI.GetFastForwardInfo().GetHasReached()) + s.Equal(time.Hour, newTSI.GetAccumulatedSkippedDuration().AsDuration()) + }) + + s.Run("UpdateTimeSkippingInfo_OverrideFFThenTurnOff", func() { + s.mutableState.timeSource = clock.NewEventTimeSource() + baseTime := s.mutableState.timeSource.Now() + currentTSI := persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(time.Hour), + }, + AccumulatedSkippedDuration: durationpb.New(time.Hour), + FastForwardInfo: &persistencespb.FastForwardInfo{ + TargetTime: timestamppb.New(baseTime.Add(time.Hour)), + SourceEventId: 7, + HasReached: false, + }, + } + s.mutableState.executionInfo.TimeSkippingInfo = ¤tTSI + + // update with new config with a new FF + tsc2 := &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(2 * time.Hour), + } + newEventID := int64(8) + s.mutableState.updateTimeSkippingInfo(tsc2, newEventID) + tsc2TSI := s.mutableState.executionInfo.GetTimeSkippingInfo() + + s.Require().NotNil(tsc2TSI) + s.True(proto.Equal(tsc2, tsc2TSI.GetConfig())) + s.Require().NotNil(tsc2TSI.GetFastForwardInfo()) + s.Equal(newEventID, tsc2TSI.GetFastForwardInfo().GetSourceEventId()) + s.Equal(baseTime.Add(2*time.Hour).UTC(), tsc2TSI.GetFastForwardInfo().GetTargetTime().AsTime()) + s.False(tsc2TSI.GetFastForwardInfo().GetHasReached()) + s.Equal(time.Hour, tsc2TSI.GetAccumulatedSkippedDuration().AsDuration()) + + // disable the time skipping + tsc3 
:= &commonpb.TimeSkippingConfig{ + Enabled: false, + } + s.mutableState.updateTimeSkippingInfo(tsc3, newEventID) + tsc3TSI := s.mutableState.executionInfo.GetTimeSkippingInfo() + s.Require().NotNil(tsc3TSI) + s.True(proto.Equal(tsc3, tsc3TSI.GetConfig())) + s.Nil(tsc3TSI.GetFastForwardInfo()) + s.Equal(time.Hour, tsc3TSI.GetAccumulatedSkippedDuration().AsDuration()) + + }) + +} + +// TestCalculateTimeSkippingTransition exercises the full candidate-selection +// branch table. +func (s *mutableStateSuite) TestCalculateTimeSkippingTransition() { + baseTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) + + addTimer := func(id string, expiry time.Time) { + s.mutableState.pendingTimerInfoIDs[id] = &persistencespb.TimerInfo{ + TimerId: id, + ExpiryTime: timestamppb.New(expiry), + } + } + + // resetMS gives each subtest a fresh, deterministic MS. + resetMS := func() { + ts := clock.NewEventTimeSource().Update(baseTime) + s.mutableState.timeSource = ts + s.mutableState.pendingTimerInfoIDs = make(map[string]*persistencespb.TimerInfo) + s.mutableState.pendingActivityInfoIDs = make(map[int64]*persistencespb.ActivityInfo) + s.mutableState.executionInfo.TimeSkippingInfo = &persistencespb.TimeSkippingInfo{ + Config: &commonpb.TimeSkippingConfig{Enabled: true}, + } + // Default: workflow has had a workflow task, so the backoff branch is skipped. 
+ s.mutableState.executionInfo.StartTime = timestamppb.New(baseTime) + s.mutableState.executionInfo.ExecutionTime = timestamppb.New(baseTime) + s.mutableState.executionInfo.LastCompletedWorkflowTaskStartedEventId = 1 + } + + s.Run("NoCandidates_InvalidTransition", func() { + resetMS() + t, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.False(t.isValid()) + }) + + s.Run("OneUserTimer_TargetIsTimer", func() { + resetMS() + t1 := baseTime.Add(2 * time.Hour) + addTimer("t1", t1) + + t, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.Equal(t1, t.targetTime) + s.False(t.disabledAfterFastForward) + }) + + s.Run("TwoUserTimers_TargetIsEarliest", func() { + resetMS() + t1 := baseTime.Add(2 * time.Hour) + t2 := baseTime.Add(3 * time.Hour) + addTimer("t1", t1) + addTimer("t2", t2) + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.Equal(t1, tr.targetTime) + s.False(tr.disabledAfterFastForward) + }) + + s.Run("UserTimer_PlusEarlierFastForward_TargetIsFastForward", func() { + resetMS() + t1 := baseTime.Add(3 * time.Hour) + fastForwardTarget := baseTime.Add(time.Hour) + addTimer("t1", t1) + s.mutableState.executionInfo.TimeSkippingInfo.Config.FastForward = durationpb.New(time.Hour) + s.mutableState.executionInfo.TimeSkippingInfo.FastForwardInfo = + &persistencespb.FastForwardInfo{TargetTime: timestamppb.New(fastForwardTarget)} + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.Equal(fastForwardTarget, tr.targetTime) + s.True(tr.disabledAfterFastForward) + }) + + s.Run("ZeroFastForward_NilTarget_NoErrorNoCandidate", func() { + resetMS() + // A zero max_elapsed_duration is treated as no fast-forward: it contributes no candidate + // and must NOT trip the nil-fast-forward corruption check. 
+ s.mutableState.executionInfo.TimeSkippingInfo.Config.FastForward = durationpb.New(0) + // FastForward persistence deliberately not set. + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.False(tr.isValid()) + }) + + s.Run("Backoff_NotChildAndExecutionTimeFuture_IsCandidate", func() { + resetMS() + // !HadOrHasWorkflowTask: clear the last completed WT. + s.mutableState.executionInfo.LastCompletedWorkflowTaskStartedEventId = common.EmptyEventID + s.mutableState.executionInfo.WorkflowTaskScheduledEventId = common.EmptyEventID + // ExecutionTime > StartTime: a real backoff configured. + execTime := baseTime.Add(time.Hour) + s.mutableState.executionInfo.ExecutionTime = timestamppb.New(execTime) + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.Equal(execTime, tr.targetTime) + s.False(tr.disabledAfterFastForward) + }) + + s.Run("Backoff_ChildWFCase_NotCandidate", func() { + resetMS() + s.mutableState.executionInfo.LastCompletedWorkflowTaskStartedEventId = common.EmptyEventID + s.mutableState.executionInfo.WorkflowTaskScheduledEventId = common.EmptyEventID + // ExecutionTime == StartTime: child WF "no first WT scheduled yet" case. + s.mutableState.executionInfo.ExecutionTime = timestamppb.New(baseTime) + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.False(tr.isValid(), + "child WF without backoff and no other candidate must yield invalid transition") + }) + + s.Run("Backoff_CaNWithInheritedSkip_PastExecutionTime_NotCandidate", func() { + resetMS() + // Inherited accumulated such that Now() > ExecutionTime: the backoff candidate + // is in the past relative to virtual now and must not be picked. 
+ s.mutableState.executionInfo.LastCompletedWorkflowTaskStartedEventId = common.EmptyEventID + s.mutableState.executionInfo.WorkflowTaskScheduledEventId = common.EmptyEventID + execTime := baseTime.Add(10 * time.Minute) + s.mutableState.executionInfo.ExecutionTime = timestamppb.New(execTime) + // Wrap the time source so virtual Now is baseTime + accum (= baseTime + 1h). + s.mutableState.timeSource = clock.WrapTimeSourceWithTimeSkipping( + clock.NewEventTimeSource().Update(baseTime), + func() time.Duration { return time.Hour }, + ) + s.mutableState.executionInfo.TimeSkippingInfo.AccumulatedSkippedDuration = durationpb.New(time.Hour) + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.False(tr.isValid(), + "backoff in the virtual past must not produce a transition candidate") + }) + + s.Run("ActivityInRetryBackoff_IsCandidate", func() { + resetMS() + schedTime := baseTime.Add(30 * time.Minute) + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ + ScheduledEventId: 1, + HasRetryPolicy: true, + Attempt: 2, + ScheduledTime: timestamppb.New(schedTime), + } + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.Equal(schedTime, tr.targetTime) + s.False(tr.disabledAfterFastForward) + }) + + s.Run("TwoActivitiesInBackoff_TargetIsEarliest", func() { + resetMS() + early := baseTime.Add(30 * time.Minute) + late := baseTime.Add(2 * time.Hour) + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ + ScheduledEventId: 1, HasRetryPolicy: true, Attempt: 2, + ScheduledTime: timestamppb.New(late), + } + s.mutableState.pendingActivityInfoIDs[2] = &persistencespb.ActivityInfo{ + ScheduledEventId: 2, HasRetryPolicy: true, Attempt: 2, + ScheduledTime: timestamppb.New(early), + } + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.Equal(early, tr.targetTime) + }) + + s.Run("ActivityBackoff_PlusEarlierTimer_TargetIsTimer", 
func() { + resetMS() + schedTime := baseTime.Add(2 * time.Hour) + timerTime := baseTime.Add(time.Hour) + addTimer("t1", timerTime) + s.mutableState.pendingActivityInfoIDs[1] = &persistencespb.ActivityInfo{ + ScheduledEventId: 1, HasRetryPolicy: true, Attempt: 2, + ScheduledTime: timestamppb.New(schedTime), + } + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.Equal(timerTime, tr.targetTime) + }) + + // Universal cap: skip target must not exceed the run/execution timeout. + // MaxElapsedDuration's value is irrelevant to calculateTimeSkippingTransition; + // only FastForward.TargetTime is read. We use a large dummy + // duration solely to configure the fast-forward. + const largeFastForward = 24 * time.Hour + setFastForwardAt := func(target time.Time) { + s.mutableState.executionInfo.TimeSkippingInfo.Config.FastForward = durationpb.New(largeFastForward) + s.mutableState.executionInfo.TimeSkippingInfo.FastForwardInfo = + &persistencespb.FastForwardInfo{TargetTime: timestamppb.New(target)} + } + + s.Run("FastForward_LargerThanRunTimeout_CappedAtRunTimeout", func() { + resetMS() + runExpiry := baseTime.Add(30 * time.Minute) + fastForwardTarget := baseTime.Add(2 * time.Hour) // fast-forward > run timeout + setFastForwardAt(fastForwardTarget) + s.mutableState.executionInfo.WorkflowRunExpirationTime = timestamppb.New(runExpiry) + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.Equal(runExpiry, tr.targetTime, "skip must be capped at run timeout") + s.False(tr.disabledAfterFastForward, "cap fires before fast-forward; fast-forward must not be marked reached") + }) + + s.Run("FastForward_SmallerThanRunTimeout_NoCap_TargetIsFastForward", func() { + resetMS() + fastForwardTarget := baseTime.Add(30 * time.Minute) + runExpiry := baseTime.Add(2 * time.Hour) // run timeout > fast-forward, no cap needed + setFastForwardAt(fastForwardTarget) + 
s.mutableState.executionInfo.WorkflowRunExpirationTime = timestamppb.New(runExpiry) + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.Equal(fastForwardTarget, tr.targetTime, "fast-forward is minimum; no cap applies") + s.True(tr.disabledAfterFastForward, "fast-forward fires before run timeout; fast-forward must be marked reached") + }) + + s.Run("FastForward_LargerThanExecTimeout_NoRunTimeout_CappedAtExecTimeout", func() { + resetMS() + execExpiry := baseTime.Add(30 * time.Minute) + fastForwardTarget := baseTime.Add(2 * time.Hour) // fast-forward > execution timeout + setFastForwardAt(fastForwardTarget) + // No WorkflowRunExpirationTime; only execution timeout. + s.mutableState.executionInfo.WorkflowExecutionExpirationTime = timestamppb.New(execExpiry) + + tr, err := s.mutableState.calculateTimeSkippingTransition() + s.Require().NoError(err) + s.Equal(execExpiry, tr.targetTime, "skip must be capped at execution timeout") + s.False(tr.disabledAfterFastForward, "cap fires before fast-forward; fast-forward must not be marked reached") + }) + + s.Run("FastForward_ZeroRunTimeout_TreatedAsNoTimeout_NoCap", func() { + // A zero-value timestamp means "no timeout configured". The cap must not + // fire: the fast-forward target should be the skip destination unchanged. 
+		resetMS()
+		fastForwardTarget := baseTime.Add(time.Hour)
+		setFastForwardAt(fastForwardTarget)
+		s.mutableState.executionInfo.WorkflowRunExpirationTime = timestamppb.New(time.Time{})
+		s.mutableState.executionInfo.WorkflowExecutionExpirationTime = timestamppb.New(time.Time{})
+
+		tr, err := s.mutableState.calculateTimeSkippingTransition()
+		s.Require().NoError(err)
+		s.Equal(fastForwardTarget, tr.targetTime, "zero timeout must not cap the skip")
+		s.True(tr.disabledAfterFastForward)
+	})
+}
diff --git a/tests/xdc/timeskipping_replication_test.go b/tests/xdc/timeskipping_replication_test.go
new file mode 100644
index 0000000000..7223bb8baa
--- /dev/null
+++ b/tests/xdc/timeskipping_replication_test.go
@@ -0,0 +1,381 @@
+package xdc
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/stretchr/testify/require"
+	"github.com/stretchr/testify/suite"
+	commandpb "go.temporal.io/api/command/v1"
+	commonpb "go.temporal.io/api/common/v1"
+	enumspb "go.temporal.io/api/enums/v1"
+	taskqueuepb "go.temporal.io/api/taskqueue/v1"
+	"go.temporal.io/api/workflowservice/v1"
+	"go.temporal.io/server/api/historyservice/v1"
+	persistencespb "go.temporal.io/server/api/persistence/v1"
+	"go.temporal.io/server/chasm"
+	"go.temporal.io/server/common/dynamicconfig"
+	"go.temporal.io/server/common/log"
+	"go.temporal.io/server/common/testing/await"
+	"go.temporal.io/server/common/testing/taskpoller"
+	"go.temporal.io/server/common/testing/testvars"
+	"google.golang.org/protobuf/proto"
+	"google.golang.org/protobuf/types/known/durationpb"
+)
+
+// timeSkippingReplicationSuite holds the cross-cluster tests for time-skipping
+// replication. Its only field is the embedded xdcBaseSuite; the cluster handles and
+// setup/teardown helpers used by the methods below are reached through it.
+type timeSkippingReplicationSuite struct {
+	xdcBaseSuite
+}
+
+// TestTimeSkippingReplicationSuite is the `go test` entry point: it marks the test
+// as parallel and hands a fresh suite instance to testify's suite.Run.
+func TestTimeSkippingReplicationSuite(t *testing.T) {
+	t.Parallel()
+	s := &timeSkippingReplicationSuite{}
+	suite.Run(t, s)
+}
+
+// SetupSuite configures the suite and then calls s.setupSuite(). All fields are
+// assigned before that call: the TimeSkippingEnabled dynamic-config override,
+// transition history, and a test logger.
+func (s *timeSkippingReplicationSuite) SetupSuite() {
+	s.dynamicConfigOverrides = map[dynamicconfig.Key]any{
+		dynamicconfig.TimeSkippingEnabled.Key(): true,
+	}
+	// Drive the state-based replication path so TimeSkippingInfo replicates via the
+	// generic ExecutionInfo merge and PartialRefresh re-stamps timer tasks on the standby.
+	// Without this, events alone replicate TimeSkippingInfo via their handlers, and the
+	// state-based path is never exercised.
+	s.enableTransitionHistory = true
+	s.logger = log.NewTestLogger()
+	s.setupSuite()
+}
+
+// TearDownSuite delegates to s.tearDownSuite().
+func (s *timeSkippingReplicationSuite) TearDownSuite() {
+	s.tearDownSuite()
+}
+
+// SetupTest delegates to s.setupTest().
+func (s *timeSkippingReplicationSuite) SetupTest() {
+	s.setupTest()
+}
+
+// describeNamespaceID looks up the namespace ID from cluster[0]; the same ID is
+// shared on standby once the namespace replicates.
+// It asserts that the DescribeNamespace call succeeded and returns the ID taken from
+// the response's NamespaceInfo.
+func (s *timeSkippingReplicationSuite) describeNamespaceID(ctx context.Context, ns string) string {
+	resp, err := s.clusters[0].FrontendClient().DescribeNamespace(ctx, &workflowservice.DescribeNamespaceRequest{
+		Namespace: ns,
+	})
+	s.NoError(err)
+	return resp.GetNamespaceInfo().GetId()
+}
+
+// getExecutionInfoFromCluster reads the persisted ExecutionInfo from the given cluster's
+// history service. Use the database (not cache) view to ensure we observe replicated state.
+// clusterIdx indexes s.clusters; nsID, wfID and runID identify the execution. The call
+// error and the presence of the database mutable state are asserted through the suite (s).
+func (s *timeSkippingReplicationSuite) getExecutionInfoFromCluster(
+	ctx context.Context,
+	clusterIdx int,
+	nsID, wfID, runID string,
+) *persistencespb.WorkflowExecutionInfo {
+	resp, err := s.clusters[clusterIdx].HistoryClient().DescribeMutableState(ctx, &historyservice.DescribeMutableStateRequest{
+		NamespaceId: nsID,
+		Execution:   &commonpb.WorkflowExecution{WorkflowId: wfID, RunId: runID},
+		ArchetypeId: chasm.WorkflowArchetypeID,
+	})
+	s.NoError(err)
+	// The ExecutionInfo is taken from the database view, so that view must be present.
+	s.NotNil(resp.GetDatabaseMutableState())
+	return resp.GetDatabaseMutableState().GetExecutionInfo()
+}
+
+// waitForTimeSkippingInfoSynced blocks until the standby cluster's TimeSkippingInfo
+// agrees with the active's on Config and AccumulatedSkippedDuration. LastUpdateVersionedTransition
+// replicates verbatim and drives the standby's PartialRefresh re-stamp; it is not asserted here.
+func (s *timeSkippingReplicationSuite) waitForTimeSkippingInfoSynced(
+	ctx context.Context,
+	nsID, wfID, runID string,
+) {
+	s.waitForClusterSynced()
+
+	// readTimeSkippingInfo fetches the persisted (database-view) TimeSkippingInfo of the
+	// run from s.clusters[clusterIdx]. A failed or empty read is reported on the polling
+	// attempt's t, exactly like the comparisons in the closure below, rather than through
+	// suite-level assertions on s, so it is scoped to the attempt that observed it.
+	readTimeSkippingInfo := func(t *await.T, clusterIdx int) *persistencespb.TimeSkippingInfo {
+		resp, err := s.clusters[clusterIdx].HistoryClient().DescribeMutableState(ctx, &historyservice.DescribeMutableStateRequest{
+			NamespaceId: nsID,
+			Execution:   &commonpb.WorkflowExecution{WorkflowId: wfID, RunId: runID},
+			ArchetypeId: chasm.WorkflowArchetypeID,
+		})
+		require.NoError(t, err)
+		require.NotNil(t, resp.GetDatabaseMutableState())
+		return resp.GetDatabaseMutableState().GetExecutionInfo().GetTimeSkippingInfo()
+	}
+
+	await.Require(ctx, s.T(), func(t *await.T) {
+		active := readTimeSkippingInfo(t, 0)
+		standby := readTimeSkippingInfo(t, 1)
+		require.NotNil(t, active, "active TimeSkippingInfo must be present")
+		require.NotNil(t, standby, "standby TimeSkippingInfo must be present")
+		// Config is a proto message, so compare it with proto.Equal.
+		require.True(t, proto.Equal(active.GetConfig(), standby.GetConfig()),
+			"config mismatch: active=%v standby=%v", active.GetConfig(), standby.GetConfig())
+		require.Equal(t,
+			active.GetAccumulatedSkippedDuration().AsDuration(),
+			standby.GetAccumulatedSkippedDuration().AsDuration(),
+			"accumulated skipped duration must match")
+	}, replicationWaitTime, replicationCheckInterval)
+}
+
+// startSkippingWorkflow starts a workflow on the active cluster (cluster[0]) with
+// the given TimeSkippingConfig and optional WorkflowStartDelay. Returns the run ID.
+// The start-delay scenario triggers a skip on the very first close transaction
+// (no WT yet, ExecutionTime > StartTime), giving us deterministic accumulated skip
+// without needing to drive any workflow tasks.
+func (s *timeSkippingReplicationSuite) startSkippingWorkflow( + ctx context.Context, + ns, wfID, tq string, + runTimeout, startDelay time.Duration, + cfg *commonpb.TimeSkippingConfig, +) string { + req := &workflowservice.StartWorkflowExecutionRequest{ + RequestId: uuid.NewString(), + Namespace: ns, + WorkflowId: wfID, + WorkflowType: &commonpb.WorkflowType{Name: "ts-replication-wf"}, + TaskQueue: &taskqueuepb.TaskQueue{Name: tq, Kind: enumspb.TASK_QUEUE_KIND_NORMAL}, + WorkflowRunTimeout: durationpb.New(runTimeout), + WorkflowTaskTimeout: durationpb.New(10 * time.Second), + TimeSkippingConfig: cfg, + } + if startDelay > 0 { + req.WorkflowStartDelay = durationpb.New(startDelay) + } + resp, err := s.clusters[0].FrontendClient().StartWorkflowExecution(ctx, req) + s.NoError(err) + return resp.GetRunId() +} + +// completeFirstWorkflowTask polls and completes the initial workflow task on the +// active cluster with no commands, leaving the workflow open and idle. Once idle, +// a registered FastForward fires on the next close transaction (see the +// FastForward functional tests for the same pattern). +func (s *timeSkippingReplicationSuite) completeFirstWorkflowTask(ns, wfID, tq string) { + tv := testvars.New(s.T()).WithTaskQueue(tq).WithWorkflowID(wfID) + poller := taskpoller.New(s.T(), s.clusters[0].FrontendClient(), ns) + _, err := poller.PollAndHandleWorkflowTask(tv, + func(_ *workflowservice.PollWorkflowTaskQueueResponse) (*workflowservice.RespondWorkflowTaskCompletedRequest, error) { + return &workflowservice.RespondWorkflowTaskCompletedRequest{}, nil + }) + s.NoError(err) +} + +// TestBasicSkipReplicates verifies the core replication contract for time-skipping: +// a skip transition applied on the active cluster's mutable state replicates to the +// standby with matching Config and AccumulatedSkippedDuration. LastUpdateVersionedTransition +// replicates verbatim and drives the standby's PartialRefresh re-stamp; it is not asserted. 
+func (s *timeSkippingReplicationSuite) TestBasicSkipReplicates() {
+	const (
+		startDelay = time.Hour
+		accumTol   = 100 * time.Millisecond
+	)
+
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	ns := s.createGlobalNamespace()
+	nsID := s.describeNamespaceID(ctx, ns)
+	wfID := "ts-repl-basic-" + uuid.NewString()
+	tq := "ts-repl-basic-tq-" + uuid.NewString()
+
+	skipCfg := &commonpb.TimeSkippingConfig{Enabled: true}
+	runID := s.startSkippingWorkflow(ctx, ns, wfID, tq, 24*time.Hour, startDelay, skipCfg)
+
+	// skippingInfoOn reads this run's persisted TimeSkippingInfo from the given cluster.
+	skippingInfoOn := func(clusterIdx int) *persistencespb.TimeSkippingInfo {
+		return s.getExecutionInfoFromCluster(ctx, clusterIdx, nsID, wfID, runID).GetTimeSkippingInfo()
+	}
+
+	// The skip is applied by the close transaction of the start itself; poll the
+	// active until it has been persisted.
+	await.Require(ctx, s.T(), func(t *await.T) {
+		activeInfo := skippingInfoOn(0)
+		require.NotNil(t, activeInfo, "active must persist TimeSkippingInfo after start")
+		skipped := activeInfo.GetAccumulatedSkippedDuration().AsDuration()
+		require.Greater(t, skipped, 30*time.Minute,
+			"active must accumulate ~startDelay of skip on the first close transaction")
+	}, 15*time.Second, 200*time.Millisecond)
+
+	s.waitForTimeSkippingInfoSynced(ctx, nsID, wfID, runID)
+
+	// Re-read both sides after the sync and compare them directly.
+	activeInfo := skippingInfoOn(0)
+	standbyInfo := skippingInfoOn(1)
+	standbySkipped := standbyInfo.GetAccumulatedSkippedDuration().AsDuration()
+	s.True(proto.Equal(activeInfo.GetConfig(), standbyInfo.GetConfig()))
+	s.Equal(activeInfo.GetAccumulatedSkippedDuration().AsDuration(), standbySkipped)
+	s.InDelta(float64(startDelay), float64(standbySkipped), float64(accumTol),
+		"standby's accumulated skip should match the configured startDelay within tolerance")
+}
+
+// TestFastForwardDisablePropagates checks that the outcome of a completed FastForward
+// reaches the standby. Completing a registered FastForward on the active flips
+// Config.Enabled to false and accumulates the fast-forward duration; once that state
+// has replicated, the standby's config must also report Enabled=false so subsequent
+// skip checks short-circuit there too.
+func (s *timeSkippingReplicationSuite) TestFastForwardDisablePropagates() {
+	const (
+		fastForward = 30 * time.Minute
+		accumTol    = 30 * time.Second
+	)
+
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	ns := s.createGlobalNamespace()
+	nsID := s.describeNamespaceID(ctx, ns)
+	wfID := "ts-repl-ff-" + uuid.NewString()
+	tq := "ts-repl-ff-tq-" + uuid.NewString()
+
+	ffCfg := &commonpb.TimeSkippingConfig{
+		Enabled:     true,
+		FastForward: durationpb.New(fastForward),
+	}
+	runID := s.startSkippingWorkflow(ctx, ns, wfID, tq, 24*time.Hour, 0, ffCfg)
+
+	// skippingInfoOn reads this run's persisted TimeSkippingInfo from the given cluster.
+	skippingInfoOn := func(clusterIdx int) *persistencespb.TimeSkippingInfo {
+		return s.getExecutionInfoFromCluster(ctx, clusterIdx, nsID, wfID, runID).GetTimeSkippingInfo()
+	}
+
+	// Completing the initial workflow task leaves the workflow idle, which lets the
+	// fast-forward fire on the next close transaction.
+	s.completeFirstWorkflowTask(ns, wfID, tq)
+
+	// Poll the active until the fast-forward has completed: Enabled=false,
+	// HasReached=true, and the accumulated skip within accumTol of fastForward.
+	await.Require(ctx, s.T(), func(t *await.T) {
+		activeInfo := skippingInfoOn(0)
+		require.NotNil(t, activeInfo)
+		require.False(t, activeInfo.GetConfig().GetEnabled(),
+			"completing the fast-forward must flip Config.Enabled to false")
+		require.True(t, activeInfo.GetFastForwardInfo().GetHasReached(), "active must set HasReached=true")
+		skipped := activeInfo.GetAccumulatedSkippedDuration().AsDuration()
+		require.InDelta(t, float64(fastForward), float64(skipped), float64(accumTol),
+			"accumulated must land at ~fastForward after the disable transition")
+	}, 30*time.Second, 200*time.Millisecond)
+
+	s.waitForTimeSkippingInfoSynced(ctx, nsID, wfID, runID)
+
+	standbyInfo := skippingInfoOn(1)
+	s.False(standbyInfo.GetConfig().GetEnabled(),
+		"standby must observe Config.Enabled=false after fast-forward replication")
+	standbySkipped := standbyInfo.GetAccumulatedSkippedDuration().AsDuration()
+	s.InDelta(float64(fastForward), float64(standbySkipped), float64(accumTol))
+
+	// The mutable-state fields alone do not show that the event replicated, so also
+	// count the TimeSkippingTransitioned events flagged DisabledAfterFastForward in
+	// the history served by the standby.
+	standbyHistory, err := s.clusters[1].FrontendClient().GetWorkflowExecutionHistory(ctx,
+		&workflowservice.GetWorkflowExecutionHistoryRequest{
+			Namespace: ns,
+			Execution: &commonpb.WorkflowExecution{WorkflowId: wfID, RunId: runID},
+		})
+	s.NoError(err)
+	var disableTransitions int
+	for _, event := range standbyHistory.GetHistory().GetEvents() {
+		isTransition := event.GetEventType() == enumspb.EVENT_TYPE_WORKFLOW_EXECUTION_TIME_SKIPPING_TRANSITIONED
+		if isTransition && event.GetWorkflowExecutionTimeSkippingTransitionedEventAttributes().GetDisabledAfterFastForward() {
+			disableTransitions++
+		}
+	}
+	s.Equal(1, disableTransitions,
+		"standby history must contain exactly one TimeSkippingTransitioned event with DisabledAfterFastForward=true")
+}
+
+// TestStandbyTimeSkippingTimerTaskAcksOnReachedFastForward drives the standby
+// executor's executeTimeSkippingTimerTask end-to-end. A registered FastForward
+// installs a TimeSkippingTimerTask whose visibility is scheduled wall-clock time.
+// On the standby this task is regenerated during state-based replication via
+// task_refresher.refreshTasksForTimeSkipping.
+//
+// Scenario:
+//
+//  1. Start a workflow on active with TimeSkippingConfig {Enabled, FastForward}.
+//  2. Drive the initial workflow task so the workflow goes idle; the fast-forward
+//     fires, flipping Enabled=false and setting HasReached=true.
+//  3. State replicates to standby. Standby's own TimeSkippingTimerTask was generated
+//     by the refresh; when it fires, executeTimeSkippingTimerTask runs and observes
+//     SourceEventId-match with HasReached=true → ack branch.
+//
+// Convergence on both clusters proves the standby's executor path is wired up,
+// returns no error, and doesn't hang or crash.
+func (s *timeSkippingReplicationSuite) TestStandbyTimeSkippingTimerTaskAcksOnReachedFastForward() { + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + + ns := s.createGlobalNamespace() + nsID := s.describeNamespaceID(ctx, ns) + wfID := "ts-repl-ff-timer-" + uuid.NewString() + tq := "ts-repl-ff-timer-tq-" + uuid.NewString() + + const fastForward = 30 * time.Minute + runID := s.startSkippingWorkflow(ctx, ns, wfID, tq, 24*time.Hour, 0, + &commonpb.TimeSkippingConfig{ + Enabled: true, + FastForward: durationpb.New(fastForward), + }, + ) + + s.completeFirstWorkflowTask(ns, wfID, tq) + + // Active reaches the fast-forward: Enabled flips to false and HasReached is set. + // This is driven by the TimeSkippingTimerTask firing on the active. + await.Require(ctx, s.T(), func(t *await.T) { + info := s.getExecutionInfoFromCluster(ctx, 0, nsID, wfID, runID).GetTimeSkippingInfo() + require.NotNil(t, info) + require.False(t, info.GetConfig().GetEnabled(), + "active must flip Enabled=false after the fast-forward timer fires") + ff := info.GetFastForwardInfo() + require.NotNil(t, ff) + require.True(t, ff.GetHasReached(), "active must set HasReached=true") + }, 30*time.Second, 200*time.Millisecond) + + s.waitForTimeSkippingInfoSynced(ctx, nsID, wfID, runID) + + // Standby must converge to the same end state. The standby's own + // TimeSkippingTimerTask was generated by refreshTasksForTimeSkipping during + // replication; it fires through executeTimeSkippingTimerTask and acks because + // HasReached is already true by that time. 
+ standby := s.getExecutionInfoFromCluster(ctx, 1, nsID, wfID, runID).GetTimeSkippingInfo() + s.NotNil(standby) + s.False(standby.GetConfig().GetEnabled(), + "standby must observe Config.Enabled=false after fast-forward replication") + ff := standby.GetFastForwardInfo() + s.NotNil(ff) + s.True(ff.GetHasReached(), + "standby must observe HasReached=true; absence indicates the standby's TimeSkippingTimerTask didn't replicate the fast-forward transition correctly") +} + +// TestFailoverPreservesAccumulatedSkip verifies that after failover, the new active +// cluster preserves the AccumulatedSkippedDuration accumulated under the previous +// active and can drive further work — the regenerated WorkflowBackoffTimerTask on +// the new active fires, producing a workflow task that completes the run. +func (s *timeSkippingReplicationSuite) TestFailoverPreservesAccumulatedSkip() { + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + defer cancel() + + ns := s.createGlobalNamespace() + nsID := s.describeNamespaceID(ctx, ns) + wfID := "ts-repl-failover-" + uuid.NewString() + tq := "ts-repl-failover-tq-" + uuid.NewString() + + const startDelay = time.Hour + runID := s.startSkippingWorkflow(ctx, ns, wfID, tq, 24*time.Hour, startDelay, + &commonpb.TimeSkippingConfig{Enabled: true}) + + // Skip on active; wait for replication to standby. + await.Require(ctx, s.T(), func(t *await.T) { + info := s.getExecutionInfoFromCluster(ctx, 0, nsID, wfID, runID).GetTimeSkippingInfo() + require.NotNil(t, info) + require.Greater(t, info.GetAccumulatedSkippedDuration().AsDuration(), 30*time.Minute) + }, 15*time.Second, 200*time.Millisecond) + s.waitForTimeSkippingInfoSynced(ctx, nsID, wfID, runID) + accumBefore := s.getExecutionInfoFromCluster(ctx, 0, nsID, wfID, runID). + GetTimeSkippingInfo().GetAccumulatedSkippedDuration().AsDuration() + + // Fail over to cluster[1]. cluster[1].InitialFailoverVersion=2. 
+ s.failover(ns, 0, s.clusters[1].ClusterName(), 2) + + // New active (cluster[1]) must preserve accumulated skip in its MS. + afterFailover := s.getExecutionInfoFromCluster(ctx, 1, nsID, wfID, runID).GetTimeSkippingInfo() + s.NotNil(afterFailover, "TimeSkippingInfo must persist on the new active after failover") + s.Equal(accumBefore, afterFailover.GetAccumulatedSkippedDuration().AsDuration(), + "accumulated skip must survive failover") + + // New active must be able to drive forward progress: the regenerated + // WorkflowBackoffTimerTask on cluster[1] should fire (its visibility was + // shifted into near-now by RegenerateTimerTasksForTimeSkipping at replicate + // time), making a workflow task available to complete the run. + tv := testvars.New(s.T()).WithTaskQueue(tq).WithWorkflowID(wfID) + poller := taskpoller.New(s.T(), s.clusters[1].FrontendClient(), ns) + _, err := poller.PollAndHandleWorkflowTask(tv, + func(_ *workflowservice.PollWorkflowTaskQueueResponse) (*workflowservice.RespondWorkflowTaskCompletedRequest, error) { + return &workflowservice.RespondWorkflowTaskCompletedRequest{ + Commands: []*commandpb.Command{{ + CommandType: enumspb.COMMAND_TYPE_COMPLETE_WORKFLOW_EXECUTION, + Attributes: &commandpb.Command_CompleteWorkflowExecutionCommandAttributes{ + CompleteWorkflowExecutionCommandAttributes: &commandpb.CompleteWorkflowExecutionCommandAttributes{}, + }, + }}, + }, nil + }) + s.NoError(err, "new active must be able to complete a workflow task post-failover") +}