diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PState.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PState.scala index fa64d628bb..7154d890d0 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PState.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PState.scala @@ -429,6 +429,28 @@ final class PState private ( reset(pouPop) } + /** + * Restore state to the point of uncertainty WITHOUT discarding it. The POU + * remains on the stack and can still be used for a subsequent reset or + * discard by the outer handler. Used when a separator helper needs to + * speculatively rewind state (e.g., postfix absent-rep detection) while + * leaving the outer POU available for the outer handler to discard normally. + * + * Unlike resetToPointOfUncertainty, this does not pop the POU from the + * stack, decrement the infoset walker block count, or return the mark to + * the pool. Because dataInputStream.reset discards its mark argument, a + * fresh data stream mark is captured at the restored position so that the + * outer handler can still call resetToPointOfUncertainty or + * discardPointOfUncertainty. + */ + def restoreToPointOfUncertainty(pou: PState.Mark): Unit = { + Assert.usage(!isPointOfUncertaintyResolved(pou)) + pou.restoreInto(this) + // dataInputStream.reset (called inside restoreInto) discards pou.disMark. + // Re-capture at this (= outer POU) position so the POU remains valid. + pou.disMark = dataInputStream.mark(pou.poolDebugLabel) + } + /** * Discard the point of uncertainty created by withPointOfUncertainty. Once * called, the pou variable should no longer be used. If the pou is not diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala index 6919cb369e..c2eea03cb9 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala @@ -17,6 +17,7 @@ package org.apache.daffodil.runtime1.processors.parsers import org.apache.daffodil.lib.exceptions.Assert +import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.runtime1.processors.ElementRuntimeData import org.apache.daffodil.runtime1.processors.Failure import org.apache.daffodil.runtime1.processors.Success @@ -44,7 +45,8 @@ sealed abstract class SeparatorParseHelper( def parseOneWithSeparator( state: PState, - requiredOptional: RequiredOptionalStatus + requiredOptional: RequiredOptionalStatus, + maybePOU: Maybe[PState.Mark] ): ParseAttemptStatus /** @@ -76,9 +78,10 @@ final class PrefixSeparatorHelper(sep: Parser, childParser: Parser, scParserArg: override def parseOneWithSeparator( pstate: PState, - requiredOptional: RequiredOptionalStatus + requiredOptional: RequiredOptionalStatus, + maybePOU: Maybe[PState.Mark] ): ParseAttemptStatus = - parseOneWithInfixOrPrefixSeparator(true, pstate, requiredOptional) + parseOneWithInfixOrPrefixSeparator(true, pstate, requiredOptional, maybePOU) } final class InfixSeparatorHelper(sep: Parser, childParser: Parser, scParserArg: Separated) @@ -89,13 +92,19 @@ final class InfixSeparatorHelper(sep: Parser, childParser: Parser, scParserArg: override def parseOneWithSeparator( pstate: PState, - requiredOptional: RequiredOptionalStatus + requiredOptional: RequiredOptionalStatus, + maybePOU: Maybe[PState.Mark] ): ParseAttemptStatus = { val infixSepShouldBePresent = pstate.mpstate.groupPos > 1 - parseOneWithInfixOrPrefixSeparator(infixSepShouldBePresent, pstate, requiredOptional) + parseOneWithInfixOrPrefixSeparator( + infixSepShouldBePresent, + pstate, + requiredOptional, + maybePOU + ) } } @@ -106,7 +115,8 @@ trait InfixPrefixSeparatorHelperMixin { self: SeparatorParseHelper => final protected def parseOneWithInfixOrPrefixSeparator( shouldParseTheSep: Boolean, pstate: PState, - requiredOptional: RequiredOptionalStatus + requiredOptional: RequiredOptionalStatus, + maybePOU: Maybe[PState.Mark] ): ParseAttemptStatus = { val sepStatus = @@ -116,23 +126,22 @@ trait InfixPrefixSeparatorHelperMixin { self: SeparatorParseHelper => // while searching for it which should not be propagated upward requiredOptional match { case _: RequiredOptionalStatus.Optional => { - // Create a point of uncertainty with which to reset if we find errors - pstate.withPointOfUncertainty( - "parseOneWithInfixOrPrefixSeparator", - childParser.context - ) { pou => - sep.parse1(pstate) - - val rv = if (pstate.processorStatus eq Success) { - SeparatorParseStatus.SeparatorFound - } else { - // reset to the point of uncertainty to discard the errors - pstate.resetToPointOfUncertainty(pou) - SeparatorParseStatus.SeparatorFailed - } - rv + // Optional elements require arrayIndexStatus to return OptionalMiddle/OptionalLast, + // which requires minRepeats < maxRepeats, which only happens in MinMaxOccursCountParser + // and MinMaxOccursCountParser has HasPoU, so needsPoU = true, so One(pou) is always passed + // it is a bug if maybePOU is not defined then + Assert.invariant(maybePOU.isDefined) + val pou = maybePOU.get + sep.parse1(pstate) + val rv = if (pstate.processorStatus eq Success) { + SeparatorParseStatus.SeparatorFound + } else { + // reset to the point of uncertainty to discard the errors + pstate.resetToPointOfUncertainty(pou) + SeparatorParseStatus.SeparatorFailed } + rv } case _ => { sep.parse1(pstate) @@ -185,124 +194,112 @@ final class PostfixSeparatorHelper( scParserArg: Separated, isSimpleDelimited: Boolean ) extends SeparatorParseHelper(sep, child, scParserArg) { - import ParseAttemptStatus.* override def parseOneWithSeparator( pstate: PState, - requiredOptional: RequiredOptionalStatus + requiredOptional: RequiredOptionalStatus, + maybePOU: Maybe[PState.Mark] ): ParseAttemptStatus = { val prevBitPosBeforeChild = pstate.bitPos0b - pstate.withPointOfUncertainty("PostfixSeparatorHelper", childParser.context) { pou => - childParser.parse1(pstate) - val childSuccessful = pstate.processorStatus eq Success - val childFailure = !childSuccessful // just makes later logic easier to read - val bitPosAfterChildAttempt = pstate.bitPos0b - val prh = scParser.parseResultHelper - // This seems odd, but there are cases where after a failure - // the bitPos has not yet been repositioned back to where it started. - // This does happen for complex types. - val hasZLChildAttempt = prevBitPosBeforeChild == bitPosAfterChildAttempt - - if (childSuccessful) { - val dataOnlyRep = - prh.computeParseAttemptStatus( - scParser, - prevBitPosBeforeChild, - pstate, - requiredOptional - ) - sep.parse1(pstate) - if (pstate.processorStatus eq Success) { - // we got the postfix sep after successful parse of the data item - // so whatever the status of the item was, that's the status overall. - dataOnlyRep - } else { - // child successful, but - // no postfix sep found. - // capture failure reason, but reset position to before the child, - // and remove any elements. That way this behaves like a prefix - // or infix separator, where if you don't get the sep, then you - // usually (except first one in infix case) don't run the child parser - val failure = pstate.processorStatus.asInstanceOf[Failure] + childParser.parse1(pstate) + val childSuccessful = pstate.processorStatus eq Success + val childFailure = !childSuccessful // just makes later logic easier to read + val bitPosAfterChildAttempt = pstate.bitPos0b + val prh = scParser.parseResultHelper + // This seems odd, but there are cases where after a failure + // the bitPos has not yet been repositioned back to where it started. + // This does happen for complex types. + val hasZLChildAttempt = prevBitPosBeforeChild == bitPosAfterChildAttempt + + if (childSuccessful) { + val dataOnlyRep = + prh.computeParseAttemptStatus( + scParser, + prevBitPosBeforeChild, + pstate, + requiredOptional + ) + sep.parse1(pstate) + if (pstate.processorStatus eq Success) { + // we got the postfix sep after successful parse of the data item + // so whatever the status of the item was, that's the status overall. + dataOnlyRep + } else { + // child successful, but no postfix sep found. + // The outer handler (parseOneInstanceWithMaybePoU) resets state back + // to before the child for optional elements without a discriminator, or + // propagates as a hard failure if a discriminator fired (spec 9.3.3.1). + failedSeparator(pstate, "postfix") + prh.computeFailedSeparatorParseAttemptStatus( + scParser, + prevBitPosBeforeChild, + pstate, + hasZLChildAttempt, + requiredOptional + ) + } + } else { + Assert.invariant(childFailure) + // We might be tempted to just try the postfix sep now, + // to see if the postfix sep is there immediately. + // This is, however, problematic. + // + // We're depending on this item being an element with lengthKind="delimited" + // which can't have a zero-length representation, so that if there is zero-data before + // the postfix separator, we can declare the item to be absentRep. + // + // But what if the element is not lengthKind 'delimited'. + // Or what if it has complex type? But has a non-zero-length structure where this + // same delimiter is used at a more deeply nested depth? + // + // If the type is simple, and lengthKind 'delimited' we can proceed. + // Otherwise we punt and call it missing. + // + // required elements within an ordered sequence get Nope passed + // as maybePOU (because needsPoU = HasPoU && !Required = false). + // Without a POU to restore to, there's no position to reset back + // and try the bare postfix separator, so the guard rightfully skips + // that optimization and falls through to the else below + // (treating the child failure as unresolvable missing). + if ( + isSimpleDelimited && maybePOU.isDefined && !pstate.isPointOfUncertaintyResolved( + maybePOU.get + ) + ) { + val failure = pstate.processorStatus.asInstanceOf[Failure] - Assert.invariant(!pstate.isPointOfUncertaintyResolved(pou)) - pstate.resetToPointOfUncertainty(pou) + pstate.restoreToPointOfUncertainty(maybePOU.get) + sep.parse1(pstate) + if (pstate.isSuccess) { + // child failed, but postfix sep found immediately after reset + // marking this as ZL content. + // Use hasZLChildAttempt (captured before sep.parse1) so isZL reflects + // the child bit position, not the post-separator position. pstate.setFailed(failure.cause) - failedSeparator(pstate, "postfix") - prh.computeFailedSeparatorParseAttemptStatus( + val pas = prh.computeFailedParseAttemptStatus( scParser, prevBitPosBeforeChild, pstate, hasZLChildAttempt, requiredOptional ) - } - } else { - Assert.invariant(childFailure) - // We might be tempted to just try the postfix sep now, - // to see if the postfix sep is there immediately. - // This is, however, problematic. - // - // We're depending on this item being an element with lengthKind="delimited" - // which can't have a zero-length representation, so that if there is zero-data before - // the postfix separator, we can declare the item to be absentRep. - // - // But what if the element is not lengthKind 'delimited'. - // Or what if it has complex type? But has a non-zero-length structure where this - // same delimiter is used at a more deeply nested depth? - // - // If the type is simple, and lengthKind 'delimited' we can proceed. - // Otherwise we punt and call it missing. - // - if (isSimpleDelimited) { - val failure = pstate.processorStatus.asInstanceOf[Failure] - - Assert.invariant(!pstate.isPointOfUncertaintyResolved(pou)) - pstate.resetToPointOfUncertainty(pou) - - sep.parse1(pstate) - if (pstate.isSuccess) { - val posAfterSep = pstate.bitPos0b - // failed child, but success on postfix sep. - // behave here just like a prefix separator that was then followed by a child failure - pstate.setFailed(failure.cause) - val pas = - prh.computeParseAttemptStatus( - scParser, - prevBitPosBeforeChild, - pstate, - requiredOptional - ) - val res = pas match { - case AbsentRep => { - pstate.dataInputStream.setBitPos0b(posAfterSep) - MissingSeparator - } - case _ => pas - } - res - } else { - // the separator failed on ZL data - // so no chance on a ZL representation here. - val isZL = false - prh.computeFailedSeparatorParseAttemptStatus( - scParser, - prevBitPosBeforeChild, - pstate, - isZL, - requiredOptional - ) + // For non-positional (anyEmpty) sequences, the child result helper returns + // MissingItem for optional ZL elements, but since the postfix separator WAS + // found this is AbsentRep per DFDL spec 9.2.4. + pas match { + case ParseAttemptStatus.MissingItem + if requiredOptional.isInstanceOf[RequiredOptionalStatus.Optional] => + ParseAttemptStatus.AbsentRep + case _ => pas } } else { - // not simple delimited. We really have no way of knowing - // if trying to parse just the sep now makes any sense at all. - // (ex: the child could be failing because it is fixed length, with asserts that check the value - // that fail.) + // the separator failed on ZL data + // so no chance on a ZL representation here. val isZL = false - prh.computeFailedParseAttemptStatus( + prh.computeFailedSeparatorParseAttemptStatus( scParser, prevBitPosBeforeChild, pstate, @@ -310,8 +307,20 @@ final class PostfixSeparatorHelper( requiredOptional ) } + } else { + // not simple delimited. We really have no way of knowing + // if trying to parse just the sep now makes any sense at all. + // (ex: the child could be failing because it is fixed length, with asserts that check the value + // that fail.) + val isZL = false + prh.computeFailedParseAttemptStatus( + scParser, + prevBitPosBeforeChild, + pstate, + isZL, + requiredOptional + ) } - } } } diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedSequenceParsers.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedSequenceParsers.scala index 8ff39edb08..dff35f0998 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedSequenceParsers.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedSequenceParsers.scala @@ -17,6 +17,7 @@ package org.apache.daffodil.runtime1.processors.parsers import org.apache.daffodil.lib.schema.annotation.props.gen.SeparatorPosition +import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.runtime1.processors.* trait Separated { self: SequenceChildParser => @@ -43,9 +44,10 @@ trait Separated { self: SequenceChildParser => final def parseOne( pstate: PState, - requiredOptional: RequiredOptionalStatus + requiredOptional: RequiredOptionalStatus, + maybePOU: Maybe[PState.Mark] ): ParseAttemptStatus = { - separatorHelper.parseOneWithSeparator(pstate, requiredOptional) + separatorHelper.parseOneWithSeparator(pstate, requiredOptional, maybePOU) } final override def arrayCompleteChecks( diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SequenceChildBases.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SequenceChildBases.scala index cce424fdfc..23e8ab12a4 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SequenceChildBases.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SequenceChildBases.scala @@ -207,7 +207,11 @@ abstract class SequenceChildParser( final override def parse(pstate: PState): Unit = Assert.usageError("Not to be called on sequence child parsers") - def parseOne(pstate: PState, requiredOptional: RequiredOptionalStatus): ParseAttemptStatus + def parseOne( + pstate: PState, + requiredOptional: RequiredOptionalStatus, + maybePOU: Maybe[PState.Mark] + ): ParseAttemptStatus def maybeStaticRequiredOptionalStatus: Maybe[RequiredOptionalStatus] @@ -264,7 +268,11 @@ final class NonRepresentedSequenceChildParser( def maybeStaticRequiredOptionalStatus: Maybe[RequiredOptionalStatus] = Assert.usageError("not to be used for non-represented terms.") - def parseOne(pstate: PState, ignored_roStatus: RequiredOptionalStatus): ParseAttemptStatus = { + def parseOne( + pstate: PState, + ignored_roStatus: RequiredOptionalStatus, + maybePOU: Maybe[PState.Mark] + ): ParseAttemptStatus = { childParser.parse1(pstate) if (pstate.processorStatus eq Success) ParseAttemptStatus.NormalRep diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SequenceParserBases.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SequenceParserBases.scala index a0366cf812..e339d77d77 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SequenceParserBases.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SequenceParserBases.scala @@ -226,7 +226,10 @@ abstract class SequenceParserBase( // should never have non-represented children in unordered sequences Assert.invariant(isOrdered) - nonRepresentedParser.parseOne(pstate, null) + // non-represented children by definition can never be absent or + // optional, so there is no speculative parsing and no need for + // a point of uncertainty. + nonRepresentedParser.parseOne(pstate, null, Nope) // don't need to digest result from this. All // information about success/failure is in the pstate. // @@ -258,22 +261,23 @@ abstract class SequenceParserBase( resultOfTry = nextResultOfTry resultOfTry match { case AbsentRep => { - // a scalar element, or a model group is absent. That means no separator - // was found for it. - // - // That means were at the end of the representation of this sequence, - // This is only returned as resultOfTry if it is - // OK for us to act on it. I.e., we know that the situation is - // Positional trailing, with a group that can have zero-length representation. - // and no separator was found for it. - // - // So we mask the failure, and exit the sequence successfully - pstate.setSuccess() - isDone = true - // If we're masking the failure, we don't want the error dianostics - // to flow up. Restore the diagnostics from before the parse - // attempt - pstate.diagnostics = diagnosticsBeforeAttempt + // A scalar element is absent. For positional trailing, this means no + // separator was found, so we mask the failure and exit the sequence + // successfully. For non-positional separated sequences (e.g., anyEmpty + // postfix), the postfix separator was found and position has already + // been advanced past it by parseOneInstanceWithMaybePoU, so we + // continue parsing the next sequence child instead. + val continueSequence = scalarParser match { + case sep: Separated => !sep.isPositional + case _ => false + } + if (continueSequence) { + pstate.mpstate.moveOverOneGroupIndexOnly() + } else { + pstate.setSuccess() + isDone = true + pstate.diagnostics = diagnosticsBeforeAttempt + } } // This child alternative of an unordered sequence failed, and that @@ -385,7 +389,7 @@ abstract class SequenceParserBase( checkN(pstate, parser) // check if occursIndex exceeds tunable limit. val priorPos = pstate.bitPos0b - var resultOfTry = parser.parseOne(pstate, roStatus) + var resultOfTry = parser.parseOne(pstate, roStatus, maybePoU) val currentPos = pstate.bitPos0b diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/UnseparatedSequenceParsers.scala b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/UnseparatedSequenceParsers.scala index ebb82983c5..65c7ffdca7 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/UnseparatedSequenceParsers.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/UnseparatedSequenceParsers.scala @@ -16,6 +16,7 @@ */ package org.apache.daffodil.runtime1.processors.parsers +import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.runtime1.processors.ElementRuntimeData import org.apache.daffodil.runtime1.processors.OccursCountEv import org.apache.daffodil.runtime1.processors.Processor @@ -27,7 +28,8 @@ trait Unseparated { self: SequenceChildParser => final def parseOne( pstate: PState, - requiredOptional: RequiredOptionalStatus + requiredOptional: RequiredOptionalStatus, + maybePOU: Maybe[PState.Mark] ): ParseAttemptStatus = { val prevBitPosBeforeChild = pstate.bitPos0b self.childParser.parse1(pstate) diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroup.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroup.tdml index f58c28f327..13f6f43395 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroup.tdml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroup.tdml @@ -1780,5 +1780,200 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + separator + not found + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hello + + + + + + + + + + + + + + unable to parse + integer from empty string + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 42 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 42 + + + + + diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section14/sequence_groups/TestSequenceGroups.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section14/sequence_groups/TestSequenceGroups.scala index b661c5dc9f..94615234fe 100644 --- a/daffodil-test/src/test/scala/org/apache/daffodil/section14/sequence_groups/TestSequenceGroups.scala +++ b/daffodil-test/src/test/scala/org/apache/daffodil/section14/sequence_groups/TestSequenceGroups.scala @@ -121,4 +121,12 @@ class TestSequenceGroup extends TdmlTests { @Test def choice_group_with_annotation_01 = test @Test def similar_model_groups_01 = test + + // DAFFODIL-3085 + @Test def seq2 = test + @Test def seq3 = test + @Test def seq4 = test + @Test def seq5 = test + @Test def seq6 = test + @Test def seq7 = test }