Skip to content

Commit 1467784

Browse files
committed
Remove unnecessary truncation code
- we were truncating and overwriting value, but we didn't actually need to since we ended up truncating again right after. Removed the unnecessary and buggy code which changed the dataValue. StringMaybeTruncateBitsUnparser.unparse does not change the dataValue, instead it changes the DOS, resulting in the value getting truncated but string-length returning the untruncated length when it's called - move subset/subsetError to ThrowsSDE trait so state has access to it - refactor SpecifiedLengthExplicitImplicitUnparser to be more straightforward and to remove unused cruft - add tests DAFFODIL-1592
1 parent d79efe1 commit 1467784

File tree

6 files changed

+170
-243
lines changed

6 files changed

+170
-243
lines changed

daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala

+1-2
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,7 @@ trait SpecifiedLengthExplicitImplicitUnparserMixin {
110110
new SpecifiedLengthExplicitImplicitUnparser(
111111
u,
112112
e.elementRuntimeData,
113-
e.unparseTargetLengthInBitsEv,
114-
e.maybeUnparseTargetLengthInCharactersEv
113+
e.unparseTargetLengthInBitsEv
115114
)
116115
}
117116
}

daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala

+13
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,19 @@ trait ThrowsSDE {
8080
final def notYetImplemented(msg: String, args: Any*): Nothing =
8181
SDE("Feature not yet implemented: " + msg, args: _*)
8282

83+
/**
84+
* Use for cases where it is an SDE because of something we've chosen
85+
* not to implement. Not merely short term (haven't coded it yet, but intending to),
86+
* more like things we've chosen to defer intentionally to some future release.
87+
*/
88+
def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = {
89+
if (!testThatWillThrowIfFalse) subsetError(msg, args: _*)
90+
}
91+
92+
def subsetError(msg: String, args: Any*) = {
93+
val msgTxt = msg.format(args: _*)
94+
SDE("Subset: " + msgTxt)
95+
}
8396
}
8497

8598
/**

daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala

+14-226
Original file line numberDiff line numberDiff line change
@@ -20,42 +20,22 @@ package org.apache.daffodil.unparsers.runtime1
2020
import org.apache.daffodil.lib.exceptions.Assert
2121
import org.apache.daffodil.lib.schema.annotation.props.gen.LengthUnits
2222
import org.apache.daffodil.lib.schema.annotation.props.gen.Representation
23-
import org.apache.daffodil.lib.util.Maybe
2423
import org.apache.daffodil.lib.util.Maybe._
25-
import org.apache.daffodil.lib.util.MaybeJULong
26-
import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
2724
import org.apache.daffodil.runtime1.infoset.DIElement
2825
import org.apache.daffodil.runtime1.infoset.DISimple
2926
import org.apache.daffodil.runtime1.infoset.Infoset
30-
import org.apache.daffodil.runtime1.infoset.RetryableException
3127
import org.apache.daffodil.runtime1.processors.CharsetEv
3228
import org.apache.daffodil.runtime1.processors.ElementRuntimeData
33-
import org.apache.daffodil.runtime1.processors.Evaluatable
3429
import org.apache.daffodil.runtime1.processors.ParseOrUnparseState
35-
import org.apache.daffodil.runtime1.processors.Success
3630
import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInBitsEv
37-
import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInCharactersEv
3831
import org.apache.daffodil.runtime1.processors.unparsers._
3932

4033
import passera.unsigned.ULong
4134

42-
/**
43-
* Restricts the bits available for unparsing to just those within
44-
* the specified length computed.
45-
*
46-
* If a unparser (supplied as arg) runs past the available space,
47-
* that's an unparse error.
48-
*
49-
* Truncation of strings - the only case where we truncate, and only when
50-
* dfdl:truncateSpecifiedLengthString is 'yes', is handled elsewhere.
51-
*/
52-
sealed abstract class SpecifiedLengthUnparserBase(eUnparser: Unparser, erd: ElementRuntimeData)
53-
5435
final class SpecifiedLengthExplicitImplicitUnparser(
5536
eUnparser: Unparser,
5637
erd: ElementRuntimeData,
57-
targetLengthInBitsEv: UnparseTargetLengthInBitsEv,
58-
maybeTargetLengthInCharactersEv: Maybe[UnparseTargetLengthInCharactersEv]
38+
targetLengthInBitsEv: UnparseTargetLengthInBitsEv
5939
) extends CombinatorUnparser(erd) {
6040

6141
override lazy val runtimeDependencies = Vector()
@@ -74,215 +54,23 @@ final class SpecifiedLengthExplicitImplicitUnparser(
7454
}
7555

7656
override final def unparse(state: UState): Unit = {
77-
78-
erd.impliedRepresentation match {
79-
case Representation.Binary =>
80-
unparseBits(state)
81-
case Representation.Text => {
82-
val dcs = getCharset(state)
83-
if (dcs.maybeFixedWidth.isDefined)
84-
unparseBits(state)
85-
else {
86-
// we know the encoding is variable width characters
87-
// but we don't know if the length units characters or bits/bytes.
88-
lengthUnits match {
89-
case LengthUnits.Bits | LengthUnits.Bytes =>
90-
unparseVarWidthCharactersInBits(state)
91-
case LengthUnits.Characters =>
92-
unparseVarWidthCharactersInCharacters(state)
93-
}
94-
}
95-
}
96-
}
97-
}
98-
99-
/**
100-
* Encoding is variable width (e.g., utf-8, but the
101-
* target length is expressed in bits.
102-
*
103-
* Truncation, in this case, requires determining how many
104-
* of the string's characters fit within the available target length
105-
* bits.
106-
*/
107-
def unparseVarWidthCharactersInBits(state: UState): Unit = {
108-
val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv)
109-
110-
if (maybeTLBits.isDefined) {
111-
//
112-
// We know the target length. We can use it.
113-
//
114-
if (areTruncating) {
115-
val diSimple = state.currentInfosetNode.asSimple
116-
val v = diSimple.dataValue.getString
117-
val tl = maybeTLBits.get
118-
val cs = getCharset(state)
119-
val newV = state.truncateToBits(v, cs, tl)
120-
//
121-
// JIRA DFDL-1592
122-
//
123-
// BUG: should not modify the actual dataset value.
124-
// as fn:string-length of the value should always return the same
125-
// value which is the un-truncated length.
126-
//
127-
diSimple.overwriteDataValue(newV)
128-
}
129-
eUnparser.unparse1(state)
57+
lazy val dcs = getCharset(state)
58+
if (
59+
erd.impliedRepresentation == Representation.Text &&
60+
lengthUnits == LengthUnits.Characters &&
61+
dcs.maybeFixedWidth.isEmpty &&
62+
erd.isComplexType
63+
) {
64+
state.subsetError(
65+
"Variable width character encoding '%s', dfdl:lengthKind '%s' and dfdl:lengthUnits '%s' are not supported for complex types.",
66+
getCharset(state).name,
67+
lengthKind.toString,
68+
lengthUnits.toString
69+
)
13070
} else {
131-
// target length unknown
132-
// ignore constraining the output length. Just unparse it.
133-
//
134-
// This happens when we're unparsing, and this element depends on a prior element for
135-
// determining its length, but that prior element has dfdl:outputValueCalc that depends
136-
// on this element.
137-
// This breaks the chicken-egg cycle.
138-
//
13971
eUnparser.unparse1(state)
14072
}
14173
}
142-
143-
private def areTruncating = {
144-
if (erd.isSimpleType && (erd.optPrimType.get eq PrimType.String)) {
145-
Assert.invariant(erd.optTruncateSpecifiedLengthString.isDefined)
146-
erd.optTruncateSpecifiedLengthString.get
147-
} else
148-
false
149-
}
150-
151-
/**
152-
* Encoding is variable width (e.g., utf-8). The target
153-
* length is expressed in characters.
154-
*/
155-
def unparseVarWidthCharactersInCharacters(state: UState): Unit = {
156-
157-
//
158-
// variable-width encodings and lengthUnits characters, and lengthKind explicit
159-
// is not supported (currently) for complex types
160-
//
161-
state.schemaDefinitionUnless(
162-
erd.isSimpleType,
163-
"Variable width character encoding '%s', dfdl:lengthKind '%s' and dfdl:lengthUnits '%s' are not supported for complex types.",
164-
getCharset(state).name,
165-
lengthKind.toString,
166-
lengthUnits.toString
167-
)
168-
169-
Assert.invariant(erd.isSimpleType)
170-
Assert.invariant(this.maybeTargetLengthInCharactersEv.isDefined)
171-
val tlEv = this.maybeTargetLengthInCharactersEv.get
172-
val tlChars = this.getMaybeTL(state, tlEv)
173-
174-
if (tlChars.isDefined) {
175-
//
176-
// possibly truncate
177-
//
178-
if (areTruncating) {
179-
val diSimple = state.currentInfosetNode.asSimple
180-
val v = diSimple.dataValue.getString
181-
val tl = tlChars.get
182-
if (v.length > tl) {
183-
// string is too long, truncate to target length
184-
val newV = v.substring(0, tl.toInt)
185-
//
186-
// BUG: JIRA DFDL-1592 - should not be overwriting the value with
187-
// truncated value.
188-
//
189-
diSimple.overwriteDataValue(newV)
190-
}
191-
}
192-
eUnparser.unparse1(state, erd)
193-
} else {
194-
// target length unknown
195-
// ignore constraining the output length. Just unparse it.
196-
//
197-
// This happens when we're unparsing, and this element depends on a prior element for
198-
// determining its length, but that prior element has dfdl:outputValueCalc that depends
199-
// on this element.
200-
// This breaks the chicken-egg cycle.
201-
//
202-
eUnparser.unparse1(state, erd)
203-
}
204-
}
205-
206-
private def getMaybeTL(state: UState, TLEv: Evaluatable[MaybeJULong]): MaybeJULong = {
207-
val maybeTLBits =
208-
try {
209-
val tlRes = TLEv.evaluate(state)
210-
Assert.invariant(tlRes.isDefined) // otherwise we shouldn't be in this method at all
211-
tlRes
212-
} catch {
213-
case e: RetryableException => {
214-
//
215-
// TargetLength expression couldn't be evaluated.
216-
//
217-
MaybeJULong.Nope
218-
}
219-
}
220-
maybeTLBits
221-
}
222-
223-
/**
224-
* Regardless of the type (text or binary), the target length
225-
* will be provided in bits.
226-
*/
227-
def unparseBits(state: UState): Unit = {
228-
val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv)
229-
230-
if (maybeTLBits.isDefined) {
231-
//
232-
// We know the target length. We can use it.
233-
//
234-
// val nBits = maybeTLBits.get
235-
// val dos = state.dataOutputStream
236-
237-
//
238-
// withBitLengthLimit is incorrect. It doesn't take into account
239-
// that after the unparse, we could be looking at a current state
240-
// with a distinct DOS
241-
//
242-
// val isLimitOk = dos.withBitLengthLimit(nBits) {
243-
// eUnparser.unparse1(state, erd)
244-
// }
245-
//
246-
// if (!isLimitOk) {
247-
// val availBits = if (dos.remainingBits.isDefined) dos.remainingBits.get.toString else "(unknown)"
248-
// UE(state, "Insufficient bits available. Required %s bits, but only %s were available.", nBits, availBits)
249-
// }
250-
251-
eUnparser.unparse1(state, erd)
252-
253-
// at this point the recursive parse of the children is finished
254-
255-
if (state.processorStatus ne Success) return
256-
257-
// We might not have used up all the bits. So some bits may need to
258-
// be skipped and filled in by fillbyte.
259-
//
260-
// In the DFDL data grammar the region being skipped is either the
261-
// RightFill region, or the ElementUnused region. Skipping this is handled
262-
// elsewhere, along with insertion of padding before/after a string.
263-
//
264-
265-
} else {
266-
//
267-
// we couldn't get the target length
268-
//
269-
// This happens when we're unparsing, and this element depends on a prior element for
270-
// determining its length via a length expression, but that prior element
271-
// has dfdl:outputValueCalc that depends on this element, typically by a
272-
// call to dfdl:valueLength of this, or of some structure that includes
273-
// this.
274-
//
275-
// This breaks the chicken-egg cycle, by just unparsing it without
276-
// constraint. That produces the value which (ignoring truncation)
277-
// can be unparsed to produce the dfdl:valueLength of this element.
278-
//
279-
// This does assume that the value will get truncated properly for the
280-
// case where we do truncation (type string, with
281-
// dfdl:truncateSpecifiedLengthString 'yes') by some other mechanism.
282-
//
283-
eUnparser.unparse1(state, erd)
284-
}
285-
}
28674
}
28775

28876
// TODO: implement the capture length unparsers as just using this trait?

daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala

-15
Original file line numberDiff line numberDiff line change
@@ -281,19 +281,4 @@ trait ImplementsThrowsOrSavesSDE extends ImplementsThrowsSDE with SavesErrorsAnd
281281
}
282282
}
283283
}
284-
285-
/**
286-
* Use for cases where it is an SDE because of something we've chosen
287-
* not to implement. Not merely short term (haven't coded it yet, but intending to),
288-
* more like things we've chosen to defer intentionally to some future release.
289-
*/
290-
def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = {
291-
if (!testThatWillThrowIfFalse) subsetError(msg, args: _*)
292-
}
293-
294-
def subsetError(msg: String, args: Any*) = {
295-
val msgTxt = msg.format(args: _*)
296-
SDE("Subset: " + msgTxt)
297-
}
298-
299284
}

0 commit comments

Comments
 (0)