Skip to content

Commit 82231e1

Browse files
committed
Add upper bound on container pre-allocation during deserialization
1 parent 0bf06a3 commit 82231e1

File tree

4 files changed

+46
-3
lines changed

4 files changed

+46
-3
lines changed

codecs/cbor-codec/src/main/java/software/amazon/smithy/java/cbor/CborDeserializer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ public <T> void readList(Schema schema, T state, ListMemberConsumer<T> consumer)
445445

446446
@Override
447447
public int containerSize() {
448-
return -1; //parser.collectionSize(); FIXME : Renable once we have some bounds checking, otherwise a small payload can trigger a huge allocation
448+
return parser.collectionSize();
449449
}
450450

451451
@Override

codegen/codegen-core/src/main/java/software/amazon/smithy/java/codegen/generators/ListGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ public void accept(${shape:B} values, ${shapeSerializer:T} serializer) {
6060
}
6161
6262
static ${shape:T} deserialize${name:U}(${schema:T} schema, ${shapeDeserializer:T} deserializer) {
63-
var size = deserializer.containerSize();
63+
var size = Math.min(deserializer.containerSize(), deserializer.containerPreAllocationLimit());
6464
${shape:T} result = size == -1 ? new ${collectionImpl:T}<>() : new ${collectionImpl:T}<>(size);
6565
deserializer.readList(schema, result, ${name:U}$$MemberDeserializer.INSTANCE);
6666
return result;

codegen/codegen-core/src/main/java/software/amazon/smithy/java/codegen/generators/MapGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ public void accept(${value:B} values, ${shapeSerializer:T} serializer) {
8282
}
8383
8484
static ${shape:T} deserialize${name:U}(${schema:T} schema, ${shapeDeserializer:T} deserializer) {
85-
var size = deserializer.containerSize();
85+
var size = Math.min(deserializer.containerSize(), deserializer.containerPreAllocationLimit());
8686
${shape:T} result = size == -1 ? new ${collectionImpl:T}<>() : new ${collectionImpl:T}<>(size);
8787
deserializer.readStringMap(schema, result, ${name:U}$$ValueDeserializer.INSTANCE);
8888
return result;

core/src/main/java/software/amazon/smithy/java/core/serde/ShapeDeserializer.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
*/
2121
public interface ShapeDeserializer extends AutoCloseable {
2222

23+
int CONTAINER_PRE_ALLOCATION_LIMIT =
24+
Integer.parseInt(System.getProperty("smithy.java.serde.container-pre-allocation-limit", "10000"));
25+
2326
@Override
2427
default void close() {}
2528

@@ -155,6 +158,46 @@ default int containerSize() {
155158
return -1;
156159
}
157160

161+
/**
162+
* Returns the maximum number of elements to pre-allocate when deserializing container types (lists and maps).
163+
*
164+
* <p>This method provides a safety limit to prevent memory exhaustion attacks when deserializing untrusted data.
165+
* When a deserializer encounters a container (list or map) that reports its size via {@link #containerSize()},
166+
* the deserialization logic may attempt to pre-allocate space for all elements to improve performance. However,
167+
* if the reported size is maliciously large (e.g., a serialized value claims to contain billions of elements),
168+
* pre-allocating that much memory could cause an {@link OutOfMemoryError} or severe performance degradation.
169+
*
170+
* <p>This limit acts as a safeguard by capping the maximum pre-allocation size. If {@link #containerSize()}
171+
* returns a value greater than this limit, deserialization implementations should fall back to dynamic allocation
172+
* strategies (e.g., adding elements one at a time to a growable collection) rather than pre-allocating space
173+
* for the full reported size.
174+
*
175+
*
176+
* <p><b>Security Implications:</b>
177+
* Setting this limit too high may expose the application to denial-of-service attacks through memory exhaustion.
178+
* Setting it too low may reduce deserialization performance for legitimate large datasets. The default value
179+
* of 50,000 elements provides a reasonable balance for most use cases.
180+
*
181+
* <p><b>Configuration:</b>
182+
* The default implementation returns the value of {@link #CONTAINER_PRE_ALLOCATION_LIMIT}, which is controlled
183+
* by the {@code smithy.java.serde.container-pre-allocation-limit} system property (default: 10000). Implementations
184+
* may override this method to provide custom limits based on specific deserialization contexts or security requirements.
185+
*
186+
* <p><b>Relationship to {@link #containerSize()}:</b>
187+
* This method should be used in conjunction with {@link #containerSize()} when determining how much memory to
188+
* pre-allocate. While {@code containerSize()} reports the claimed size from the serialized data, this method
189+
* provides the trusted upper bound for pre-allocation decisions.
190+
*
191+
* @return the maximum number of elements to pre-allocate for container deserialization, must be non-negative.
192+
* A value of 0 indicates that pre-allocation should never be performed, forcing all containers to use
193+
* dynamic allocation strategies.
194+
* @see #containerSize()
195+
* @see #CONTAINER_PRE_ALLOCATION_LIMIT
196+
*/
197+
default int containerPreAllocationLimit() {
198+
return CONTAINER_PRE_ALLOCATION_LIMIT;
199+
}
200+
158201
/**
159202
* Attempt to read a map value.
160203
*

0 commit comments

Comments
 (0)