From 05fffd960eb2ffb88e1dd0e954606e31c6c44da1 Mon Sep 17 00:00:00 2001 From: Adwait Kumar Singh Date: Tue, 11 Nov 2025 14:55:55 -0800 Subject: [PATCH] Add upper bound on container pre-allocation during deserialization --- .../smithy/java/cbor/CborDeserializer.java | 2 +- .../codegen/generators/ListGenerator.java | 2 +- .../java/codegen/generators/MapGenerator.java | 2 +- .../java/core/serde/ShapeDeserializer.java | 43 +++++++++++++++++++ 4 files changed, 46 insertions(+), 3 deletions(-) diff --git a/codecs/cbor-codec/src/main/java/software/amazon/smithy/java/cbor/CborDeserializer.java b/codecs/cbor-codec/src/main/java/software/amazon/smithy/java/cbor/CborDeserializer.java index b0fcbfe0d..7679fc069 100644 --- a/codecs/cbor-codec/src/main/java/software/amazon/smithy/java/cbor/CborDeserializer.java +++ b/codecs/cbor-codec/src/main/java/software/amazon/smithy/java/cbor/CborDeserializer.java @@ -445,7 +445,7 @@ public void readList(Schema schema, T state, ListMemberConsumer consumer) @Override public int containerSize() { - return -1; //parser.collectionSize(); FIXME : Renable once we have some bounds checking, otherwise a small payload can trigger a huge allocation + return parser.collectionSize(); } @Override diff --git a/codegen/codegen-core/src/main/java/software/amazon/smithy/java/codegen/generators/ListGenerator.java b/codegen/codegen-core/src/main/java/software/amazon/smithy/java/codegen/generators/ListGenerator.java index 61259f303..854fa427a 100644 --- a/codegen/codegen-core/src/main/java/software/amazon/smithy/java/codegen/generators/ListGenerator.java +++ b/codegen/codegen-core/src/main/java/software/amazon/smithy/java/codegen/generators/ListGenerator.java @@ -60,7 +60,7 @@ public void accept(${shape:B} values, ${shapeSerializer:T} serializer) { } static ${shape:T} deserialize${name:U}(${schema:T} schema, ${shapeDeserializer:T} deserializer) { - var size = deserializer.containerSize(); + var size = Math.min(deserializer.containerSize(), deserializer.containerPreAllocationLimit()); ${shape:T} result = size == -1 ? new ${collectionImpl:T}<>() : new ${collectionImpl:T}<>(size); deserializer.readList(schema, result, ${name:U}$$MemberDeserializer.INSTANCE); return result; diff --git a/codegen/codegen-core/src/main/java/software/amazon/smithy/java/codegen/generators/MapGenerator.java b/codegen/codegen-core/src/main/java/software/amazon/smithy/java/codegen/generators/MapGenerator.java index 02244c6fc..974d66b2f 100644 --- a/codegen/codegen-core/src/main/java/software/amazon/smithy/java/codegen/generators/MapGenerator.java +++ b/codegen/codegen-core/src/main/java/software/amazon/smithy/java/codegen/generators/MapGenerator.java @@ -82,7 +82,7 @@ public void accept(${value:B} values, ${shapeSerializer:T} serializer) { } static ${shape:T} deserialize${name:U}(${schema:T} schema, ${shapeDeserializer:T} deserializer) { - var size = deserializer.containerSize(); + var size = Math.min(deserializer.containerSize(), deserializer.containerPreAllocationLimit()); ${shape:T} result = size == -1 ? new ${collectionImpl:T}<>() : new ${collectionImpl:T}<>(size); deserializer.readStringMap(schema, result, ${name:U}$$ValueDeserializer.INSTANCE); return result; diff --git a/core/src/main/java/software/amazon/smithy/java/core/serde/ShapeDeserializer.java b/core/src/main/java/software/amazon/smithy/java/core/serde/ShapeDeserializer.java index c7e910dc6..8a854f8b7 100644 --- a/core/src/main/java/software/amazon/smithy/java/core/serde/ShapeDeserializer.java +++ b/core/src/main/java/software/amazon/smithy/java/core/serde/ShapeDeserializer.java @@ -20,6 +20,9 @@ */ public interface ShapeDeserializer extends AutoCloseable { + int CONTAINER_PRE_ALLOCATION_LIMIT = + Integer.parseInt(System.getProperty("smithy.java.serde.container-pre-allocation-limit", "10000")); + @Override default void close() {} @@ -155,6 +158,46 @@ default int containerSize() { return -1; } + /** + * Returns the maximum number of elements to pre-allocate when deserializing container types (lists and maps). + * + *

This method provides a safety limit to prevent memory exhaustion attacks when deserializing untrusted data. + * When a deserializer encounters a container (list or map) that reports its size via {@link #containerSize()}, + * the deserialization logic may attempt to pre-allocate space for all elements to improve performance. However, + * if the reported size is maliciously large (e.g., a serialized value claims to contain billions of elements), + * pre-allocating that much memory could cause an {@link OutOfMemoryError} or severe performance degradation. + * + *

This limit acts as a safeguard by capping the maximum pre-allocation size. If {@link #containerSize()} + * returns a value greater than this limit, deserialization implementations should fall back to dynamic allocation + * strategies (e.g., adding elements one at a time to a growable collection) rather than pre-allocating space + * for the full reported size. + * + * + *

Security Implications: + * Setting this limit too high may expose the application to denial-of-service attacks through memory exhaustion. + * Setting it too low may reduce deserialization performance for legitimate large datasets. The default value + * of 10,000 elements provides a reasonable balance for most use cases. + * + *

Configuration: + * The default implementation returns the value of {@link #CONTAINER_PRE_ALLOCATION_LIMIT}, which is controlled + * by the {@code smithy.java.serde.container-pre-allocation-limit} system property (default: 10000). Implementations + * may override this method to provide custom limits based on specific deserialization contexts or security requirements. + * + *

Relationship to {@link #containerSize()}: + * This method should be used in conjunction with {@link #containerSize()} when determining how much memory to + * pre-allocate. While {@code containerSize()} reports the claimed size from the serialized data, this method + * provides the trusted upper bound for pre-allocation decisions. + * + * @return the maximum number of elements to pre-allocate for container deserialization, must be non-negative. + * A value of 0 indicates that pre-allocation should never be performed, forcing all containers to use + * dynamic allocation strategies. + * @see #containerSize() + * @see #CONTAINER_PRE_ALLOCATION_LIMIT + */ + default int containerPreAllocationLimit() { + return CONTAINER_PRE_ALLOCATION_LIMIT; + } + /** * Attempt to read a map value. *