@@ -278,30 +278,6 @@ TEST_F(AOTITorchEmptyStridedTest, LargeTensor) {
278278 EXPECT_EQ (tensor->size (2 ), 50 );
279279}
280280
281- // Test error handling with memory allocation failures
282- TEST_F (AOTITorchEmptyStridedTest, MemoryAllocationStress) {
283- // Try to create a very large tensor that might cause allocation failure
284- // (This test may pass or fail depending on available memory)
285- std::vector<int64_t > huge_sizes = {10000 , 10000 , 100 }; // ~38GB for float32
286- Tensor* tensor;
287-
288- AOTITorchError error = aoti_torch_empty_strided (
289- huge_sizes.size (),
290- huge_sizes.data (),
291- nullptr ,
292- 6 , // float32
293- 1 , // CUDA device
294- 0 , // device index
295- &tensor);
296-
297- // Either succeed or fail with memory allocation error
298- if (error == Error::Ok) {
299- EXPECT_NE (tensor, nullptr );
300- } else {
301- EXPECT_EQ (error, Error::MemoryAllocationFailed);
302- }
303- }
304-
305281// Test aoti_torch_empty_strided with bfloat16 dtype
306282TEST_F (AOTITorchEmptyStridedTest, BFloat16Tensor) {
307283 // Test creating bfloat16 tensor on CUDA
@@ -509,11 +485,11 @@ TEST_F(AOTITorchEmptyStridedTest, ZeroElementTensor) {
509485 EXPECT_EQ (sizes_ptr[2 ], 3 );
510486}
511487
512- // Test different data types (only float32 is currently supported )
488+ // Test different data types (currently we support bf16, fp32 and int32 )
513489TEST_F (AOTITorchEmptyStridedTest, DifferentDataTypes) {
514490 std::vector<int64_t > sizes = {2 , 3 };
515491
516- // Test float32 (dtype 6) - currently the only supported type
492+ // Test float32 (dtype 6) - one of the supported types
517493 Tensor* tensor_float32;
518494 AOTITorchError error = aoti_torch_empty_strided (
519495 sizes.size (),
@@ -527,7 +503,7 @@ TEST_F(AOTITorchEmptyStridedTest, DifferentDataTypes) {
527503 EXPECT_EQ (error, Error::Ok);
528504 EXPECT_NE (tensor_float32, nullptr );
529505
530- // Test unsupported data types should return error
506+ // Test int32 (dtype 3) - one of the supported types
531507 Tensor* tensor_int32;
532508 error = aoti_torch_empty_strided (
533509 sizes.size (),
@@ -538,7 +514,8 @@ TEST_F(AOTITorchEmptyStridedTest, DifferentDataTypes) {
538514 0 , // device index
539515 &tensor_int32);
540516
541- EXPECT_EQ (error, Error::InvalidArgument); // Should fail for unsupported dtype
517+ EXPECT_EQ (error, Error::Ok);
518+ EXPECT_NE (tensor_int32, nullptr );
542519
543520 // Test float64 - an unsupported data type (should return an error)
544521 Tensor* tensor_float64;
@@ -586,3 +563,105 @@ TEST_F(AOTITorchEmptyStridedTest, MultiDimensionalTensors) {
586563 EXPECT_EQ (tensor_5d->size (3 ), 4 );
587564 EXPECT_EQ (tensor_5d->size (4 ), 5 );
588565}
566+
567+ // Test incontiguous tensor creation - transpose-like layout
568+ TEST_F (AOTITorchEmptyStridedTest, IncontiguousTransposeLayout) {
569+ // Create a tensor with transpose-like strides (column-major)
570+ // For a 3x4 tensor in column-major order, strides should be [1, 3]
571+ // This means each row step is 1, and each column step is 3
572+ std::vector<int64_t > sizes = {3 , 4 };
573+ std::vector<int64_t > strides = {1 , 3 }; // Column-major (incontiguous)
574+
575+ Tensor* tensor;
576+ AOTITorchError error = aoti_torch_empty_strided (
577+ sizes.size (),
578+ sizes.data (),
579+ strides.data (),
580+ static_cast <int32_t >(SupportedDTypes::FLOAT32),
581+ static_cast <int32_t >(SupportedDevices::CUDA),
582+ 0 , // device index
583+ &tensor);
584+
585+ EXPECT_EQ (error, Error::Ok);
586+ EXPECT_NE (tensor, nullptr );
587+
588+ // Verify tensor properties
589+ EXPECT_EQ (tensor->dim (), 2 );
590+ EXPECT_EQ (tensor->size (0 ), 3 );
591+ EXPECT_EQ (tensor->size (1 ), 4 );
592+
593+ // Verify the strides are what we specified
594+ int64_t * strides_ptr;
595+ EXPECT_EQ (aoti_torch_get_strides (tensor, &strides_ptr), Error::Ok);
596+ EXPECT_EQ (strides_ptr[0 ], 1 ); // Column-major stride for dimension 0
597+ EXPECT_EQ (strides_ptr[1 ], 3 ); // Column-major stride for dimension 1
598+
599+ // Verify that memory was allocated correctly for incontiguous layout
600+ // Storage size should be: stride[0] * (size[0] - 1) + stride[1] * (size[1] -
601+ // 1) + 1 = 1 * (3 - 1) + 3 * (4 - 1) + 1 = 1 * 2 + 3 * 3 + 1 = 2 + 9 + 1 = 12
602+ // elements Total bytes = 12 * 4 = 48 bytes (for float32)
603+ EXPECT_EQ (tensor->numel (), 12 ); // numel is still 3*4=12 for logical shape
604+
605+ // The tensor should be accessible and writable
606+ void * data_ptr = tensor->mutable_data_ptr ();
607+ EXPECT_NE (data_ptr, nullptr );
608+
609+ // Verify we can use CUDA to write to the memory
610+ std::vector<float > test_data (12 , 1 .0f );
611+ cudaError_t cuda_err = cudaMemcpy (
612+ data_ptr, test_data.data (), 12 * sizeof (float ), cudaMemcpyHostToDevice);
613+ EXPECT_EQ (cuda_err, cudaSuccess);
614+ }
615+
616+ // Test incontiguous tensor creation - expanded/broadcasted stride pattern
617+ TEST_F (AOTITorchEmptyStridedTest, IncontiguousExpandedStrides) {
618+ // Create a tensor with expanded strides (simulating broadcasting)
619+ // A 2x3x4 tensor where the first dimension has stride 0 (expanded)
620+ // This creates a tensor where the first dimension is "broadcasted"
621+ std::vector<int64_t > sizes = {2 , 3 , 4 };
622+ std::vector<int64_t > strides = {0 , 4 , 1 }; // First dimension has stride 0
623+
624+ Tensor* tensor;
625+ AOTITorchError error = aoti_torch_empty_strided (
626+ sizes.size (),
627+ sizes.data (),
628+ strides.data (),
629+ static_cast <int32_t >(SupportedDTypes::FLOAT32),
630+ static_cast <int32_t >(SupportedDevices::CUDA),
631+ 0 , // device index
632+ &tensor);
633+
634+ EXPECT_EQ (error, Error::Ok);
635+ EXPECT_NE (tensor, nullptr );
636+
637+ // Verify tensor properties
638+ EXPECT_EQ (tensor->dim (), 3 );
639+ EXPECT_EQ (tensor->size (0 ), 2 );
640+ EXPECT_EQ (tensor->size (1 ), 3 );
641+ EXPECT_EQ (tensor->size (2 ), 4 );
642+
643+ // Verify the strides are what we specified
644+ int64_t * strides_ptr;
645+ EXPECT_EQ (aoti_torch_get_strides (tensor, &strides_ptr), Error::Ok);
646+ EXPECT_EQ (strides_ptr[0 ], 0 ); // Expanded dimension stride
647+ EXPECT_EQ (strides_ptr[1 ], 4 );
648+ EXPECT_EQ (strides_ptr[2 ], 1 );
649+
650+ // Verify that memory was allocated correctly for this incontiguous layout
651+ // Storage size should be: stride[0] * (size[0] - 1) + stride[1] * (size[1] -
652+ // 1) + stride[2] * (size[2] - 1) + 1 = 0 * (2 - 1) + 4 * (3 - 1) + 1 * (4 -
653+ // 1) + 1 = 0 + 8 + 3 + 1 = 12 elements Note: numel() returns logical number
654+ // of elements (2*3*4=24), not storage size
655+ EXPECT_EQ (tensor->numel (), 24 ); // Logical numel is 2*3*4=24
656+
657+ // The tensor should be accessible and writable
658+ void * data_ptr = tensor->mutable_data_ptr ();
659+ EXPECT_NE (data_ptr, nullptr );
660+
661+ // Verify we can use CUDA to write to the allocated memory
662+ // We only need to allocate 12 elements (storage size), not 24
663+ std::vector<float > test_data (12 , 2 .0f );
664+ cudaError_t cuda_err = cudaMemcpy (
665+ data_ptr, test_data.data (), 12 * sizeof (float ), cudaMemcpyHostToDevice);
666+ EXPECT_EQ (cuda_err, cudaSuccess);
667+ }
0 commit comments