@@ -29,114 +29,114 @@ class Registrar {
2929 Registrar () { CopyCaseFactory::Factory ().Register (std::make_shared<T>()); }
3030};
3131
/**
 * Declares a copy case, registers it, and opens the definition of its Run() method.
 *
 * Expansion produces, in order:
 *   1. a class `CaseClass` deriving from CopyCase whose default constructor forwards
 *      `CaseKey` and `CaseBrief` to the CopyCase constructor;
 *   2. a file-local `Registrar<CaseClass>` object named `global_<CaseClass>_registrar`,
 *      whose constructor registers a shared instance with CopyCaseFactory;
 *   3. the signature of `CaseClass::Run(const Context&) const`, left open so that the
 *      braces following the macro invocation become the method body.
 *
 * @param CaseClass  name of the class to generate
 * @param CaseKey    first argument passed to the CopyCase constructor
 * @param CaseBrief  second argument passed to the CopyCase constructor
 * @param CtxName    identifier under which the body refers to the `const Context&` argument
 */
#define DEFINE_COPY_CASE(CaseClass, CaseKey, CaseBrief, CtxName)       \
    class CaseClass : public CopyCase {                                \
    public:                                                            \
        CaseClass() : CopyCase(CaseKey, CaseBrief) {}                  \
        void Run(const Context&) const override;                       \
    };                                                                 \
    static Registrar<CaseClass> global_##CaseClass##_registrar;        \
    void CaseClass::Run(const Context& CtxName) const
4040
4141DEFINE_COPY_CASE(Host2DeviceCECase, " host_to_device_ce" ,
42- " memcpy from host to device with ce one by one" )
42+ " memcpy from host to device with ce one by one" , ctx )
4343{
4444 CudaMemcpyHost2DeviceCopyInitiator initiator;
45- CopyInstance instance{&initiator, iter, false };
45+ CopyInstance instance{&initiator, ctx. iter , false };
4646 CopyResult result;
47- for (size_t device = 0 ; device < nDevice; device++) {
48- CudaHostCopyBuffer srcBuffer{device, size, num};
49- CudaDeviceCopyBuffer dstBuffer{device, size, num};
47+ for (size_t device = 0 ; device < ctx. nDevice ; device++) {
48+ CudaHostCopyBuffer srcBuffer{device, ctx. size , ctx. num };
49+ CudaDeviceCopyBuffer dstBuffer{device, ctx. size , ctx. num };
5050 result.Push (instance.DoCopy (&srcBuffer, &dstBuffer));
5151 }
5252 result.Show (" [[ " + Key () + " ]] " + Brief ());
5353}
5454
5555DEFINE_COPY_CASE (Host2DeviceSMCase, " host_to_device_sm" ,
56- " memcpy from host to device with sm one by one" )
56+ " memcpy from host to device with sm one by one" , ctx )
5757{
5858 CopyResult result;
59- for (size_t device = 0 ; device < nDevice; device++) {
60- CudaHostCopyBuffer srcBuffer{device, size, num};
61- CudaDeviceCopyBuffer dstBuffer{device, size, num};
62- CudaSMBatchCopyInitiator initiator (device, num);
63- CopyInstance instance{&initiator, iter, false };
59+ for (size_t device = 0 ; device < ctx. nDevice ; device++) {
60+ CudaHostCopyBuffer srcBuffer{device, ctx. size , ctx. num };
61+ CudaDeviceCopyBuffer dstBuffer{device, ctx. size , ctx. num };
62+ CudaSMBatchCopyInitiator initiator (device, ctx. num );
63+ CopyInstance instance{&initiator, ctx. iter , false };
6464 result.Push (instance.DoCopy (&srcBuffer, &dstBuffer));
6565 }
6666 result.Show (" [[ " + Key () + " ]] " + Brief ());
6767}
6868
6969DEFINE_COPY_CASE (OneHost2AllDeviceCECase, " one_host_to_all_device_ce" ,
70- " memcpy from one host to all device with ce" )
70+ " memcpy from one host to all device with ce" , ctx )
7171{
7272 CudaMemcpyHost2DeviceCopyInitiator initiator;
73- CopyInstance instance{&initiator, iter, false };
73+ CopyInstance instance{&initiator, ctx. iter , false };
7474 CopyResult result;
75- CudaHostCopyBuffer srcBuffer{0 , size, num};
76- for (size_t device = 0 ; device < nDevice; device++) {
77- CudaDeviceCopyBuffer dstBuffer{device, size, num};
75+ CudaHostCopyBuffer srcBuffer{0 , ctx. size , ctx. num };
76+ for (size_t device = 0 ; device < ctx. nDevice ; device++) {
77+ CudaDeviceCopyBuffer dstBuffer{device, ctx. size , ctx. num };
7878 result.Push (instance.DoCopy (&srcBuffer, &dstBuffer));
7979 }
8080 result.Show (" [[ " + Key () + " ]] " + Brief ());
8181}
8282
8383DEFINE_COPY_CASE (OneHost2AllDeviceSMCase, " one_host_to_all_device_sm" ,
84- " memcpy from one host to all device with sm" )
84+ " memcpy from one host to all device with sm" , ctx )
8585{
8686 CopyResult result;
87- CudaHostCopyBuffer srcBuffer{0 , size, num};
88- for (size_t device = 0 ; device < nDevice; device++) {
89- CudaDeviceCopyBuffer dstBuffer{device, size, num};
90- CudaSMBatchCopyInitiator initiator{device, num};
91- CopyInstance instance{&initiator, iter, false };
87+ CudaHostCopyBuffer srcBuffer{0 , ctx. size , ctx. num };
88+ for (size_t device = 0 ; device < ctx. nDevice ; device++) {
89+ CudaDeviceCopyBuffer dstBuffer{device, ctx. size , ctx. num };
90+ CudaSMBatchCopyInitiator initiator{device, ctx. num };
91+ CopyInstance instance{&initiator, ctx. iter , false };
9292 result.Push (instance.DoCopy (&srcBuffer, &dstBuffer));
9393 }
9494 result.Show (" [[ " + Key () + " ]] " + Brief ());
9595}
9696
9797DEFINE_COPY_CASE (AllHost2AllDeviceCECase, " all_host_to_all_device_ce" ,
98- " memcpy from all host to all device with ce at one time" )
98+ " memcpy from all host to all device with ce at one time" , ctx )
9999{
100100 CudaMemcpyHost2DeviceCopyInitiator initiator;
101- CopyInstance instance{&initiator, iter, false };
102- std::vector<const CopyBuffer*> srcBuffers (nDevice);
103- std::vector<const CopyBuffer*> dstBuffers (nDevice);
104- for (size_t device = 0 ; device < nDevice; device++) {
105- srcBuffers[device] = new CudaHostCopyBuffer{device, size, num};
106- dstBuffers[device] = new CudaDeviceCopyBuffer{device, size, num};
101+ CopyInstance instance{&initiator, ctx. iter , false };
102+ std::vector<const CopyBuffer*> srcBuffers (ctx. nDevice );
103+ std::vector<const CopyBuffer*> dstBuffers (ctx. nDevice );
104+ for (size_t device = 0 ; device < ctx. nDevice ; device++) {
105+ srcBuffers[device] = new CudaHostCopyBuffer{device, ctx. size , ctx. num };
106+ dstBuffers[device] = new CudaDeviceCopyBuffer{device, ctx. size , ctx. num };
107107 }
108108 CopyResult result;
109109 result.Push (instance.DoCopy (srcBuffers, dstBuffers));
110- for (size_t device = 0 ; device < nDevice; device++) {
110+ for (size_t device = 0 ; device < ctx. nDevice ; device++) {
111111 delete srcBuffers[device];
112112 delete dstBuffers[device];
113113 }
114114 result.Show (" [[ " + Key () + " ]] " + Brief ());
115115}
116116
117117DEFINE_COPY_CASE (Device2DeviceCECase, " device_to_device_ce" ,
118- " memcpy from device to device with ce one by one" )
118+ " memcpy from device to device with ce one by one" , ctx )
119119{
120120 CudaMemcpyDevice2DeviceCopyInitiator initiator;
121- CopyInstance instance{&initiator, iter, false };
121+ CopyInstance instance{&initiator, ctx. iter , false };
122122 CopyResult result;
123- for (size_t device = 0 ; device < nDevice; device++) {
124- CudaDeviceCopyBuffer srcBuffer{device, size, num};
125- CudaDeviceCopyBuffer dstBuffer{device, size, num};
123+ for (size_t device = 0 ; device < ctx. nDevice ; device++) {
124+ CudaDeviceCopyBuffer srcBuffer{device, ctx. size , ctx. num };
125+ CudaDeviceCopyBuffer dstBuffer{device, ctx. size , ctx. num };
126126 result.Push (instance.DoCopy (&srcBuffer, &dstBuffer));
127127 }
128128 result.Show (" [[ " + Key () + " ]] " + Brief ());
129129}
130130
131131DEFINE_COPY_CASE (OneDevice2AllDeviceCECase, " one_device_to_all_device_ce" ,
132- " memcpy from one device to all device with ce" )
132+ " memcpy from one device to all device with ce" , ctx )
133133{
134134 CudaMemcpyDevice2DeviceCopyInitiator initiator;
135- CopyInstance instance{&initiator, iter, false };
135+ CopyInstance instance{&initiator, ctx. iter , false };
136136 CopyResult result;
137- CudaDeviceCopyBuffer srcBuffer{0 , size, num};
138- for (size_t device = 0 ; device < nDevice; device++) {
139- CudaDeviceCopyBuffer dstBuffer{device, size, num};
137+ CudaDeviceCopyBuffer srcBuffer{0 , ctx. size , ctx. num };
138+ for (size_t device = 0 ; device < ctx. nDevice ; device++) {
139+ CudaDeviceCopyBuffer dstBuffer{device, ctx. size , ctx. num };
140140 result.Push (instance.DoCopy (&srcBuffer, &dstBuffer));
141141 }
142142 result.Show (" [[ " + Key () + " ]] " + Brief ());
0 commit comments