Skip to content

Commit 998b3ba

Browse files
authored
feat: add missing kernels to ndarray/base/unary-reduce-subarray
PR-URL: #6421 Reviewed-by: Athan Reines <[email protected]> Signed-off-by: Muhammad Haris <[email protected]>
1 parent a1e230f commit 998b3ba

32 files changed

+8381
-3
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,394 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
/* eslint-disable max-depth, max-len */
20+
21+
'use strict';
22+
23+
// MODULES //
24+
25+
var loopOrder = require( '@stdlib/ndarray/base/unary-loop-interchange-order' );
26+
var blockSize = require( '@stdlib/ndarray/base/unary-tiling-block-size' );
27+
var takeIndexed = require( '@stdlib/array/base/take-indexed' );
28+
var zeros = require( '@stdlib/array/base/zeros' );
29+
var incrementOffsets = require( './increment_offsets.js' );
30+
var setViewOffsets = require( './set_view_offsets.js' );
31+
var offsets = require( './offsets.js' );
32+
33+
34+
// MAIN //
35+
36+
/**
* Performs a reduction over an input ndarray and assigns results to a provided output ndarray via loop blocking.
*
* ## Notes
*
* -   The ten non-reduced ("loop") dimensions are traversed in blocks of at most `bsize` elements per dimension, where `bsize` is resolved from the input and output data types.
* -   For every element visited, the sub-array views are re-pointed at the current offsets, the reduction function is invoked as `fcn( views, opts )`, and the returned value is written to the output ndarray buffer using the output ndarray's setter accessor.
* -   The first element of `arrays` is treated as the input ndarray and the second element as the output ndarray. Any additional ndarrays have their strides reordered to match the resolved loop order.
*
* @private
* @param {Function} fcn - reduction function
* @param {Array<Object>} arrays - ndarrays
* @param {Array<Object>} views - initialized ndarray-like objects representing sub-array views
* @param {IntegerArray} strides - loop dimension strides for the input ndarray
* @param {Options} opts - function options
* @returns {void}
*
* @example
* var toAccessorArray = require( '@stdlib/array/base/to-accessor-array' );
* var accessors = require( '@stdlib/array/base/accessors' );
* var Float64Array = require( '@stdlib/array/float64' );
* var filled = require( '@stdlib/array/base/filled' );
* var ndarray2array = require( '@stdlib/ndarray/base/to-array' );
* var base = require( '@stdlib/ndarray/base/every' );
*
* // Create data buffers:
* var xbuf = toAccessorArray( new Float64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 0.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 ] ) );
* var ybuf = toAccessorArray( filled( false, 3 ) );
*
* // Define the array shapes:
* var xsh = [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 2 ];
* var ysh = [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 3 ];
*
* // Define the array strides:
* var sx = [ 12, 12, 12, 12, 12, 12, 12, 12, 12, 4, 2, 1 ];
* var sy = [ 3, 3, 3, 3, 3, 3, 3, 3, 3, 1 ];
*
* // Define the index offsets:
* var ox = 0;
* var oy = 0;
*
* // Create an input ndarray-like object:
* var x = {
*     'dtype': 'float64',
*     'data': xbuf,
*     'shape': xsh,
*     'strides': sx,
*     'offset': ox,
*     'order': 'row-major',
*     'accessors': accessors( xbuf ).accessors
* };
*
* // Create an output ndarray-like object:
* var y = {
*     'dtype': 'generic',
*     'data': ybuf,
*     'shape': ysh,
*     'strides': sy,
*     'offset': oy,
*     'order': 'row-major',
*     'accessors': accessors( ybuf ).accessors
* };
*
* // Initialize ndarray-like objects representing sub-array views:
* var views = [
*     {
*         'dtype': x.dtype,
*         'data': x.data,
*         'shape': [ 2, 2 ],
*         'strides': [ 2, 1 ],
*         'offset': x.offset,
*         'order': x.order
*     }
* ];
*
* // Perform a reduction:
* blockedunary10d( base, [ x, y ], views, [ 12, 12, 12, 12, 12, 12, 12, 12, 12, 4 ], {} );
*
* var arr = ndarray2array( y.data, y.shape, y.strides, y.offset, y.order );
* // returns [ [ [ [ [ [ [ [ [ [ true, false, true ] ] ] ] ] ] ] ] ] ]
*/
function blockedunary10d( fcn, arrays, views, strides, opts ) { // eslint-disable-line max-statements, max-lines-per-function
	var bsize;
	var ybuf;
	var set;
	var dv0;
	var dv1;
	var dv2;
	var dv3;
	var dv4;
	var dv5;
	var dv6;
	var dv7;
	var dv8;
	var dv9;
	var ov1;
	var ov2;
	var ov3;
	var ov4;
	var ov5;
	var ov6;
	var ov7;
	var ov8;
	var ov9;
	var sh;
	var s0;
	var s1;
	var s2;
	var s3;
	var s4;
	var s5;
	var s6;
	var s7;
	var s8;
	var s9;
	var sv;
	var ov;
	var iv;
	var i0;
	var i1;
	var i2;
	var i3;
	var i4;
	var i5;
	var i6;
	var i7;
	var i8;
	var i9;
	var j0;
	var j1;
	var j2;
	var j3;
	var j4;
	var j5;
	var j6;
	var j7;
	var j8;
	var j9;
	var N;
	var x;
	var y;
	var o;
	var k;

	// Note on variable naming convention: s#, dv#, ov#, i#, j# where # corresponds to the loop number, with `0` being the innermost loop...

	N = arrays.length;
	x = arrays[ 0 ];
	y = arrays[ 1 ];

	// Resolve the loop interchange order:
	o = loopOrder( y.shape, strides, y.strides );
	sh = o.sh;
	sv = [ o.sx, o.sy ];
	for ( k = 2; k < N; k++ ) {
		sv.push( takeIndexed( arrays[k].strides, o.idx ) );
	}
	// Determine the block size:
	bsize = blockSize( x.dtype, y.dtype );

	// Resolve a list of pointers to the first indexed elements in the respective ndarrays:
	ov = offsets( arrays );

	// Cache a reference to the output ndarray buffer:
	ybuf = y.data;

	// Cache accessors:
	set = y.accessors[ 1 ];

	// Cache offset increments for the innermost loop...
	dv0 = [];
	for ( k = 0; k < N; k++ ) {
		dv0.push( sv[k][0] );
	}
	// Initialize loop variables (one entry per ndarray in each list)...
	ov1 = zeros( N );
	ov2 = zeros( N );
	ov3 = zeros( N );
	ov4 = zeros( N );
	ov5 = zeros( N );
	ov6 = zeros( N );
	ov7 = zeros( N );
	ov8 = zeros( N );
	ov9 = zeros( N );
	dv1 = zeros( N );
	dv2 = zeros( N );
	dv3 = zeros( N );
	dv4 = zeros( N );
	dv5 = zeros( N );
	dv6 = zeros( N );
	dv7 = zeros( N );
	dv8 = zeros( N );
	dv9 = zeros( N );
	iv = zeros( N );

	// Iterate over blocks...
	for ( j9 = sh[9]; j9 > 0; ) {
		if ( j9 < bsize ) {
			s9 = j9;
			j9 = 0;
		} else {
			s9 = bsize;
			j9 -= bsize;
		}
		for ( k = 0; k < N; k++ ) {
			ov9[ k ] = ov[k] + ( j9*sv[k][9] );
		}
		for ( j8 = sh[8]; j8 > 0; ) {
			if ( j8 < bsize ) {
				s8 = j8;
				j8 = 0;
			} else {
				s8 = bsize;
				j8 -= bsize;
			}
			for ( k = 0; k < N; k++ ) {
				// NOTE: increments must be stored per ndarray (i.e., `dv#[ k ]`), as `incrementOffsets` consumes a list of increments...
				dv9[ k ] = sv[k][9] - ( s8*sv[k][8] );
				ov8[ k ] = ov9[k] + ( j8*sv[k][8] );
			}
			for ( j7 = sh[7]; j7 > 0; ) {
				if ( j7 < bsize ) {
					s7 = j7;
					j7 = 0;
				} else {
					s7 = bsize;
					j7 -= bsize;
				}
				for ( k = 0; k < N; k++ ) {
					dv8[ k ] = sv[k][8] - ( s7*sv[k][7] );
					ov7[ k ] = ov8[k] + ( j7*sv[k][7] );
				}
				for ( j6 = sh[6]; j6 > 0; ) {
					if ( j6 < bsize ) {
						s6 = j6;
						j6 = 0;
					} else {
						s6 = bsize;
						j6 -= bsize;
					}
					for ( k = 0; k < N; k++ ) {
						dv7[ k ] = sv[k][7] - ( s6*sv[k][6] );
						ov6[ k ] = ov7[k] + ( j6*sv[k][6] );
					}
					for ( j5 = sh[5]; j5 > 0; ) {
						if ( j5 < bsize ) {
							s5 = j5;
							j5 = 0;
						} else {
							s5 = bsize;
							j5 -= bsize;
						}
						for ( k = 0; k < N; k++ ) {
							dv6[ k ] = sv[k][6] - ( s5*sv[k][5] );
							ov5[ k ] = ov6[k] + ( j5*sv[k][5] );
						}
						for ( j4 = sh[4]; j4 > 0; ) {
							if ( j4 < bsize ) {
								s4 = j4;
								j4 = 0;
							} else {
								s4 = bsize;
								j4 -= bsize;
							}
							for ( k = 0; k < N; k++ ) {
								dv5[ k ] = sv[k][5] - ( s4*sv[k][4] );
								ov4[ k ] = ov5[k] + ( j4*sv[k][4] );
							}
							for ( j3 = sh[3]; j3 > 0; ) {
								if ( j3 < bsize ) {
									s3 = j3;
									j3 = 0;
								} else {
									s3 = bsize;
									j3 -= bsize;
								}
								for ( k = 0; k < N; k++ ) {
									dv4[ k ] = sv[k][4] - ( s3*sv[k][3] );
									ov3[ k ] = ov4[k] + ( j3*sv[k][3] );
								}
								for ( j2 = sh[2]; j2 > 0; ) {
									if ( j2 < bsize ) {
										s2 = j2;
										j2 = 0;
									} else {
										s2 = bsize;
										j2 -= bsize;
									}
									for ( k = 0; k < N; k++ ) {
										dv3[ k ] = sv[k][3] - ( s2*sv[k][2] );
										ov2[ k ] = ov3[k] + ( j2*sv[k][2] );
									}
									for ( j1 = sh[1]; j1 > 0; ) {
										if ( j1 < bsize ) {
											s1 = j1;
											j1 = 0;
										} else {
											s1 = bsize;
											j1 -= bsize;
										}
										for ( k = 0; k < N; k++ ) {
											dv2[ k ] = sv[k][2] - ( s1*sv[k][1] );
											ov1[ k ] = ov2[k] + ( j1*sv[k][1] );
										}
										for ( j0 = sh[0]; j0 > 0; ) {
											if ( j0 < bsize ) {
												s0 = j0;
												j0 = 0;
											} else {
												s0 = bsize;
												j0 -= bsize;
											}
											// Compute index offsets and loop offset increments for the first ndarray elements in the current block...
											for ( k = 0; k < N; k++ ) {
												iv[ k ] = ov1[k] + ( j0*sv[k][0] );
												dv1[ k ] = sv[k][1] - ( s0*sv[k][0] );
											}
											// Iterate over the non-reduced ndarray dimensions...
											for ( i9 = 0; i9 < s9; i9++ ) {
												for ( i8 = 0; i8 < s8; i8++ ) {
													for ( i7 = 0; i7 < s7; i7++ ) {
														for ( i6 = 0; i6 < s6; i6++ ) {
															for ( i5 = 0; i5 < s5; i5++ ) {
																for ( i4 = 0; i4 < s4; i4++ ) {
																	for ( i3 = 0; i3 < s3; i3++ ) {
																		for ( i2 = 0; i2 < s2; i2++ ) {
																			for ( i1 = 0; i1 < s1; i1++ ) {
																				for ( i0 = 0; i0 < s0; i0++ ) {
																					// Point the sub-array views at the current element, reduce, and store the result in the output buffer:
																					setViewOffsets( views, iv );
																					set( ybuf, iv[ 1 ], fcn( views, opts ) );
																					incrementOffsets( iv, dv0 );
																				}
																				incrementOffsets( iv, dv1 );
																			}
																			incrementOffsets( iv, dv2 );
																		}
																		incrementOffsets( iv, dv3 );
																	}
																	incrementOffsets( iv, dv4 );
																}
																incrementOffsets( iv, dv5 );
															}
															incrementOffsets( iv, dv6 );
														}
														incrementOffsets( iv, dv7 );
													}
													incrementOffsets( iv, dv8 );
												}
												incrementOffsets( iv, dv9 );
											}
										}
									}
								}
							}
						}
					}
				}
			}
		}
	}
}
390+
391+
392+
// EXPORTS //
393+
394+
module.exports = blockedunary10d;

0 commit comments

Comments
 (0)