|
| 1 | +/** |
| 2 | +* @license Apache-2.0 |
| 3 | +* |
| 4 | +* Copyright (c) 2025 The Stdlib Authors. |
| 5 | +* |
| 6 | +* Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | +* you may not use this file except in compliance with the License. |
| 8 | +* You may obtain a copy of the License at |
| 9 | +* |
| 10 | +* http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +* |
| 12 | +* Unless required by applicable law or agreed to in writing, software |
| 13 | +* distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | +* See the License for the specific language governing permissions and |
| 16 | +* limitations under the License. |
| 17 | +*/ |
| 18 | + |
| 19 | +/* eslint-disable max-depth, max-len */ |
| 20 | + |
| 21 | +'use strict'; |
| 22 | + |
| 23 | +// MODULES // |
| 24 | + |
| 25 | +var loopOrder = require( '@stdlib/ndarray/base/unary-loop-interchange-order' ); |
| 26 | +var blockSize = require( '@stdlib/ndarray/base/unary-tiling-block-size' ); |
| 27 | +var takeIndexed = require( '@stdlib/array/base/take-indexed' ); |
| 28 | +var zeros = require( '@stdlib/array/base/zeros' ); |
| 29 | +var incrementOffsets = require( './increment_offsets.js' ); |
| 30 | +var setViewOffsets = require( './set_view_offsets.js' ); |
| 31 | +var offsets = require( './offsets.js' ); |
| 32 | + |
| 33 | + |
| 34 | +// MAIN // |
| 35 | + |
/**
* Performs a reduction over an input ndarray and assigns results to a provided output ndarray via loop blocking.
*
* ## Notes
*
* -   The first element of `arrays` is treated as the input ndarray and the second element as the output ndarray. Any additional ndarrays have their strides reordered to match the resolved loop order.
* -   Results are written to the output ndarray using its `accessors` setter (i.e., `y.accessors[ 1 ]`).
* -   For each element of the output ndarray, the view offsets are updated and `fcn` is invoked with the list of views and the provided options.
*
* @private
* @param {Function} fcn - reduction function
* @param {Array<Object>} arrays - ndarrays
* @param {Array<Object>} views - initialized ndarray-like objects representing sub-array views
* @param {IntegerArray} strides - loop dimension strides for the input ndarray
* @param {Options} opts - function options
* @returns {void}
*
* @example
* var toAccessorArray = require( '@stdlib/array/base/to-accessor-array' );
* var accessors = require( '@stdlib/array/base/accessors' );
* var Float64Array = require( '@stdlib/array/float64' );
* var filled = require( '@stdlib/array/base/filled' );
* var ndarray2array = require( '@stdlib/ndarray/base/to-array' );
* var base = require( '@stdlib/ndarray/base/every' );
*
* // Create data buffers:
* var xbuf = toAccessorArray( new Float64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 0.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 ] ) );
* var ybuf = toAccessorArray( filled( false, 3 ) );
*
* // Define the array shapes:
* var xsh = [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 2 ];
* var ysh = [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 3 ];
*
* // Define the array strides:
* var sx = [ 12, 12, 12, 12, 12, 12, 12, 12, 12, 4, 2, 1 ];
* var sy = [ 3, 3, 3, 3, 3, 3, 3, 3, 3, 1 ];
*
* // Define the index offsets:
* var ox = 0;
* var oy = 0;
*
* // Create an input ndarray-like object:
* var x = {
*     'dtype': 'float64',
*     'data': xbuf,
*     'shape': xsh,
*     'strides': sx,
*     'offset': ox,
*     'order': 'row-major',
*     'accessors': accessors( xbuf ).accessors
* };
*
* // Create an output ndarray-like object:
* var y = {
*     'dtype': 'generic',
*     'data': ybuf,
*     'shape': ysh,
*     'strides': sy,
*     'offset': oy,
*     'order': 'row-major',
*     'accessors': accessors( ybuf ).accessors
* };
*
* // Initialize ndarray-like objects representing sub-array views:
* var views = [
*     {
*         'dtype': x.dtype,
*         'data': x.data,
*         'shape': [ 2, 2 ],
*         'strides': [ 2, 1 ],
*         'offset': x.offset,
*         'order': x.order
*     }
* ];
*
* // Perform a reduction:
* blockedunary10d( base, [ x, y ], views, [ 12, 12, 12, 12, 12, 12, 12, 12, 12, 4 ], {} );
*
* var arr = ndarray2array( y.data, y.shape, y.strides, y.offset, y.order );
* // returns [ [ [ [ [ [ [ [ [ [ true, false, true ] ] ] ] ] ] ] ] ] ]
*/
function blockedunary10d( fcn, arrays, views, strides, opts ) { // eslint-disable-line max-statements, max-lines-per-function
	var bsize;
	var ybuf;
	var set;
	var dv0;
	var dv1;
	var dv2;
	var dv3;
	var dv4;
	var dv5;
	var dv6;
	var dv7;
	var dv8;
	var dv9;
	var ov1;
	var ov2;
	var ov3;
	var ov4;
	var ov5;
	var ov6;
	var ov7;
	var ov8;
	var ov9;
	var sh;
	var s0;
	var s1;
	var s2;
	var s3;
	var s4;
	var s5;
	var s6;
	var s7;
	var s8;
	var s9;
	var sv;
	var ov;
	var iv;
	var i0;
	var i1;
	var i2;
	var i3;
	var i4;
	var i5;
	var i6;
	var i7;
	var i8;
	var i9;
	var j0;
	var j1;
	var j2;
	var j3;
	var j4;
	var j5;
	var j6;
	var j7;
	var j8;
	var j9;
	var N;
	var x;
	var y;
	var o;
	var k;

	// Note on variable naming convention: s#, dv#, ov#, i#, j# where # corresponds to the loop number, with `0` being the innermost loop...

	N = arrays.length;
	x = arrays[ 0 ];
	y = arrays[ 1 ];

	// Resolve the loop interchange order:
	o = loopOrder( y.shape, strides, y.strides );
	sh = o.sh;
	sv = [ o.sx, o.sy ];
	for ( k = 2; k < N; k++ ) {
		// Reorder the strides of any additional ndarrays to match the resolved loop order:
		sv.push( takeIndexed( arrays[k].strides, o.idx ) );
	}
	// Determine the block size:
	bsize = blockSize( x.dtype, y.dtype );

	// Resolve a list of pointers to the first indexed elements in the respective ndarrays:
	ov = offsets( arrays );

	// Cache a reference to the output ndarray buffer:
	ybuf = y.data;

	// Cache accessors:
	set = y.accessors[ 1 ];

	// Cache offset increments for the innermost loop...
	dv0 = [];
	for ( k = 0; k < N; k++ ) {
		dv0.push( sv[k][0] );
	}
	// Initialize loop variables (one entry per ndarray for every block offset `ov#` and loop offset increment `dv#`)...
	ov1 = zeros( N );
	ov2 = zeros( N );
	ov3 = zeros( N );
	ov4 = zeros( N );
	ov5 = zeros( N );
	ov6 = zeros( N );
	ov7 = zeros( N );
	ov8 = zeros( N );
	ov9 = zeros( N );
	dv1 = zeros( N );
	dv2 = zeros( N );
	dv3 = zeros( N );
	dv4 = zeros( N );
	dv5 = zeros( N );
	dv6 = zeros( N );
	dv7 = zeros( N );
	dv8 = zeros( N );
	dv9 = zeros( N );
	iv = zeros( N );

	// Iterate over blocks (each `j#` counts down the number of elements remaining along a dimension; the last block along a dimension may be smaller than `bsize`)...
	for ( j9 = sh[9]; j9 > 0; ) {
		if ( j9 < bsize ) {
			s9 = j9;
			j9 = 0;
		} else {
			s9 = bsize;
			j9 -= bsize;
		}
		for ( k = 0; k < N; k++ ) {
			ov9[ k ] = ov[k] + ( j9*sv[k][9] );
		}
		for ( j8 = sh[8]; j8 > 0; ) {
			if ( j8 < bsize ) {
				s8 = j8;
				j8 = 0;
			} else {
				s8 = bsize;
				j8 -= bsize;
			}
			for ( k = 0; k < N; k++ ) {
				// Increment which, after completing `s8` steps along dimension 8, advances one step along dimension 9:
				dv9[ k ] = sv[k][9] - ( s8*sv[k][8] );
				ov8[ k ] = ov9[k] + ( j8*sv[k][8] );
			}
			for ( j7 = sh[7]; j7 > 0; ) {
				if ( j7 < bsize ) {
					s7 = j7;
					j7 = 0;
				} else {
					s7 = bsize;
					j7 -= bsize;
				}
				for ( k = 0; k < N; k++ ) {
					dv8[ k ] = sv[k][8] - ( s7*sv[k][7] );
					ov7[ k ] = ov8[k] + ( j7*sv[k][7] );
				}
				for ( j6 = sh[6]; j6 > 0; ) {
					if ( j6 < bsize ) {
						s6 = j6;
						j6 = 0;
					} else {
						s6 = bsize;
						j6 -= bsize;
					}
					for ( k = 0; k < N; k++ ) {
						dv7[ k ] = sv[k][7] - ( s6*sv[k][6] );
						ov6[ k ] = ov7[k] + ( j6*sv[k][6] );
					}
					for ( j5 = sh[5]; j5 > 0; ) {
						if ( j5 < bsize ) {
							s5 = j5;
							j5 = 0;
						} else {
							s5 = bsize;
							j5 -= bsize;
						}
						for ( k = 0; k < N; k++ ) {
							dv6[ k ] = sv[k][6] - ( s5*sv[k][5] );
							ov5[ k ] = ov6[k] + ( j5*sv[k][5] );
						}
						for ( j4 = sh[4]; j4 > 0; ) {
							if ( j4 < bsize ) {
								s4 = j4;
								j4 = 0;
							} else {
								s4 = bsize;
								j4 -= bsize;
							}
							for ( k = 0; k < N; k++ ) {
								dv5[ k ] = sv[k][5] - ( s4*sv[k][4] );
								ov4[ k ] = ov5[k] + ( j4*sv[k][4] );
							}
							for ( j3 = sh[3]; j3 > 0; ) {
								if ( j3 < bsize ) {
									s3 = j3;
									j3 = 0;
								} else {
									s3 = bsize;
									j3 -= bsize;
								}
								for ( k = 0; k < N; k++ ) {
									dv4[ k ] = sv[k][4] - ( s3*sv[k][3] );
									ov3[ k ] = ov4[k] + ( j3*sv[k][3] );
								}
								for ( j2 = sh[2]; j2 > 0; ) {
									if ( j2 < bsize ) {
										s2 = j2;
										j2 = 0;
									} else {
										s2 = bsize;
										j2 -= bsize;
									}
									for ( k = 0; k < N; k++ ) {
										dv3[ k ] = sv[k][3] - ( s2*sv[k][2] );
										ov2[ k ] = ov3[k] + ( j2*sv[k][2] );
									}
									for ( j1 = sh[1]; j1 > 0; ) {
										if ( j1 < bsize ) {
											s1 = j1;
											j1 = 0;
										} else {
											s1 = bsize;
											j1 -= bsize;
										}
										for ( k = 0; k < N; k++ ) {
											dv2[ k ] = sv[k][2] - ( s1*sv[k][1] );
											ov1[ k ] = ov2[k] + ( j1*sv[k][1] );
										}
										for ( j0 = sh[0]; j0 > 0; ) {
											if ( j0 < bsize ) {
												s0 = j0;
												j0 = 0;
											} else {
												s0 = bsize;
												j0 -= bsize;
											}
											// Compute index offsets and loop offset increments for the first ndarray elements in the current block...
											for ( k = 0; k < N; k++ ) {
												iv[ k ] = ov1[k] + ( j0*sv[k][0] );
												dv1[ k ] = sv[k][1] - ( s0*sv[k][0] );
											}
											// Iterate over the non-reduced ndarray dimensions...
											for ( i9 = 0; i9 < s9; i9++ ) {
												for ( i8 = 0; i8 < s8; i8++ ) {
													for ( i7 = 0; i7 < s7; i7++ ) {
														for ( i6 = 0; i6 < s6; i6++ ) {
															for ( i5 = 0; i5 < s5; i5++ ) {
																for ( i4 = 0; i4 < s4; i4++ ) {
																	for ( i3 = 0; i3 < s3; i3++ ) {
																		for ( i2 = 0; i2 < s2; i2++ ) {
																			for ( i1 = 0; i1 < s1; i1++ ) {
																				for ( i0 = 0; i0 < s0; i0++ ) {
																					// Point the views at the current sub-array and store the reduction result at the current output offset (`iv[ 1 ]`):
																					setViewOffsets( views, iv );
																					set( ybuf, iv[ 1 ], fcn( views, opts ) );
																					incrementOffsets( iv, dv0 );
																				}
																				incrementOffsets( iv, dv1 );
																			}
																			incrementOffsets( iv, dv2 );
																		}
																		incrementOffsets( iv, dv3 );
																	}
																	incrementOffsets( iv, dv4 );
																}
																incrementOffsets( iv, dv5 );
															}
															incrementOffsets( iv, dv6 );
														}
														incrementOffsets( iv, dv7 );
													}
													incrementOffsets( iv, dv8 );
												}
												incrementOffsets( iv, dv9 );
											}
										}
									}
								}
							}
						}
					}
				}
			}
		}
	}
}
| 390 | + |
| 391 | + |
| 392 | +// EXPORTS // |
| 393 | + |
| 394 | +module.exports = blockedunary10d; |
0 commit comments