From 48a35135fb654bbecb8a1dfe08c7e378829e9b99 Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Tue, 9 Apr 2019 08:02:30 -0700 Subject: [PATCH] Convert all tabs to spaces, add CI. (#18959) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/18959 ghimport-source-id: a934163fa34cb2019732d5f49dc7290c376bf156 Differential Revision: D14831246 Pulled By: ezyang fbshipit-source-id: beb92dc4ee8c82f4c8259c081dd72e477fe7a9d0 --- .travis.yml | 4 + aten/src/ATen/CMakeLists.txt | 22 +- aten/src/ATen/cpu/vec256/intrinsics.h | 2 +- aten/src/ATen/native/Linear.cpp | 28 +- aten/src/ATen/native/LossCTC.cpp | 34 +- aten/src/ATen/native/NNPACK.cpp | 6 +- aten/src/ATen/native/RNN.cpp | 2 +- aten/src/ATen/native/RangeFactories.cpp | 4 +- aten/src/ATen/native/cpu/avx_mathfun.h | 6 +- aten/src/ATen/native/cuda/CuFFTPlanCache.h | 2 +- aten/src/ATen/native/cuda/Embedding.cu | 6 +- aten/src/ATen/native/cuda/LossCTC.cu | 56 +- aten/src/ATen/native/cuda/RangeFactories.cu | 4 +- aten/src/ATen/native/cuda/WeightNorm.cu | 22 +- aten/src/ATen/native/cudnn/LossCTC.cpp | 10 +- aten/src/ATen/native/miopen/Conv_miopen.cpp | 2 +- aten/src/TH/THLapack.h | 10 +- aten/src/TH/THMemoryFile.cpp | 2 +- aten/src/TH/generic/THTensorLapack.cpp | 32 +- aten/src/TH/generic/THTensorMoreMath.cpp | 48 +- aten/src/TH/generic/THVector.h | 8 +- aten/src/TH/vector/VSX.cpp | 2 +- aten/src/TH/vector/simd.h | 2 +- aten/src/THC/THCBlas.cu | 4 +- aten/src/THC/THCTensorMath.cuh | 4 +- aten/src/THC/THCTensorRandom.cuh | 4 +- aten/src/THC/generic/THCTensorMath.cu | 4 +- aten/src/THC/generic/THCTensorMathReduce.h | 6 +- aten/src/THC/generic/THCTensorRandom.cu | 2 +- aten/src/THCUNN/LogSigmoid.cu | 4 +- aten/src/THCUNN/LookupTable.cu | 2 +- aten/src/THCUNN/LookupTableBag.cu | 12 +- aten/src/THCUNN/SpatialUpSamplingNearest.cu | 32 +- aten/src/THCUNN/TemporalUpSamplingNearest.cu | 32 +- .../src/THCUNN/VolumetricUpSamplingNearest.cu | 20 +- aten/src/THCUNN/common.h | 10 +- aten/src/THCUNN/generic/LookupTableBag.cu | 6 +- .../THCUNN/generic/SpatialConvolutionMM.cu | 2 +- .../generic/SpatialDilatedConvolution.cu | 2 +- .../generic/SpatialUpSamplingNearest.cu | 24 +- aten/src/THCUNN/generic/THCUNN.h | 6 +- .../generic/TemporalUpSamplingNearest.cu | 4 +- .../generic/VolumetricUpSamplingNearest.cu | 14 +- aten/src/THCUNN/upsampling.h | 2 +- aten/src/THNN/generic/BCECriterion.c | 12 +- aten/src/THNN/generic/ClassNLLCriterion.c | 6 +- aten/src/THNN/generic/MultiMarginCriterion.c | 2 +- .../THNN/generic/SpatialAdaptiveMaxPooling.c | 2 +- aten/src/THNN/generic/SpatialAveragePooling.c | 16 +- .../THNN/generic/SpatialClassNLLCriterion.c | 16 +- aten/src/THNN/generic/SpatialConvolutionMM.c | 50 +- .../THNN/generic/SpatialDilatedConvolution.c | 14 +- .../THNN/generic/SpatialDilatedMaxPooling.c | 58 +- .../generic/SpatialFullDilatedConvolution.c | 16 +- aten/src/THNN/generic/SpatialMaxUnpooling.c | 14 +- .../src/THNN/generic/TemporalRowConvolution.c | 776 ++++++++-------- .../VolumetricAdaptiveAveragePooling.c | 2 +- .../THNN/generic/VolumetricAveragePooling.c | 2 +- .../THNN/generic/VolumetricConvolutionMM.c | 2 +- .../VolumetricFullDilatedConvolution.c | 2 +- aten/src/THNN/init.cpp | 66 +- c10/test/util/LeftRight_test.cpp | 2 +- c10/util/Half.h | 298 +++--- caffe2/operators/assert_op.cc | 2 +- caffe2/operators/counter_ops.cc | 22 +- caffe2/operators/expand_op.cc | 10 +- cmake/Dependencies.cmake | 8 +- cmake/Modules/FindCUB.cmake | 8 +- cmake/Modules/FindMIOpen.cmake | 26 +- cmake/Modules/Findpybind11.cmake | 8 +- docs/caffe2/stylesheet.css | 850 +++++++++--------- docs/cpp/source/notes/tensor_creation.rst | 12 +- docs/make.bat | 20 +- docs/source/jit.rst | 148 +-- docs/source/notes/windows.rst | 4 +- tools/pytorch.version | 34 +- .../csrc/api/include/torch/nn/modules/conv.h | 6 +- torch/csrc/jit/README.md | 68 +- torch/csrc/utils/pybind.h | 18 +- torch/csrc/utils/tensor_numpy.cpp | 2 +- 80 files changed, 1558 insertions(+), 1554 deletions(-) diff --git a/.travis.yml b/.travis.yml index beb13346311576..beb0d69545ea0c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,10 @@ matrix: python: "3.6" dist: xenial script: cd .circleci && ./ensure-consistency.py + - name: "Ensure no tabs" + python: "2.7" + script: + - (! git grep -I -l $'\t' -- . ':(exclude)*.svg' ':(exclude)**Makefile' ':(exclude)**/contrib/**' ':(exclude)third_party' ':(exclude).gitattributes' ':(exclude).gitmodules' || (echo "The above files have tabs; please convert them to spaces"; false)) - name: "Python 2.7 Lint" python: "2.7" install: pip install flake8 diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt index 98af97d223a1f3..5c9627ab65bb58 100644 --- a/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt @@ -252,21 +252,21 @@ IF(USE_CUDA AND NOT USE_ROCM) EXECUTE_PROCESS(COMMAND touch ${CMAKE_CURRENT_BINARY_DIR}/empty_file.cc) if(${CUDA_VERSION_MAJOR} EQUAL "8") SET(CUFFT_FAKELINK_OPTIONS - --generate-code arch=compute_35,code=sm_35 - --generate-code arch=compute_50,code=sm_50 - --generate-code arch=compute_60,code=sm_60) + --generate-code arch=compute_35,code=sm_35 + --generate-code arch=compute_50,code=sm_50 + --generate-code arch=compute_60,code=sm_60) elseif(${CUDA_VERSION_MAJOR} EQUAL "9") SET(CUFFT_FAKELINK_OPTIONS - --generate-code arch=compute_35,code=sm_35 - --generate-code arch=compute_50,code=sm_50 - --generate-code arch=compute_60,code=sm_60 - --generate-code arch=compute_70,code=sm_70) + --generate-code arch=compute_35,code=sm_35 + --generate-code arch=compute_50,code=sm_50 + --generate-code arch=compute_60,code=sm_60 + --generate-code arch=compute_70,code=sm_70) elseif(${CUDA_VERSION_MAJOR} EQUAL "10") SET(CUFFT_FAKELINK_OPTIONS - --generate-code arch=compute_35,code=sm_35 - --generate-code arch=compute_50,code=sm_50 - --generate-code arch=compute_60,code=sm_60 - --generate-code arch=compute_70,code=sm_70) + --generate-code arch=compute_35,code=sm_35 + --generate-code arch=compute_50,code=sm_50 + --generate-code arch=compute_60,code=sm_60 + --generate-code arch=compute_70,code=sm_70) else() MESSAGE(FATAL_ERROR "Unhandled major cuda version ${CUDA_VERSION_MAJOR}") endif() diff --git a/aten/src/ATen/cpu/vec256/intrinsics.h b/aten/src/ATen/cpu/vec256/intrinsics.h index 76779aada7a0b3..85a887551156ee 100644 --- a/aten/src/ATen/cpu/vec256/intrinsics.h +++ b/aten/src/ATen/cpu/vec256/intrinsics.h @@ -19,7 +19,7 @@ /* GCC-compatible compiler, targeting ARM with WMMX */ #include #elif (defined(__GNUC__) || defined(__xlC__)) && \ - (defined(__VEC__) || defined(__ALTIVEC__)) + (defined(__VEC__) || defined(__ALTIVEC__)) /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */ #include #elif defined(__GNUC__) && defined(__SPE__) diff --git a/aten/src/ATen/native/Linear.cpp b/aten/src/ATen/native/Linear.cpp index a166a64f1a7652..f3558a3cbe561b 100644 --- a/aten/src/ATen/native/Linear.cpp +++ b/aten/src/ATen/native/Linear.cpp @@ -46,12 +46,12 @@ static Tensor sumproduct_pair(const Tensor& left_, const Tensor& right_, IntArra auto sr = right.size(i)>1; if (sum_dims[i]) { // first dimensions that will be summed over after multiplication if (sl && sr) { // dimensions nontrivially in both left and right must be of the same size - AT_CHECK(left.size(i)==right.size(i), "non-broadcast dimensions must match"); - sum_size *= left.size(i); + AT_CHECK(left.size(i)==right.size(i), "non-broadcast dimensions must match"); + sum_size *= left.size(i); } else if (sl) { // if it is only in one of left and right, we can sum right away - left = left.sum(i, true); + left = left.sum(i, true); } else if (sr) { - right = right.sum(i, true); + right = right.sum(i, true); } } else if (sl && sr) { // now deal with dimensions dimensions that will be in the output // dimensions nontrivially in both left and right must be of the same size @@ -117,7 +117,7 @@ static Tensor sumproduct_pair(const Tensor& left_, const Tensor& right_, IntArra if (! keepdim) { for (int i = dim-1; i>=0; i--) if (sum_dims[i]) - result.squeeze_(i); + result.squeeze_(i); } return result; } @@ -183,7 +183,7 @@ Tensor einsum(std::string eqn, TensorList tensors) { } else { // we have seen an ellipsis before, so we check compatibility AT_CHECK(candidate_num_ell_idxes == num_ell_idxes, - "ellipsis must represent ", num_ell_idxes, " dimensions in all terms"); + "ellipsis must represent ", num_ell_idxes, " dimensions in all terms"); } for (int64_t i = 0; i < num_ell_idxes; ++i) { // map ellipsis dimensions in operand to indices current_op_idxes.push_back(first_ell_idx + i); @@ -360,8 +360,8 @@ Tensor einsum(std::string eqn, TensorList tensors) { // the computation is unrolled in the unroll_dim dimension // its main purpose is to unify the computations in bilinear and bilinear_backward Tensor _trilinear(const Tensor& i1_, const Tensor& i2_, const Tensor& i3_, - IntArrayRef expand1_, IntArrayRef expand2_, IntArrayRef expand3_, - IntArrayRef sumdim_, int64_t unroll_dim) { + IntArrayRef expand1_, IntArrayRef expand2_, IntArrayRef expand3_, + IntArrayRef sumdim_, int64_t unroll_dim) { int64_t total_dim = i1_.dim()+expand1_.size(); AT_CHECK((unroll_dim >= 0) && (unroll_dim < total_dim), "unroll_dim must be in [0,", total_dim-1, "]"); auto expand1 = at::dim_list_to_bitset(expand1_, total_dim); @@ -390,11 +390,11 @@ Tensor _trilinear(const Tensor& i1_, const Tensor& i2_, const Tensor& i3_, if (expand3[i]) { i3 = i3.unsqueeze(i); if (sumdim[i] && (i != unroll_dim)) - sum_dims_12.push_back(i); + sum_dims_12.push_back(i); } else { s = i3.size(i); if (sumdim[i] && (i != unroll_dim)) - sum_dims_23.push_back(i); + sum_dims_23.push_back(i); } output_size.push_back(sumdim[i] ? 1 : s); if (i == unroll_dim) @@ -408,8 +408,8 @@ Tensor _trilinear(const Tensor& i1_, const Tensor& i2_, const Tensor& i3_, if (! sumdim[unroll_dim]) { for (int64_t k = 0; k < unroll_size; k++) { Tensor buf = at::native::sumproduct_pair(i1.narrow(unroll_dim, k * slicemul1, 1), - i2.narrow(unroll_dim, k * slicemul2, 1), - sum_dims_12, true); + i2.narrow(unroll_dim, k * slicemul2, 1), + sum_dims_12, true); buf = at::native::sumproduct_pair(buf, i3.narrow(unroll_dim, k * slicemul3, 1), sum_dims_23, true); output.narrow(unroll_dim, k, 1).add_(buf); } @@ -417,7 +417,7 @@ Tensor _trilinear(const Tensor& i1_, const Tensor& i2_, const Tensor& i3_, else { for (int64_t k = 0; k < unroll_size; k++) { Tensor buf = at::native::sumproduct_pair(i1.narrow(unroll_dim, k*slicemul1, 1), - i2.narrow(unroll_dim, k*slicemul2, 1), sum_dims_12, true); + i2.narrow(unroll_dim, k*slicemul2, 1), sum_dims_12, true); buf = at::native::sumproduct_pair(buf, i3.narrow(unroll_dim, k*slicemul3, 1), sum_dims_23, true); output.add_(buf); } @@ -473,7 +473,7 @@ Tensor tensordot(const Tensor& input1, const Tensor& input2, IntArrayRef dims1, t2 = t2.sum(dims2[i], true); } else { AT_CHECK(s1 == s2, "contracted dimensions need to match, but first has size ", s1, " in dim ", dims1[i], - " and second has size ", s2, " in dim ", dims2[i]); + " and second has size ", s2, " in dim ", dims2[i]); csize *= s1; } } diff --git a/aten/src/ATen/native/LossCTC.cpp b/aten/src/ATen/native/LossCTC.cpp index cdd7a4e284d04f..eb5a80dcb7a00f 100644 --- a/aten/src/ATen/native/LossCTC.cpp +++ b/aten/src/ATen/native/LossCTC.cpp @@ -61,7 +61,7 @@ std::tuple ctc_loss_cpu_template(const Tensor& log_probs, const tg_batch_offsets[i] = pos; pos += target_lengths[i]; if (max_target_length < target_lengths[i]) - max_target_length = target_lengths[i]; + max_target_length = target_lengths[i]; } tg_target_stride = targets.stride(0); checkSize(c, targets_arg, 0, pos); @@ -83,8 +83,8 @@ std::tuple ctc_loss_cpu_template(const Tensor& log_probs, const int64_t max_input_length = log_probs.size(0); for (int64_t b = 0; b < batch_size; b++) { AT_CHECK(input_lengths[b] <= max_input_length, - "Expected tensor to have size at least ", max_input_length, " at dimension 1, but got size ", input_lengths[b], " for ", log_probs_arg, - " (while checking arguments for ", c, ")"); + "Expected tensor to have size at least ", max_input_length, " at dimension 1, but got size ", input_lengths[b], " for ", log_probs_arg, + " (while checking arguments for ", c, ")"); } Tensor log_alpha = at::empty({batch_size, log_probs.size(0), 2*max_target_length+1}, log_probs.options()); @@ -115,11 +115,11 @@ std::tuple ctc_loss_cpu_template(const Tensor& log_probs, const // now the loop over the inputs for (int64_t t=1; t ctc_loss_cpu_template(const Tensor& log_probs, const } if (lamax == neginf) // cannot do neginf-neginf lamax = 0; - // this is the assignment of eq (6) + // this is the assignment of eq (6) log_alpha_a[t][s] = std::log(std::exp(la1-lamax)+std::exp(la2-lamax)+std::exp(la3-lamax))+lamax + log_probs_a[t][current_target_prime]; } } @@ -182,7 +182,7 @@ Tensor ctc_loss_backward_cpu_template(const Tensor& grad_out, const Tensor& log_ tg_batch_offsets[i] = pos; pos += target_lengths[i]; if (max_target_length < target_lengths[i]) - max_target_length = target_lengths[i]; + max_target_length = target_lengths[i]; } tg_target_stride = targets.stride(0); } @@ -268,9 +268,9 @@ Tensor ctc_loss_backward_cpu_template(const Tensor& grad_out, const Tensor& log_ log_beta_a[t][s] = std::log(std::exp(lb1-lbmax)+std::exp(lb2-lbmax)+std::exp(lb3-lbmax))+lbmax + log_probs_a[t][current_target_prime]; // one might check whether one can vectorize this better when done after the t-loop... - // now that we have beta, we fill in the sum of alpha*beta in eq (16) - // in contrast to the cuda implementation, we only parallelize over the batch, so we don't have a concurrency - // issue (several s can map to the same target character) + // now that we have beta, we fill in the sum of alpha*beta in eq (16) + // in contrast to the cuda implementation, we only parallelize over the batch, so we don't have a concurrency + // issue (several s can map to the same target character) // collected[b, t, target'[s]] "log+=" log_alpha[t, s]+log_beta[t, s] scalar_t log_alpha_beta = log_alpha_a[t][s] + log_beta_a[t][s]; scalar_t &lcab = grad_a[t][current_target_prime]; @@ -309,9 +309,9 @@ std::tuple ctc_loss_cpu(const Tensor& log_probs, const Tensor& t (void)zero_infinity; // only used for backwards return AT_DISPATCH_FLOATING_TYPES(log_probs.scalar_type(), "ctc_loss_cpu", [&] { if (targets.scalar_type() == kLong) { - return ctc_loss_cpu_template(log_probs, targets, input_lengths, target_lengths, BLANK); + return ctc_loss_cpu_template(log_probs, targets, input_lengths, target_lengths, BLANK); } else { - return ctc_loss_cpu_template(log_probs, targets, input_lengths, target_lengths, BLANK); + return ctc_loss_cpu_template(log_probs, targets, input_lengths, target_lengths, BLANK); } }); } @@ -320,9 +320,9 @@ Tensor ctc_loss_backward_cpu(const Tensor& grad, const Tensor& log_probs, const const Tensor& neg_log_likelihood, const Tensor& log_alpha, int64_t BLANK, bool zero_infinity) { return AT_DISPATCH_FLOATING_TYPES(log_probs.scalar_type(), "ctc_loss_backward_cpu", [&] { if (targets.scalar_type() == kLong) { - return ctc_loss_backward_cpu_template(grad, log_probs, targets, input_lengths, target_lengths, neg_log_likelihood, log_alpha, BLANK, zero_infinity); + return ctc_loss_backward_cpu_template(grad, log_probs, targets, input_lengths, target_lengths, neg_log_likelihood, log_alpha, BLANK, zero_infinity); } else { - return ctc_loss_backward_cpu_template(grad, log_probs, targets, input_lengths, target_lengths, neg_log_likelihood, log_alpha, BLANK, zero_infinity); + return ctc_loss_backward_cpu_template(grad, log_probs, targets, input_lengths, target_lengths, neg_log_likelihood, log_alpha, BLANK, zero_infinity); } }); } diff --git a/aten/src/ATen/native/NNPACK.cpp b/aten/src/ATen/native/NNPACK.cpp index 874a65b99d9194..25cb444c90428b 100644 --- a/aten/src/ATen/native/NNPACK.cpp +++ b/aten/src/ATen/native/NNPACK.cpp @@ -76,11 +76,11 @@ pthreadpool_t nnpack_threadpool() { enum nnp_status nnpack_status = nnp_initialize(); if (nnpack_status != nnp_status_success) { if (nnpack_status == nnp_status_out_of_memory) { - throw std::runtime_error("could not initialize NNPack (out of memory)"); + throw std::runtime_error("could not initialize NNPack (out of memory)"); } else if (nnpack_status == nnp_status_unsupported_hardware) { - throw std::runtime_error("could not initialize NNPack (unsupported hardware)"); + throw std::runtime_error("could not initialize NNPack (unsupported hardware)"); } else { - throw std::runtime_error("could not initialize NNPack (unknown error)"); + throw std::runtime_error("could not initialize NNPack (unknown error)"); } } unsigned int threads; diff --git a/aten/src/ATen/native/RNN.cpp b/aten/src/ATen/native/RNN.cpp index 7ec339ccb98f10..420e0ea4df041d 100644 --- a/aten/src/ATen/native/RNN.cpp +++ b/aten/src/ATen/native/RNN.cpp @@ -614,7 +614,7 @@ std::tuple NAME( \ num_layers, dropout_p, train, bidirectional, batch_first); \ return std::make_tuple(output, hy); \ } \ - check_device(_input, _params, hx); \ + check_device(_input, _params, hx); \ auto input = batch_first ? _input.transpose(0, 1) : _input; \ auto params = gather_params(_params, has_biases); \ auto results = _rnn_impl_with_concat( \ diff --git a/aten/src/ATen/native/RangeFactories.cpp b/aten/src/ATen/native/RangeFactories.cpp index ef9e63aa9a819c..da362422bb30b0 100644 --- a/aten/src/ATen/native/RangeFactories.cpp +++ b/aten/src/ATen/native/RangeFactories.cpp @@ -126,10 +126,10 @@ Tensor& arange_cpu_out(Tensor& result, Scalar start, Scalar end, Scalar step) { double size_d; if (std::is_same::value) { size_d = std::ceil(static_cast(end.to() - start.to()) - / step.to()); + / step.to()); } else { size_d = std::ceil(static_cast(end.to() - start.to()) - / step.to()); + / step.to()); } AT_CHECK(xstep > 0 || xstep < 0, "step must be nonzero"); diff --git a/aten/src/ATen/native/cpu/avx_mathfun.h b/aten/src/ATen/native/cpu/avx_mathfun.h index 6beb73c28b60e6..85ba484870e8a8 100644 --- a/aten/src/ATen/native/cpu/avx_mathfun.h +++ b/aten/src/ATen/native/cpu/avx_mathfun.h @@ -100,7 +100,7 @@ typedef union imm_xmm_union { #define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \ imm_xmm_union u __attribute__((aligned(32))); \ - u.imm = imm_; \ + u.imm = imm_; \ xmm0_ = u.xmm[0]; \ xmm1_ = u.xmm[1]; \ } @@ -228,8 +228,8 @@ inline v8sf log256_ps(v8sf x) { return x; } -_PS256_CONST(exp_hi, 88.3762626647949f); -_PS256_CONST(exp_lo, -88.3762626647949f); +_PS256_CONST(exp_hi, 88.3762626647949f); +_PS256_CONST(exp_lo, -88.3762626647949f); _PS256_CONST(cephes_LOG2EF, 1.44269504088896341); _PS256_CONST(cephes_exp_C1, 0.693359375); diff --git a/aten/src/ATen/native/cuda/CuFFTPlanCache.h b/aten/src/ATen/native/cuda/CuFFTPlanCache.h index 062ea2e3bc3730..28631d17eca733 100644 --- a/aten/src/ATen/native/cuda/CuFFTPlanCache.h +++ b/aten/src/ATen/native/cuda/CuFFTPlanCache.h @@ -266,7 +266,7 @@ class CuFFTConfig { CUFFT_CHECK(hipfftMakePlanMany(plan(), signal_ndim, signal_sizes.data(), /* inembed */ nullptr, /* base_istride */ 1, /* idist */ 1, /* onembed */ nullptr, /* base_ostride */ 1, /* odist */ 1, - exec_type, batch, &ws_size_t)); + exec_type, batch, &ws_size_t)); #else CUFFT_CHECK(cufftXtMakePlanMany(plan(), signal_ndim, signal_sizes.data(), /* inembed */ nullptr, /* base_istride */ 1, /* idist */ 1, itype, diff --git a/aten/src/ATen/native/cuda/Embedding.cu b/aten/src/ATen/native/cuda/Embedding.cu index 8a24923eff6a00..88d2daa2edbc08 100644 --- a/aten/src/ATen/native/cuda/Embedding.cu +++ b/aten/src/ATen/native/cuda/Embedding.cu @@ -87,10 +87,10 @@ __global__ void embedding_backward_feature_kernel match_found_this_thread = 0; #ifdef __HIP_PLATFORM_HCC__ unsigned long long int matchmask = WARP_BALLOT(match_found_this_thread); - int first_remaining_peer = __ffsll(matchmask) - 1; + int first_remaining_peer = __ffsll(matchmask) - 1; #else unsigned int matchmask = WARP_BALLOT(match_found_this_thread); - int first_remaining_peer = __ffs(matchmask) - 1; + int first_remaining_peer = __ffs(matchmask) - 1; #endif if(threadIdx.y == first_remaining_peer) // Nominate lowest-indexed warp as the leader @@ -103,7 +103,7 @@ __global__ void embedding_backward_feature_kernel #else first_remaining_peer = __ffs(matchmask) - 1; #endif - my_s[threadIdx.x] += smem[threadIdx.x + WARP_SIZE*first_remaining_peer]; + my_s[threadIdx.x] += smem[threadIdx.x + WARP_SIZE*first_remaining_peer]; matchmask ^= (1 << first_remaining_peer); } if(f < s) diff --git a/aten/src/ATen/native/cuda/LossCTC.cu b/aten/src/ATen/native/cuda/LossCTC.cu index 9fe969703c581d..8521a344a77bc3 100644 --- a/aten/src/ATen/native/cuda/LossCTC.cu +++ b/aten/src/ATen/native/cuda/LossCTC.cu @@ -110,8 +110,8 @@ ctc_loss_log_alpha_gpu_kernel(scalar_t* __restrict__ log_alpha_data, for (int64_t t=1; t < max_input_length; t++) { __syncthreads(); // on cuda 9 we might use partial synchronization of only the threads within the same batch if ((t < input_length) && (target_length > 0) && (s < 2*target_length+1)) { - // only for valid t, s. This is equation (6) and (7), la1, la2, la3 are the three summands, - // lamax is the maximum for the logsumexp trick. + // only for valid t, s. This is equation (6) and (7), la1, la2, la3 are the three summands, + // lamax is the maximum for the logsumexp trick. scalar_t la1 = log_alpha_data[la_batch_offset + la_input_stride * (t-1) + la_target_stride * s]; scalar_t lamax = la1; scalar_t la2, la3; @@ -135,7 +135,7 @@ ctc_loss_log_alpha_gpu_kernel(scalar_t* __restrict__ log_alpha_data, log_alpha_data[la_batch_offset + la_input_stride * t + la_target_stride * s] = std::log(std::exp(la1-lamax)+std::exp(la2-lamax)+std::exp(la3-lamax))+lamax + log_probs_data[lp_batch_offset + t * lp_input_stride + lp_char_stride * current_char]; } else { - // otherwise we just set to neginf + // otherwise we just set to neginf if (s < 2*max_target_length+1) log_alpha_data[la_batch_offset + la_input_stride * t + la_target_stride * s] = neginf; } @@ -218,8 +218,8 @@ std::tuple ctc_loss_gpu_template(const Tensor& log_probs, const int64_t max_input_length = log_probs.size(0); for (int64_t b = 0; b < batch_size; b++) { AT_CHECK(input_lengths[b] <= max_input_length, - "Expected tensor to have size at least ", max_input_length, " at dimension 1, but got size ", targets.size(0), " for ", targets_arg, - " (while checking arguments for ", c, ")"); + "Expected tensor to have size at least ", max_input_length, " at dimension 1, but got size ", targets.size(0), " for ", targets_arg, + " (while checking arguments for ", c, ")"); } auto target_lengths_t = at::tensor(target_lengths, targets.options().dtype(kLong)); @@ -242,7 +242,7 @@ std::tuple ctc_loss_gpu_template(const Tensor& log_probs, const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); ctc_loss_log_alpha_gpu_kernel<<>>( - log_alpha.data(), + log_alpha.data(), log_probs.data(), input_lengths_t.data(), log_probs.size(0), targets.data(), target_lengths_t.data(), max_target_length, neg_log_likelihood.data(), @@ -304,8 +304,8 @@ ctc_loss_backward_log_beta_gpu_kernel(scalar_t* __restrict__ log_beta_data, if (s < 2*target_length+1) { current_target_prime = get_target_prime(targets_data, tg_batch_offset, tg_target_stride, s, BLANK); have_three = ((s < 2*target_length-1) && - (get_target_prime(targets_data, tg_batch_offset, tg_target_stride, s+2, BLANK) != - current_target_prime)); + (get_target_prime(targets_data, tg_batch_offset, tg_target_stride, s+2, BLANK) != + current_target_prime)); } else { current_target_prime = BLANK; have_three = false; @@ -377,7 +377,7 @@ ctc_loss_backward_collect_nonblank_gpu_kernel(scalar_t* __restrict__ gradient_da int64_t la_batch_stride, int64_t la_input_stride, int64_t la_target_stride, int64_t lb_batch_stride, int64_t lb_input_stride, int64_t lb_target_stride, const int64_t* __restrict__ tg_batch_offsets, int64_t tg_target_stride, - int64_t batch_size, int64_t num_labels, int64_t BLANK, bool zero_infinity) { + int64_t batch_size, int64_t num_labels, int64_t BLANK, bool zero_infinity) { int64_t b = threadIdx.y + blockIdx.y * blockDim.y; int64_t s = threadIdx.x + blockIdx.x * blockDim.y; // note, this directly indexes into targets, no targets prime! @@ -405,9 +405,9 @@ ctc_loss_backward_collect_nonblank_gpu_kernel(scalar_t* __restrict__ gradient_da for (int64_t t = 0; t < input_length; t++) { scalar_t lp = log_probs_data[lp_batch_offset + t * lp_input_stride + lp_char_stride * target]; atomicAdd(&gradient_data[gr_batch_offset + t * gr_input_stride + gr_char_stride * target], - -std::exp(log_alpha_data[la_batch_offset + la_input_stride * t + la_target_stride * (s*2+1)] - + log_beta_data[lb_batch_offset + lb_input_stride * t + lb_target_stride * (s*2+1)] - + nll - lp) * gr); + -std::exp(log_alpha_data[la_batch_offset + la_input_stride * t + la_target_stride * (s*2+1)] + + log_beta_data[lb_batch_offset + lb_input_stride * t + lb_target_stride * (s*2+1)] + + nll - lp) * gr); } } @@ -429,7 +429,7 @@ ctc_loss_backward_collect_gpu_kernel(scalar_t* __restrict__ gradient_data, int64_t la_batch_stride, int64_t la_input_stride, int64_t la_target_stride, int64_t lb_batch_stride, int64_t lb_input_stride, int64_t lb_target_stride, const int64_t* __restrict__ tg_batch_offsets, int64_t tg_target_stride, - int64_t batch_size, int64_t num_labels, int64_t BLANK, bool zero_infinity) { + int64_t batch_size, int64_t num_labels, int64_t BLANK, bool zero_infinity) { constexpr scalar_t neginf = -INFINITY; int64_t b = threadIdx.y + blockIdx.y * blockDim.y; @@ -481,7 +481,7 @@ ctc_loss_backward_collect_gpu_kernel(scalar_t* __restrict__ gradient_data, // We don't do a lot of checking as we envision this to be called only when backpropagating through a (well-checked) forward. template Tensor ctc_loss_backward_gpu_template(const Tensor& grad_out, const Tensor& log_probs, const Tensor& targets, IntArrayRef input_lengths, IntArrayRef target_lengths, - const Tensor& neg_log_likelihood, const Tensor& log_alpha, int64_t BLANK, bool zero_infinity) { + const Tensor& neg_log_likelihood, const Tensor& log_alpha, int64_t BLANK, bool zero_infinity) { constexpr scalar_t neginf = -INFINITY; using target_t = typename std::conditional::type; int64_t batch_size = log_probs.size(1); @@ -500,7 +500,7 @@ Tensor ctc_loss_backward_gpu_template(const Tensor& grad_out, const Tensor& log_ tg_batch_offsets_data[i] = pos; pos += target_lengths[i]; if (max_target_length < target_lengths[i]) - max_target_length = target_lengths[i]; + max_target_length = target_lengths[i]; } tg_target_stride = targets.stride(0); } @@ -558,15 +558,15 @@ Tensor ctc_loss_backward_gpu_template(const Tensor& grad_out, const Tensor& log_ // maybe we should kernelize this, too. auto grad_blank = grad.narrow(2, BLANK, 1); grad_blank -= (at::logsumexp(log_alpha.as_strided({batch_size, log_alpha.size(1), max_target_length+1}, - {log_alpha.stride(0), log_alpha.stride(1), log_alpha.stride(2)*2}) - + log_beta.as_strided({batch_size, log_beta.size(1), max_target_length+1}, - {log_beta.stride(0), log_beta.stride(1), log_beta.stride(2)*2}), - 2, true) - .permute({1, 0, 2}) - .add_(neg_log_likelihood.view({1, batch_size, 1})) - .sub_(log_probs.narrow(2, BLANK, 1)) - .exp_() - ); + {log_alpha.stride(0), log_alpha.stride(1), log_alpha.stride(2)*2}) + + log_beta.as_strided({batch_size, log_beta.size(1), max_target_length+1}, + {log_beta.stride(0), log_beta.stride(1), log_beta.stride(2)*2}), + 2, true) + .permute({1, 0, 2}) + .add_(neg_log_likelihood.view({1, batch_size, 1})) + .sub_(log_probs.narrow(2, BLANK, 1)) + .exp_() + ); // scale by output gradient (blanks and first summand of non-blanks) grad *= grad_out.view({1, batch_size, 1}); if (zero_infinity) { @@ -630,9 +630,9 @@ std::tuple ctc_loss_gpu(const Tensor& log_probs, const Tensor& t (void)zero_infinity; // only used for backward return AT_DISPATCH_FLOATING_TYPES(log_probs.scalar_type(), "ctc_loss_cuda", [&] { if (targets.scalar_type() == kLong) { - return ctc_loss_gpu_template(log_probs, targets, input_lengths, target_lengths, BLANK); + return ctc_loss_gpu_template(log_probs, targets, input_lengths, target_lengths, BLANK); } else { - return ctc_loss_gpu_template(log_probs, targets, input_lengths, target_lengths, BLANK); + return ctc_loss_gpu_template(log_probs, targets, input_lengths, target_lengths, BLANK); } }); } @@ -641,9 +641,9 @@ Tensor ctc_loss_backward_gpu(const Tensor& grad, const Tensor& log_probs, const const Tensor& neg_log_likelihood, const Tensor& log_alpha, int64_t BLANK, bool zero_infinity) { return AT_DISPATCH_FLOATING_TYPES(log_probs.scalar_type(), "ctc_loss_backward_cuda", [&] { if (targets.scalar_type() == kLong) { - return ctc_loss_backward_gpu_template(grad, log_probs, targets, input_lengths, target_lengths, neg_log_likelihood, log_alpha, BLANK, zero_infinity); + return ctc_loss_backward_gpu_template(grad, log_probs, targets, input_lengths, target_lengths, neg_log_likelihood, log_alpha, BLANK, zero_infinity); } else { - return ctc_loss_backward_gpu_template(grad, log_probs, targets, input_lengths, target_lengths, neg_log_likelihood, log_alpha, BLANK, zero_infinity); + return ctc_loss_backward_gpu_template(grad, log_probs, targets, input_lengths, target_lengths, neg_log_likelihood, log_alpha, BLANK, zero_infinity); } }); } diff --git a/aten/src/ATen/native/cuda/RangeFactories.cu b/aten/src/ATen/native/cuda/RangeFactories.cu index ead7f2c9321510..7797316df86ccb 100644 --- a/aten/src/ATen/native/cuda/RangeFactories.cu +++ b/aten/src/ATen/native/cuda/RangeFactories.cu @@ -146,10 +146,10 @@ Tensor& arange_cuda_out(Tensor& result, Scalar start, Scalar end, Scalar step) { double size_d; if (std::is_same::value) { size_d = std::ceil(static_cast(end.to() - start.to()) - / step.to()); + / step.to()); } else { size_d = std::ceil(static_cast(end.to() - start.to()) - / step.to()); + / step.to()); } AT_CHECK(xstep > 0 || xstep < 0, "step must be nonzero"); diff --git a/aten/src/ATen/native/cuda/WeightNorm.cu b/aten/src/ATen/native/cuda/WeightNorm.cu index 151f4c9929bbee..fcb737fd95b8bc 100644 --- a/aten/src/ATen/native/cuda/WeightNorm.cu +++ b/aten/src/ATen/native/cuda/WeightNorm.cu @@ -441,18 +441,18 @@ std::tuple weight_norm_cuda_backward { using accscalar_t = acc_type; - weight_norm_bwd_first_dim_kernel - << + <<>> - (grad_v.data(), - grad_g.data(), - grad_w.data(), - saved_v.data(), - saved_g.data(), - saved_norms.data(), - rowSize); + (grad_v.data(), + grad_g.data(), + grad_w.data(), + saved_v.data(), + saved_g.data(), + saved_norms.data(), + rowSize); }); } else if(dim == ndims - 1) diff --git a/aten/src/ATen/native/cudnn/LossCTC.cpp b/aten/src/ATen/native/cudnn/LossCTC.cpp index bb7c1fe539f3b9..c634305d33d2b3 100644 --- a/aten/src/ATen/native/cudnn/LossCTC.cpp +++ b/aten/src/ATen/native/cudnn/LossCTC.cpp @@ -72,17 +72,17 @@ std::tuple _cudnn_ctc_loss(const Tensor& log_probs_t, const Tens size_t workspace_size; AT_CUDNN_CHECK(cudnnGetCTCLossWorkspaceSize(handle, probs_desc.desc(), grad_desc.desc(), - targets->data(), target_lengths.data(), input_lengths.data(), - algo, ctc_loss_desc.desc(), &workspace_size)); + targets->data(), target_lengths.data(), input_lengths.data(), + algo, ctc_loss_desc.desc(), &workspace_size)); Tensor workspace = at::empty(workspace_size, log_probs->options().dtype(kByte)); Tensor costs = at::empty({log_probs->size(1)}, log_probs->options()); AT_CUDNN_CHECK(cudnnCTCLoss(handle, probs_desc.desc(), probs.data_ptr(), - targets->data(), target_lengths.data(), input_lengths.data(), - costs.data_ptr(), grad_desc.desc(), grad.data_ptr(), algo, - ctc_loss_desc.desc(), workspace.data_ptr(), workspace_size)); + targets->data(), target_lengths.data(), input_lengths.data(), + costs.data_ptr(), grad_desc.desc(), grad.data_ptr(), algo, + ctc_loss_desc.desc(), workspace.data_ptr(), workspace_size)); return std::make_tuple(costs, grad); } diff --git a/aten/src/ATen/native/miopen/Conv_miopen.cpp b/aten/src/ATen/native/miopen/Conv_miopen.cpp index 47e399e82deb4d..cffbb1b1ac77d0 100644 --- a/aten/src/ATen/native/miopen/Conv_miopen.cpp +++ b/aten/src/ATen/native/miopen/Conv_miopen.cpp @@ -456,7 +456,7 @@ struct algorithm_search { args.wdesc.desc(), args.weight.data_ptr(), args.cdesc.desc(), args.odesc.desc(), args.output.data_ptr(), - 1, // just return the fastest + 1, // just return the fastest &perf_count, &perf_results, ws.data, diff --git a/aten/src/TH/THLapack.h b/aten/src/TH/THLapack.h index c90ee09f488522..e50faa8f4584b0 100644 --- a/aten/src/TH/THLapack.h +++ b/aten/src/TH/THLapack.h @@ -5,12 +5,12 @@ #define THLapack_(NAME) TH_CONCAT_4(TH,Real,Lapack_,NAME) -#define THLapackCheck(fmt, func, info , ...) \ -if (info < 0) { \ +#define THLapackCheck(fmt, func, info , ...) \ +if (info < 0) { \ THError("Lapack Error in %s : Illegal Argument %d", func, -info); \ -} else if(info > 0) { \ - THError(fmt, func, info, ##__VA_ARGS__); \ -} \ +} else if(info > 0) { \ + THError(fmt, func, info, ##__VA_ARGS__); \ +} \ #define THLapackCheckWithCleanup(fmt, cleanup, func, info , ...) \ if (info < 0) { \ diff --git a/aten/src/TH/THMemoryFile.cpp b/aten/src/TH/THMemoryFile.cpp index 42230c1d7cef45..55ea446f9fe025 100644 --- a/aten/src/TH/THMemoryFile.cpp +++ b/aten/src/TH/THMemoryFile.cpp @@ -14,7 +14,7 @@ typedef struct THMemoryFile__ THCharStorage *storage; ssize_t size; ssize_t position; - int longSize; + int longSize; } THMemoryFile; diff --git a/aten/src/TH/generic/THTensorLapack.cpp b/aten/src/TH/generic/THTensorLapack.cpp index 3ace8a6b1405e7..e0ed46a8b2410a 100644 --- a/aten/src/TH/generic/THTensorLapack.cpp +++ b/aten/src/TH/generic/THTensorLapack.cpp @@ -149,13 +149,13 @@ void THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a) /* get optimal workspace size */ THLapack_(gels)('N', m, n, nrhs, ra__->data(), lda, - rb__->data(), ldb, - &wkopt, -1, &info); + rb__->data(), ldb, + &wkopt, -1, &info); lwork = (int)wkopt; work = THTensor_(newWithSize1d)(lwork); THLapack_(gels)('N', m, n, nrhs, ra__->data(), lda, - rb__->data(), ldb, - work->data(), lwork, &info); + rb__->data(), ldb, + work->data(), lwork, &info); THLapackCheckWithCleanup("Lapack Error in %s : The %d-th diagonal element of the triangular factor of A is zero", THCleanup(c10::raw::intrusive_ptr::decref(ra__); @@ -378,21 +378,21 @@ void THTensor_(gesdd2)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *ra } THLapack_(gesdd)(jobz, - m,n,ra__->data(),lda, - rs__data, - ru__data, - ldu, - rv__data, ldvt, - &wkopt, -1, THIntTensor_data(iwork), &info); + m,n,ra__->data(),lda, + rs__data, + ru__data, + ldu, + rv__data, ldvt, + &wkopt, -1, THIntTensor_data(iwork), &info); lwork = (int)wkopt; work = THTensor_(newWithSize1d)(lwork); THLapack_(gesdd)(jobz, - m,n,ra__->data(),lda, - rs__data, - ru__data, - ldu, - rv__data, ldvt, - work->data(),lwork, THIntTensor_data(iwork), &info); + m,n,ra__->data(),lda, + rs__data, + ru__data, + ldu, + rv__data, ldvt, + work->data(),lwork, THIntTensor_data(iwork), &info); if (jobz != 'N') { THLapackCheckWithCleanup("Lapack Error %s : %d superdiagonals failed to converge.", diff --git a/aten/src/TH/generic/THTensorMoreMath.cpp b/aten/src/TH/generic/THTensorMoreMath.cpp index 8461192baca66e..48fa3733f3fcf0 100644 --- a/aten/src/TH/generic/THTensorMoreMath.cpp +++ b/aten/src/TH/generic/THTensorMoreMath.cpp @@ -999,31 +999,31 @@ int THTensor_(equal)(THTensor *ta, THTensor* tb) return equal; } -#define TENSOR_IMPLEMENT_LOGICAL(NAME,OP) \ +#define TENSOR_IMPLEMENT_LOGICAL(NAME,OP) \ void THTensor_(NAME##Value)(THByteTensor *r_, THTensor* t, scalar_t value) \ - { \ - THByteTensor_resizeNd(r_, t->dim(), THTensor_getSizePtr(t), NULL); \ - TH_TENSOR_APPLY2(unsigned char, r_, scalar_t, t, \ - *r__data = (*t_data OP value) ? 1 : 0;); \ - } \ - void THTensor_(NAME##ValueT)(THTensor* r_, THTensor* t, scalar_t value) \ - { \ - THTensor_(resizeNd)(r_, t->dim(), THTensor_getSizePtr(t), NULL); \ - TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, \ - *r__data = (*t_data OP value) ? 1 : 0;); \ - } \ + { \ + THByteTensor_resizeNd(r_, t->dim(), THTensor_getSizePtr(t), NULL); \ + TH_TENSOR_APPLY2(unsigned char, r_, scalar_t, t, \ + *r__data = (*t_data OP value) ? 1 : 0;); \ + } \ + void THTensor_(NAME##ValueT)(THTensor* r_, THTensor* t, scalar_t value) \ + { \ + THTensor_(resizeNd)(r_, t->dim(), THTensor_getSizePtr(t), NULL); \ + TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, \ + *r__data = (*t_data OP value) ? 1 : 0;); \ + } \ void THTensor_(NAME##Tensor)(THByteTensor *r_, THTensor *ta, THTensor *tb) \ - { \ - THByteTensor_resizeNd(r_, ta->dim(), THTensor_getSizePtr(ta), NULL); \ - TH_TENSOR_APPLY3(unsigned char, r_, scalar_t, ta, scalar_t, tb, \ - *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \ - } \ + { \ + THByteTensor_resizeNd(r_, ta->dim(), THTensor_getSizePtr(ta), NULL); \ + TH_TENSOR_APPLY3(unsigned char, r_, scalar_t, ta, scalar_t, tb, \ + *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \ + } \ void THTensor_(NAME##TensorT)(THTensor *r_, THTensor *ta, THTensor *tb) \ - { \ - THTensor_(resizeNd)(r_, ta->dim(), THTensor_getSizePtr(ta), NULL); \ - TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, ta, scalar_t, tb, \ - *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \ - } \ + { \ + THTensor_(resizeNd)(r_, ta->dim(), THTensor_getSizePtr(ta), NULL); \ + TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, ta, scalar_t, tb, \ + *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \ + } \ TENSOR_IMPLEMENT_LOGICAL(lt,<) @@ -1302,10 +1302,10 @@ void THTensor_(norm)(THTensor *r_, THTensor *t, scalar_t value, int dimension, i *r__data = TH_MATH_NAME(pow)(sum, 1.0/3), 0); } else if (value == INFINITY) { DIM_REDUCE(sum = THMax(sum, TH_MATH_NAME(fabs)(t_data[i*t_stride])), - *r__data = sum, 0); + *r__data = sum, 0); } else if (value == -INFINITY) { DIM_REDUCE(sum = THMin(sum, TH_MATH_NAME(fabs)(t_data[i*t_stride])), - *r__data = sum, INFINITY); + *r__data = sum, INFINITY); } else { DIM_REDUCE(sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(t_data[i*t_stride]), value), *r__data = TH_MATH_NAME(pow)(sum, 1.0/value), 0); diff --git a/aten/src/TH/generic/THVector.h b/aten/src/TH/generic/THVector.h index 8db75fa7203970..aa3b858d2bb2c4 100644 --- a/aten/src/TH/generic/THVector.h +++ b/aten/src/TH/generic/THVector.h @@ -17,10 +17,10 @@ TH_API void THVector_(cdiv)(scalar_t *z, const scalar_t *x, const scalar_t *y, c TH_API void THVector_(divs)(scalar_t *y, const scalar_t *x, const scalar_t c, const ptrdiff_t n); TH_API void THVector_(neg)(scalar_t *y, const scalar_t *x, const ptrdiff_t n); TH_API void THVector_(normal_fill)(scalar_t *data, - const int64_t size, - struct THGenerator *generator, - const scalar_t mean, - const scalar_t stddev); + const int64_t size, + struct THGenerator *generator, + const scalar_t mean, + const scalar_t stddev); #endif /* non bool only part */ diff --git a/aten/src/TH/vector/VSX.cpp b/aten/src/TH/vector/VSX.cpp index ddffd2a35cb4e4..f95e2f091b930a 100644 --- a/aten/src/TH/vector/VSX.cpp +++ b/aten/src/TH/vector/VSX.cpp @@ -1342,7 +1342,7 @@ static void THFloatVector_divs_VSX(float *y, const float*x, const float c, const // $ gcc VSX.c -O2 -D RUN_VSX_TESTS -o vsxtest // $ ./vsxtest // -// TODO +// TODO // // // Finished running all tests. All tests PASSED. diff --git a/aten/src/TH/vector/simd.h b/aten/src/TH/vector/simd.h index de277935a64b29..28a0e1d40bb662 100644 --- a/aten/src/TH/vector/simd.h +++ b/aten/src/TH/vector/simd.h @@ -119,7 +119,7 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t * #else uint32_t a = *eax, b, c = *ecx, d; asm volatile ( "cpuid\n\t" - : "+a"(a), "=b"(b), "+c"(c), "=d"(d) ); + : "+a"(a), "=b"(b), "+c"(c), "=d"(d) ); *eax = a; *ebx = b; *ecx = c; diff --git a/aten/src/THC/THCBlas.cu b/aten/src/THC/THCBlas.cu index e627658c100a2c..39048730a6ee8a 100644 --- a/aten/src/THC/THCBlas.cu +++ b/aten/src/THC/THCBlas.cu @@ -308,12 +308,12 @@ void THCudaBlas_Hgemm(THCState *state, char transa, char transb, int64_t m, int6 cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties(); if (prop->major >= 5){ THCublasCheck(cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH)); - THCublasCheck(cublasGemmEx(handle, opa, opb, + THCublasCheck(cublasGemmEx(handle, opa, opb, i_m, i_n, i_k, &fAlpha, a, CUDA_R_16F, i_lda, b, CUDA_R_16F, i_ldb, &fBeta, c, CUDA_R_16F, i_ldc, CUDA_R_32F, CUBLAS_GEMM_DFALT_TENSOR_OP)); - THCublasCheck(cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH)); + THCublasCheck(cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH)); }else{ THCublasCheck(cublasSgemmEx(handle, opa, opb, i_m, i_n, i_k, &fAlpha, diff --git a/aten/src/THC/THCTensorMath.cuh b/aten/src/THC/THCTensorMath.cuh index b949dd291c2d50..4613fbf2cd48ed 100644 --- a/aten/src/THC/THCTensorMath.cuh +++ b/aten/src/THC/THCTensorMath.cuh @@ -48,7 +48,7 @@ inline bool getCatGrid(THCState* state, ptrdiff_t nTensors, dim3& grid) { //X dim of grid for cat array cooperates on a single tensor in the cat. //Given half of the GPU, full utilization will always occur. grid = dim3( 2LL * numSM, (long long) nTensors ); - + return true; } @@ -131,7 +131,7 @@ __global__ void CatArrayBatchedCopy( while( tid < nElements){ IndexType elementOffset = CatArrIndexToOffset::compute( - os.outputSize, os.outputStride, dimSize, concatDim, tid); + os.outputSize, os.outputStride, dimSize, concatDim, tid); output[dataOffset + elementOffset] = data[tid]; tid += stride; diff --git a/aten/src/THC/THCTensorRandom.cuh b/aten/src/THC/THCTensorRandom.cuh index 652f02a5680a5d..33c8d3c0eb9b54 100644 --- a/aten/src/THC/THCTensorRandom.cuh +++ b/aten/src/THC/THCTensorRandom.cuh @@ -79,7 +79,7 @@ condDiv(T *q, int64_t *J, int64_t inputsize, T q_max) { q[idx] = one; } else { if (THCNumerics::gt(q_max, one)) { - q[idx] = THCNumerics::div(q[idx], q_max); + q[idx] = THCNumerics::div(q[idx], q_max); } } } @@ -236,7 +236,7 @@ sampleMultinomialOnce(int64_t* dest, THCNumerics::div( ScalarConvert::to(dist[curDist * stride_dist + cat * stride_categories]), sum) : - accZero); + accZero); smem[threadIdx.x] = dist_val; __syncthreads(); diff --git a/aten/src/THC/generic/THCTensorMath.cu b/aten/src/THC/generic/THCTensorMath.cu index b6c322dc8a2f6d..22a00d858a6b15 100644 --- a/aten/src/THC/generic/THCTensorMath.cu +++ b/aten/src/THC/generic/THCTensorMath.cu @@ -42,7 +42,7 @@ THCTensor_(numel)(THCState *state, THCTensor *t) } void THCTensor_(cat)(THCState *state, THCTensor *result, - THCTensor *ta, THCTensor *tb, int dimension) + THCTensor *ta, THCTensor *tb, int dimension) { THCTensor* inputs[2]; inputs[0] = ta; @@ -73,7 +73,7 @@ inline void THCTensor_(check_shape_except_dim)(THCState *state, } void THCTensor_(catArray)(THCState *state, THCTensor *result, - THCTensor **inputs, int numInputs, int dimension) + THCTensor **inputs, int numInputs, int dimension) { // previously, size [0] tensors were the only possible empty tensors; thus, it wasn't possible // to cat empty tensors unless all the other tensors were 1-dimensional, so we allowed these tensors diff --git a/aten/src/THC/generic/THCTensorMathReduce.h b/aten/src/THC/generic/THCTensorMathReduce.h index 0bb6c31ed972e0..ae402e28a4c272 100644 --- a/aten/src/THC/generic/THCTensorMathReduce.h +++ b/aten/src/THC/generic/THCTensorMathReduce.h @@ -34,9 +34,9 @@ THC_API scalar_t THCTensor_(maxall)(THCState *state, THCTensor *self); THC_API scalar_t THCTensor_(medianall)(THCState *state, THCTensor *self); THC_API void THCTensor_(median)(THCState *state, - THCTensor *values, - THCudaLongTensor *indices, - THCTensor *src, int dim, int keepdim); + THCTensor *values, + THCudaLongTensor *indices, + THCTensor *src, int dim, int keepdim); THC_API accreal THCTensor_(dist)(THCState *state, THCTensor *self, THCTensor *src, scalar_t value); diff --git a/aten/src/THC/generic/THCTensorRandom.cu b/aten/src/THC/generic/THCTensorRandom.cu index 842259b0e6c1f0..0ee87a27abb899 100644 --- a/aten/src/THC/generic/THCTensorRandom.cu +++ b/aten/src/THC/generic/THCTensorRandom.cu @@ -249,7 +249,7 @@ void THCTensor_(multinomial)(struct THCState *state, THCudaLongTensor_data(state, self), numDist, numCategories, THCTensor_(data)(state, prefixSum), - THCTensor_(data)(state, normDist)); + THCTensor_(data)(state, normDist)); } else { // Sample without replacement diff --git a/aten/src/THCUNN/LogSigmoid.cu b/aten/src/THCUNN/LogSigmoid.cu index 7191ceb345848f..9be29918f82f46 100644 --- a/aten/src/THCUNN/LogSigmoid.cu +++ b/aten/src/THCUNN/LogSigmoid.cu @@ -7,12 +7,12 @@ #define ZERO_MACRO zero() template inline __device__ typename std::enable_if::value, T>::type zero() { - return 0.; + return 0.; } template inline __device__ typename std::enable_if::value, T>::type zero() { - return 0.f; + return 0.f; } #else #define ZERO_MACRO 0.f diff --git a/aten/src/THCUNN/LookupTable.cu b/aten/src/THCUNN/LookupTable.cu index 1eeaedc221b8d7..0fea265d4e9b97 100644 --- a/aten/src/THCUNN/LookupTable.cu +++ b/aten/src/THCUNN/LookupTable.cu @@ -88,7 +88,7 @@ __global__ void cunn_LookupTable_accGradParametersKernelByFeature #else first_remaining_peer = __ffs(matchmask) - 1; #endif - my_s[threadIdx.x] += smem[threadIdx.x + WARP_SIZE*first_remaining_peer]; + my_s[threadIdx.x] += smem[threadIdx.x + WARP_SIZE*first_remaining_peer]; matchmask ^= (1 << first_remaining_peer); } if(f < s) diff --git a/aten/src/THCUNN/LookupTableBag.cu b/aten/src/THCUNN/LookupTableBag.cu index 993192ab79fb3a..bd552b1354b771 100644 --- a/aten/src/THCUNN/LookupTableBag.cu +++ b/aten/src/THCUNN/LookupTableBag.cu @@ -49,14 +49,14 @@ __global__ void cunn_LookupTableBag_updateOutputKernel( for (int64_t emb = begin; emb < end; emb++) { const int weightRow = ((int) input[emb]) * stride; weightFeatSum += ScalarConvert::to(weightFeat[weightRow]); - bag_size_ ++; + bag_size_ ++; if (featureDim == 0) { offset2bag[emb] = bag; } } if (mode == MODE_MEAN) { - weightFeatSum = weightFeatSum / ScalarConvert::to(bag_size_); - bag_size[bag] = bag_size_; + weightFeatSum = weightFeatSum / ScalarConvert::to(bag_size_); + bag_size[bag] = bag_size_; } (void) MODE_SUM; //silence warnings about unused MODE_SUM; output[bag * stride + featureDim] = ScalarConvert::to(weightFeatSum); @@ -114,9 +114,9 @@ __global__ void cunn_LookupTableBag_accGradParametersKernel( if (featureDim < stride) { gradient[ii] = ScalarConvert::to(gradOutput[gradOutputRow + featureDim]); - if (mode == MODE_MEAN) { - gradient[ii] /= bag_size[seq_number]; - } + if (mode == MODE_MEAN) { + gradient[ii] /= bag_size[seq_number]; + } weight[ii] = ScalarConvert::to(gradWeight[weightRow + featureDim]); } } diff --git a/aten/src/THCUNN/SpatialUpSamplingNearest.cu b/aten/src/THCUNN/SpatialUpSamplingNearest.cu index ae650f5c8c459d..13777b2bb28af6 100644 --- a/aten/src/THCUNN/SpatialUpSamplingNearest.cu +++ b/aten/src/THCUNN/SpatialUpSamplingNearest.cu @@ -16,9 +16,9 @@ template C10_LAUNCH_BOUNDS_1(1024) #endif __global__ void nearest_neighbor_4d_kernel( - const int n, - const THCDeviceTensor data1, - THCDeviceTensor data2) { + const int n, + const THCDeviceTensor data1, + THCDeviceTensor data2) { int index = threadIdx.x + blockIdx.x * blockDim.x; const int batchsize = data1.getSize(0); const int channels = data1.getSize(1); @@ -37,10 +37,10 @@ __global__ void nearest_neighbor_4d_kernel( const int h1 = h2; const int w1 = w2; for (int n = 0; n < batchsize; n++) { - for (int c = 0; c < channels; ++c) { - const Dtype val = data1[n][c][h1][w1]; - data2[n][c][h2][w2] = val; - } + for (int c = 0; c < channels; ++c) { + const Dtype val = data1[n][c][h1][w1]; + data2[n][c][h2][w2] = val; + } } return; } @@ -49,8 +49,8 @@ __global__ void nearest_neighbor_4d_kernel( const int w1 = nearest_neighbor_compute_source_index(width_scale, w2, width1); for (int n = 0; n < batchsize; n++) { for (int c = 0; c < channels; ++c) { - const Dtype val = data1[n][c][h1][w1]; - data2[n][c][h2][w2] = val; + const Dtype val = data1[n][c][h1][w1]; + data2[n][c][h2][w2] = val; } } } @@ -62,9 +62,9 @@ template C10_LAUNCH_BOUNDS_1(1024) #endif __global__ void nearest_neighbor_4d_kernel_backward( - const int n, - THCDeviceTensor data1, - const THCDeviceTensor data2) { + const int n, + THCDeviceTensor data1, + const THCDeviceTensor data2) { int index = threadIdx.x + blockIdx.x * blockDim.x; const int batchsize = data1.getSize(0); const int channels = data1.getSize(1); @@ -83,10 +83,10 @@ __global__ void nearest_neighbor_4d_kernel_backward( const int h1 = h2; const int w1 = w2; for (int n = 0; n < batchsize; n++) { - for (int c = 0; c < channels; ++c) { - const Dtype val = data2[n][c][h2][w2]; - data1[n][c][h1][w1] = val; - } + for (int c = 0; c < channels; ++c) { + const Dtype val = data2[n][c][h2][w2]; + data1[n][c][h1][w1] = val; + } } return; } diff --git a/aten/src/THCUNN/TemporalUpSamplingNearest.cu b/aten/src/THCUNN/TemporalUpSamplingNearest.cu index d2fd4ab552e45c..b10f5e1392e794 100644 --- a/aten/src/THCUNN/TemporalUpSamplingNearest.cu +++ b/aten/src/THCUNN/TemporalUpSamplingNearest.cu @@ -16,9 +16,9 @@ template C10_LAUNCH_BOUNDS_1(1024) #endif __global__ void nearest_neighbor_3d_kernel( - const int n, - const THCDeviceTensor data1, - THCDeviceTensor data2) { + const int n, + const THCDeviceTensor data1, + THCDeviceTensor data2) { int index = threadIdx.x + blockIdx.x * blockDim.x; const int batchsize = data1.getSize(0); const int channels = data1.getSize(1); @@ -32,10 +32,10 @@ __global__ void nearest_neighbor_3d_kernel( if (width1 == width2) { const int w1 = w2; for (int n = 0; n < batchsize; n++) { - for (int c = 0; c < channels; ++c) { - const Dtype val = data1[n][c][w1]; - data2[n][c][w2] = val; - } + for (int c = 0; c < channels; ++c) { + const Dtype val = data1[n][c][w1]; + data2[n][c][w2] = val; + } } return; } @@ -43,8 +43,8 @@ __global__ void nearest_neighbor_3d_kernel( const int w1 = nearest_neighbor_compute_source_index(scale, w2, width1); for (int n = 0; n < batchsize; n++) { for (int c = 0; c < channels; ++c) { - const Dtype val = data1[n][c][w1]; - data2[n][c][w2] = val; + const Dtype val = data1[n][c][w1]; + data2[n][c][w2] = val; } } } @@ -56,9 +56,9 @@ template C10_LAUNCH_BOUNDS_1(1024) #endif __global__ void nearest_neighbor_3d_kernel_backward( - const int n, - THCDeviceTensor data1, - const THCDeviceTensor data2) { + const int n, + THCDeviceTensor data1, + const THCDeviceTensor data2) { int index = threadIdx.x + blockIdx.x * blockDim.x; const int batchsize = data1.getSize(0); const int channels = data1.getSize(1); @@ -72,10 +72,10 @@ __global__ void nearest_neighbor_3d_kernel_backward( if (width1 == width2) { const int w1 = w2; for (int n = 0; n < batchsize; n++) { - for (int c = 0; c < channels; ++c) { - const Dtype val = data2[n][c][w1]; - data1[n][c][w2] = val; - } + for (int c = 0; c < channels; ++c) { + const Dtype val = data2[n][c][w1]; + data1[n][c][w2] = val; + } } return; } diff --git a/aten/src/THCUNN/VolumetricUpSamplingNearest.cu b/aten/src/THCUNN/VolumetricUpSamplingNearest.cu index 952694d7b6dfbf..8e98b400a0f040 100644 --- a/aten/src/THCUNN/VolumetricUpSamplingNearest.cu +++ b/aten/src/THCUNN/VolumetricUpSamplingNearest.cu @@ -16,9 +16,9 @@ template C10_LAUNCH_BOUNDS_1(1024) #endif __global__ void nearest_neighbor_5d_kernel( - const int n, - const THCDeviceTensor data1, - THCDeviceTensor data2) { + const int n, + const THCDeviceTensor data1, + THCDeviceTensor data2) { int index = threadIdx.x + blockIdx.x * blockDim.x; const int batchsize = data1.getSize(0); const int channels = data1.getSize(1); @@ -55,8 +55,8 @@ __global__ void nearest_neighbor_5d_kernel( const int d1 = nearest_neighbor_compute_source_index(depth_scale, d2, depth1); for (int n = 0; n < batchsize; n++) { for (int c = 0; c < channels; ++c) { - const Dtype val = data1[n][c][d1][h1][w1]; - data2[n][c][d2][h2][w2] = val; + const Dtype val = data1[n][c][d1][h1][w1]; + data2[n][c][d2][h2][w2] = val; } } } @@ -68,9 +68,9 @@ template C10_LAUNCH_BOUNDS_1(1024) #endif __global__ void nearest_neighbor_5d_kernel_backward( - const int n, - THCDeviceTensor data1, - const THCDeviceTensor data2) { + const int n, + THCDeviceTensor data1, + const THCDeviceTensor data2) { int index = threadIdx.x + blockIdx.x * blockDim.x; const int batchsize = data1.getSize(0); const int channels = data1.getSize(1); @@ -108,8 +108,8 @@ __global__ void nearest_neighbor_5d_kernel_backward( const int d1 = nearest_neighbor_compute_source_index(depth_scale, d2, depth1); for (int n = 0; n < batchsize; n++) { for (int c = 0; c < channels; ++c) { - const Dtype val = data2[n][c][d2][h2][w2]; - atomicAdd(data1[n][c][d1][h1][w1].data(), val); + const Dtype val = data2[n][c][d2][h2][w2]; + atomicAdd(data1[n][c][d1][h1][w1].data(), val); } } } diff --git a/aten/src/THCUNN/common.h b/aten/src/THCUNN/common.h index e8a98079b85b06..9e3ed7d85a62f6 100644 --- a/aten/src/THCUNN/common.h +++ b/aten/src/THCUNN/common.h @@ -24,7 +24,7 @@ inline int GET_BLOCKS(const int N) } #define THCUNN_check_shape(STATE, I1, I2) \ - if (I1 != NULL && I2 != NULL && !THCTensor_(isSameSizeAs)(STATE, I1, I2)) \ + if (I1 != NULL && I2 != NULL && !THCTensor_(isSameSizeAs)(STATE, I1, I2)) \ { \ THCDescBuff s1 = THCTensor_(sizeDesc)(STATE, I1); \ THCDescBuff s2 = THCTensor_(sizeDesc)(STATE, I2); \ @@ -47,20 +47,20 @@ inline int GET_BLOCKS(const int N) ptrdiff_t n1 = THCTensor_(nElement)(STATE, I1); \ ptrdiff_t n2 = THCTensor_(nElement)(STATE, I2); \ if (n1 != n2) \ - { \ + { \ THCDescBuff s1 = THCTensor_(sizeDesc)(state, I1); \ THCDescBuff s2 = THCTensor_(sizeDesc)(state, I2); \ - THError(#I1 " and " #I2 " have different number of elements: " \ + THError(#I1 " and " #I2 " have different number of elements: " \ #I1 "%s has %ld elements, while " \ #I2 "%s has %ld elements", s1.str, n1, s2.str, n2); \ - } \ + } \ } #define THCUNN_check_dim_size(STATE, T, DIM, DIM_SIZE, SIZE) \ if (THCTensor_(nDimensionLegacyNoScalars)(STATE, T) != DIM || \ THCTensor_(sizeLegacyNoScalars)(STATE, T, DIM_SIZE) != SIZE) { \ THCDescBuff s1 = THCTensor_(sizeDesc)(state, T); \ - THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \ + THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \ " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \ } diff --git a/aten/src/THCUNN/generic/LookupTableBag.cu b/aten/src/THCUNN/generic/LookupTableBag.cu index 32dc2f20e482ff..a2735bdd46350f 100644 --- a/aten/src/THCUNN/generic/LookupTableBag.cu +++ b/aten/src/THCUNN/generic/LookupTableBag.cu @@ -10,7 +10,7 @@ void THNN_(LookupTableBag_updateOutput)( THCTensor *weight, THCTensor *output, THCIndexTensor *offset2bag, - int mode, + int mode, THCIndexTensor *bag_size) { THCUNN_assertSameGPU(state, 5, input, offsets, weight, output, offset2bag); @@ -65,8 +65,8 @@ void THNN_(LookupTableBag_accGradParameters)( THCIndexTensor *sortedIndices, THCIndexTensor *origIndices, bool scaleGradByFreq, - int mode, - THCIndexTensor *bag_size, + int mode, + THCIndexTensor *bag_size, accreal scale_) { scalar_t scale = ScalarConvert::to(scale_); diff --git a/aten/src/THCUNN/generic/SpatialConvolutionMM.cu b/aten/src/THCUNN/generic/SpatialConvolutionMM.cu index a66eec8f99f019..0ca36e6236b160 100644 --- a/aten/src/THCUNN/generic/SpatialConvolutionMM.cu +++ b/aten/src/THCUNN/generic/SpatialConvolutionMM.cu @@ -88,7 +88,7 @@ static THCTensor* THNN_(newViewWeightMM2d)(THCState *state, THCTensor *weight) { int64_t s2 = weight->size(1) * weight->size(2) * weight->size(3); THCTensor *old_weight = weight; weight = THCTensor_(newWithStorage2d)(state, THTensor_getStoragePtr(weight), weight->storage_offset(), - s1, -1, s2, -1); + s1, -1, s2, -1); THCTensor_(free)(state, old_weight); } return weight; diff --git a/aten/src/THCUNN/generic/SpatialDilatedConvolution.cu b/aten/src/THCUNN/generic/SpatialDilatedConvolution.cu index 4d5c6d7414d305..2aeb4e49cb5e69 100644 --- a/aten/src/THCUNN/generic/SpatialDilatedConvolution.cu +++ b/aten/src/THCUNN/generic/SpatialDilatedConvolution.cu @@ -11,7 +11,7 @@ static inline void THNN_(SpatialDilatedConvolution_shapeCheck)( int kH, int kW, int dH, int dW, int padH, int padW, int dilationH, int dilationW, int weight_nullable) { THArgCheck(kW > 0 && kH > 0, 9, - "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); THArgCheck(dW > 0 && dH > 0, 11, "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); THArgCheck(dilationW > 0 && dilationH > 0, 14, diff --git a/aten/src/THCUNN/generic/SpatialUpSamplingNearest.cu b/aten/src/THCUNN/generic/SpatialUpSamplingNearest.cu index 30dd5cde1d72e2..85a7b831561d78 100644 --- a/aten/src/THCUNN/generic/SpatialUpSamplingNearest.cu +++ b/aten/src/THCUNN/generic/SpatialUpSamplingNearest.cu @@ -34,7 +34,7 @@ void THNN_(SpatialUpSamplingNearest_updateOutput)( THCState *state, THCTensor *input, THCTensor *output, - int outputHeight, + int outputHeight, int outputWidth) { THCUNN_assertSameGPU(state, 2, input, output); @@ -44,14 +44,14 @@ void THNN_(SpatialUpSamplingNearest_updateOutput)( int inputWidth = THCTensor_(size)(state, input, 3); THNN_(SpatialUpSamplingNearest_shapeCheck)(state, input, NULL, nbatch, channels, - inputHeight, inputWidth, - outputHeight, outputWidth); + inputHeight, inputWidth, + outputHeight, outputWidth); THAssert(inputHeight > 0 && inputWidth > 0 && outputHeight > 0 && outputWidth > 0); THCTensor_(resize4d)(state, output, THCTensor_(size)(state, input, 0), THCTensor_(size)(state, input, 1), - outputHeight, + outputHeight, outputWidth); THCTensor_(zero)(state, output); @@ -62,7 +62,7 @@ void THNN_(SpatialUpSamplingNearest_updateOutput)( const int num_threads = at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock; cudaStream_t stream = THCState_getCurrentStream(state); nearest_neighbor_4d_kernel <<>>(num_kernels, idata, odata); + 0, stream>>>(num_kernels, idata, odata); THCudaCheck(cudaGetLastError()); } @@ -73,15 +73,15 @@ void THNN_(SpatialUpSamplingNearest_updateGradInput)( THCTensor *gradOutput, THCTensor *gradInput, int nbatch, - int nchannels, - int inputHeight, - int inputWidth, - int outputHeight, - int outputWidth) + int nchannels, + int inputHeight, + int inputWidth, + int outputHeight, + int outputWidth) { THCUNN_assertSameGPU(state, 2, gradOutput, gradInput); THNN_(SpatialUpSamplingNearest_shapeCheck)(state, NULL, gradOutput, nbatch, nchannels, - inputHeight, inputWidth, outputHeight, outputWidth); + inputHeight, inputWidth, outputHeight, outputWidth); gradOutput = THCTensor_(newContiguous)(state, gradOutput); THCTensor_(resize4d)(state, gradInput, nbatch, nchannels, inputHeight, inputWidth); @@ -94,7 +94,7 @@ void THNN_(SpatialUpSamplingNearest_updateGradInput)( cudaStream_t stream = THCState_getCurrentStream(state); nearest_neighbor_4d_kernel_backward <<>>(num_kernels, data1, data2); + num_threads, 0, stream>>>(num_kernels, data1, data2); THCudaCheck(cudaGetLastError()); THCTensor_(free)(state, gradOutput); } diff --git a/aten/src/THCUNN/generic/THCUNN.h b/aten/src/THCUNN/generic/THCUNN.h index 0bba1124518b52..9dc879bed63bcf 100644 --- a/aten/src/THCUNN/generic/THCUNN.h +++ b/aten/src/THCUNN/generic/THCUNN.h @@ -243,7 +243,7 @@ THC_API void THNN_(LookupTableBag_updateOutput)( THCTensor *weight, THCTensor *output, THCIndexTensor *offset2bag, - int mode, + int mode, THCIndexTensor *seq_length); // [OPTIONAL] THC_API void THNN_(LookupTableBag_accGradParameters)( @@ -256,8 +256,8 @@ THC_API void THNN_(LookupTableBag_accGradParameters)( THCIndexTensor *sortedIndices, THCIndexTensor *origIndices, bool scaleGradByFreq, - int mode, - THCIndexTensor *seq_length, // [OPTIONAL] + int mode, + THCIndexTensor *seq_length, // [OPTIONAL] accreal scale_); THC_API void THNN_(L1Cost_updateOutput)( diff --git a/aten/src/THCUNN/generic/TemporalUpSamplingNearest.cu b/aten/src/THCUNN/generic/TemporalUpSamplingNearest.cu index 26cfc00655898d..1658b180c85aad 100644 --- a/aten/src/THCUNN/generic/TemporalUpSamplingNearest.cu +++ b/aten/src/THCUNN/generic/TemporalUpSamplingNearest.cu @@ -54,7 +54,7 @@ void THNN_(TemporalUpSamplingNearest_updateOutput)( const int num_threads = at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock; cudaStream_t stream = THCState_getCurrentStream(state); nearest_neighbor_3d_kernel <<>>(num_kernels, idata, odata); + 0, stream>>>(num_kernels, idata, odata); THCudaCheck(cudaGetLastError()); } @@ -82,7 +82,7 @@ void THNN_(TemporalUpSamplingNearest_updateGradInput)( cudaStream_t stream = THCState_getCurrentStream(state); nearest_neighbor_3d_kernel_backward <<>>(num_kernels, data1, data2); + num_threads, 0, stream>>>(num_kernels, data1, data2); THCudaCheck(cudaGetLastError()); THCTensor_(free)(state, gradOutput); diff --git a/aten/src/THCUNN/generic/VolumetricUpSamplingNearest.cu b/aten/src/THCUNN/generic/VolumetricUpSamplingNearest.cu index a9afc8b9cbbfab..7b3a142876f340 100644 --- a/aten/src/THCUNN/generic/VolumetricUpSamplingNearest.cu +++ b/aten/src/THCUNN/generic/VolumetricUpSamplingNearest.cu @@ -47,10 +47,10 @@ void THNN_(VolumetricUpSamplingNearest_updateOutput)( int inputWidth = THCTensor_(size)(state, input, 4); THNN_(VolumetricUpSamplingNearest_shapeCheck)(state, input, NULL, nbatch, channels, - inputDepth, inputHeight, inputWidth, - outputDepth, outputHeight, outputWidth); + inputDepth, inputHeight, inputWidth, + outputDepth, outputHeight, outputWidth); THAssert(inputDepth > 0 && inputHeight > 0 && inputWidth > 0 && - outputDepth > 0 && outputHeight > 0 && outputWidth > 0); + outputDepth > 0 && outputHeight > 0 && outputWidth > 0); THCTensor_(resize5d)(state, output, THCTensor_(size)(state, input, 0), @@ -67,7 +67,7 @@ void THNN_(VolumetricUpSamplingNearest_updateOutput)( const int num_threads = at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock; cudaStream_t stream = THCState_getCurrentStream(state); nearest_neighbor_5d_kernel <<>>(num_kernels, idata, odata); + 0, stream>>>(num_kernels, idata, odata); THCudaCheck(cudaGetLastError()); } @@ -88,8 +88,8 @@ void THNN_(VolumetricUpSamplingNearest_updateGradInput)( { THCUNN_assertSameGPU(state, 2, gradOutput, gradInput); THNN_(VolumetricUpSamplingNearest_shapeCheck)(state, NULL, gradOutput, nbatch, nchannels, - inputDepth, inputHeight, inputWidth, - outputDepth, outputHeight, outputWidth); + inputDepth, inputHeight, inputWidth, + outputDepth, outputHeight, outputWidth); gradOutput = THCTensor_(newContiguous)(state, gradOutput); THCTensor_(resize5d)(state, gradInput, nbatch, nchannels, inputDepth, inputHeight, inputWidth); @@ -100,7 +100,7 @@ void THNN_(VolumetricUpSamplingNearest_updateGradInput)( const int num_threads = at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock; cudaStream_t stream = THCState_getCurrentStream(state); nearest_neighbor_5d_kernel_backward <<>>(num_kernels, data1, data2); + num_threads, 0, stream>>>(num_kernels, data1, data2); THCudaCheck(cudaGetLastError()); THCTensor_(free)(state, gradOutput); } diff --git a/aten/src/THCUNN/upsampling.h b/aten/src/THCUNN/upsampling.h index 7a7c45d0c6a7db..313888d208431c 100644 --- a/aten/src/THCUNN/upsampling.h +++ b/aten/src/THCUNN/upsampling.h @@ -36,7 +36,7 @@ static Acctype linear_upsampling_compute_source_index( __device__ __forceinline__ static int nearest_neighbor_compute_source_index( - const float scale, int dst_index, int inputSize) { + const float scale, int dst_index, int inputSize) { const int src_index = MIN(floor(dst_index * scale), inputSize - 1); return src_index; } diff --git a/aten/src/THNN/generic/BCECriterion.c b/aten/src/THNN/generic/BCECriterion.c index 1b5b475fab276c..4523f068f9ca9f 100644 --- a/aten/src/THNN/generic/BCECriterion.c +++ b/aten/src/THNN/generic/BCECriterion.c @@ -29,16 +29,16 @@ void THNN_(BCECriterion_updateOutput)( scalar_t y = *target_data; THAssertMsg(x >= 0. && x <= 1., "input value should be between 0~1, but got %f", - (double) x); - *output_data = -(safe_log(x) * y + safe_log(1. - x) * (1. - y)); + (double) x); + *output_data = -(safe_log(x) * y + safe_log(1. - x) * (1. - y)); ); - if (weights) { + if (weights) { THTensor_(cmul)(output, output, weights); } return; } - THTensor_(resize0d)(output); + THTensor_(resize0d)(output); scalar_t sum = 0; if (weights) { @@ -48,7 +48,7 @@ void THNN_(BCECriterion_updateOutput)( scalar_t w = *weights_data; THAssertMsg(x >= 0. && x <= 1., "input value should be between 0~1, but got %f", - (double) x); + (double) x); sum -= (safe_log(x) * y + safe_log(1. - x) * (1. - y)) * w; ); } else { @@ -57,7 +57,7 @@ void THNN_(BCECriterion_updateOutput)( scalar_t y = *target_data; THAssertMsg(x >= 0. && x <= 1., "input value should be between 0~1, but got %f", - (double) x); + (double) x); sum -= safe_log(x) * y + safe_log(1. - x) * (1. - y); ); } diff --git a/aten/src/THNN/generic/ClassNLLCriterion.c b/aten/src/THNN/generic/ClassNLLCriterion.c index 16f58eecafb2b6..fb2877a151565d 100644 --- a/aten/src/THNN/generic/ClassNLLCriterion.c +++ b/aten/src/THNN/generic/ClassNLLCriterion.c @@ -25,7 +25,7 @@ void THNN_(ClassNLLCriterion_updateOutput)( if (weights && THTensor_(nElement)(weights) != n_classes) { THDescBuff s1 = THTensor_(sizeDesc)(weights); THError("weight tensor should be defined either for all %d classes or no classes" - " but got weight tensor of shape: %s", n_classes, s1.str); + " but got weight tensor of shape: %s", n_classes, s1.str); } if (reduction == Reduction::None && n_dims == 2) { @@ -39,8 +39,8 @@ void THNN_(ClassNLLCriterion_updateOutput)( int cur_target = THLongTensor_fastGetLegacy1dNoScalars(target, i); if (cur_target == ignore_index) { - THTensor_(fastSet1d)(output, i, 0.0f); - continue; + THTensor_(fastSet1d)(output, i, 0.0f); + continue; } if (cur_target >= 0 && cur_target < n_classes) { scalar_t cur_weight = weights ? THTensor_(fastGetLegacy1dNoScalars)(weights, cur_target) : 1.0f; diff --git a/aten/src/THNN/generic/MultiMarginCriterion.c b/aten/src/THNN/generic/MultiMarginCriterion.c index 4d5883fbb8eb82..34ecf26b8ac0d8 100644 --- a/aten/src/THNN/generic/MultiMarginCriterion.c +++ b/aten/src/THNN/generic/MultiMarginCriterion.c @@ -40,7 +40,7 @@ void THNN_(MultiMarginCriterion_updateOutput)( { THIndex_t idx = THIndexTensor_(get1d)(target, t); THArgCheck((idx >= 0) && (idx < dim), 3, - "target out of range"); + "target out of range"); } input = THTensor_(newContiguous)(input); diff --git a/aten/src/THNN/generic/SpatialAdaptiveMaxPooling.c b/aten/src/THNN/generic/SpatialAdaptiveMaxPooling.c index 026223b948a90b..7d6f6e063b256b 100644 --- a/aten/src/THNN/generic/SpatialAdaptiveMaxPooling.c +++ b/aten/src/THNN/generic/SpatialAdaptiveMaxPooling.c @@ -98,7 +98,7 @@ void THNN_(SpatialAdaptiveMaxPooling_updateOutput)( THNN_ARGCHECK(!input->is_empty() && (input->dim() == 3 || input->dim() == 4), 2, input, - "non-empty 3D or 4D (batch mode) tensor expected for input, but got: %s"); + "non-empty 3D or 4D (batch mode) tensor expected for input, but got: %s"); if (input->dim() == 4) { diff --git a/aten/src/THNN/generic/SpatialAveragePooling.c b/aten/src/THNN/generic/SpatialAveragePooling.c index df039ee285e53c..f399bdb5a7a860 100644 --- a/aten/src/THNN/generic/SpatialAveragePooling.c +++ b/aten/src/THNN/generic/SpatialAveragePooling.c @@ -6,9 +6,9 @@ #include static inline void THNN_(SpatialAveragePooling_shapeCheck)( - THTensor *input, THTensor *gradOutput, - int kH, int kW, int dH, int dW, int padH, int padW, - bool ceil_mode) { + THTensor *input, THTensor *gradOutput, + int kH, int kW, int dH, int dW, int padH, int padW, + bool ceil_mode) { THArgCheck(kW > 0 && kH > 0, 5, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); @@ -27,12 +27,12 @@ static inline void THNN_(SpatialAveragePooling_shapeCheck)( } THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input, - "non-empty 3D or 4D input tensor expected but got: %s"); + "non-empty 3D or 4D input tensor expected but got: %s"); THArgCheck(kW/2 >= padW && kH/2 >= padH, 2, - "pad should be smaller than half of kernel size, but got " - "padW = %d, padH = %d, kW = %d, kH = %d", - padW, padH, kW, kH); + "pad should be smaller than half of kernel size, but got " + "padW = %d, padH = %d, kW = %d, kH = %d", + padW, padH, kW, kH); int64_t nInputPlane = input->size(dimh-1); int64_t inputHeight = input->size(dimh); @@ -44,7 +44,7 @@ static inline void THNN_(SpatialAveragePooling_shapeCheck)( if (outputWidth < 1 || outputHeight < 1) THError("Given input size: (%dx%dx%d). " - "Calculated output size: (%dx%dx%d). Output size is too small", + "Calculated output size: (%dx%dx%d). Output size is too small", nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth); if (gradOutput != NULL) { diff --git a/aten/src/THNN/generic/SpatialClassNLLCriterion.c b/aten/src/THNN/generic/SpatialClassNLLCriterion.c index 53b3abcece8069..c35cd4629cdc3f 100644 --- a/aten/src/THNN/generic/SpatialClassNLLCriterion.c +++ b/aten/src/THNN/generic/SpatialClassNLLCriterion.c @@ -4,12 +4,12 @@ #define INITIAL_CHECK \ THArgCheck(THIndexTensor_(nDimensionLegacyAll)(target) == 3, 3, \ - "only batches of spatial targets supported (3D tensors)" \ - " but got targets of dimension: %d", \ - THIndexTensor_(nDimensionLegacyAll)(target)); \ - THArgCheck(THTensor_(nDimensionLegacyAll)(input) == 4, 2, \ - "only batches of spatial inputs supported (4D tensors), " \ - "but got input of dimension: %d", THTensor_(nDimensionLegacyAll)(input)); \ + "only batches of spatial targets supported (3D tensors)" \ + " but got targets of dimension: %d", \ + THIndexTensor_(nDimensionLegacyAll)(target)); \ + THArgCheck(THTensor_(nDimensionLegacyAll)(input) == 4, 2, \ + "only batches of spatial inputs supported (4D tensors), " \ + "but got input of dimension: %d", THTensor_(nDimensionLegacyAll)(input)); \ if (weights && THTensor_(nElement)(weights) != THTensor_(size)(input, 1)) { \ THError("weight tensor should be defined either for all or no classes"); \ } \ @@ -30,8 +30,8 @@ #define GRADOUTPUT_SHAPE_CHECK \ THArgCheck(THTensor_(nDimensionLegacyAll)(gradOutput) == 3, 3, \ "gradOutput must have same dimension as target (3)" \ - " but got dimension: %d", \ - THTensor_(nDimensionLegacyAll)(gradOutput)); \ + " but got dimension: %d", \ + THTensor_(nDimensionLegacyAll)(gradOutput)); \ { \ int64_t gradOutput0 = THTensor_(size)(gradOutput, 0); \ int64_t gradOutput1 = THTensor_(size)(gradOutput, 1); \ diff --git a/aten/src/THNN/generic/SpatialConvolutionMM.c b/aten/src/THNN/generic/SpatialConvolutionMM.c index a8fc2771f130d4..597a901e354e22 100644 --- a/aten/src/THNN/generic/SpatialConvolutionMM.c +++ b/aten/src/THNN/generic/SpatialConvolutionMM.c @@ -5,14 +5,14 @@ #include static inline void THNN_(SpatialConvolutionMM_shapeCheck)( - THTensor *input, THTensor *gradOutput, - THTensor *weight, THTensor *bias, - int kH, int kW, int dH, int dW, int padH, int padW, int weight_nullable) { + THTensor *input, THTensor *gradOutput, + THTensor *weight, THTensor *bias, + int kH, int kW, int dH, int dW, int padH, int padW, int weight_nullable) { THArgCheck(kW > 0 && kH > 0, 9, - "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); + "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); THArgCheck(dW > 0 && dH > 0, 11, - "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); if (weight != NULL) { THNN_ARGCHECK(!weight->is_empty() && (weight->dim() == 2 || weight->dim() == 4), 5, weight, @@ -36,7 +36,7 @@ static inline void THNN_(SpatialConvolutionMM_shapeCheck)( } THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input, - "non-empty 3D or 4D input tensor expected but got: %s"); + "non-empty 3D or 4D input tensor expected but got: %s"); int64_t inputHeight = input->size(dimh); int64_t inputWidth = input->size(dimw); @@ -87,8 +87,8 @@ static THTensor* THNN_(newViewWeightMM2d)(THTensor *weight) { int64_t s2 = weight->size(1) * weight->size(2) * weight->size(3); THTensor *old_weight = weight; weight = THTensor_(newWithStorage2d)(THTensor_getStoragePtr(weight), weight->storage_offset(), - s1, -1, s2, -1); - c10::raw::intrusive_ptr::decref(old_weight); + s1, -1, s2, -1); + c10::raw::intrusive_ptr::decref(old_weight); } return weight; } @@ -116,8 +116,8 @@ static void THNN_(SpatialConvolutionMM_updateOutput_frame)( THTensor *output2d; THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, - nInputPlane, inputWidth, inputHeight, - outputWidth, outputHeight); + nInputPlane, inputWidth, inputHeight, + outputWidth, outputHeight); output2d = THTensor_(newWithStorage2d)(THTensor_getStoragePtr(output), output->storage_offset(), nOutputPlane, -1, @@ -125,8 +125,8 @@ static void THNN_(SpatialConvolutionMM_updateOutput_frame)( if (bias) { for(i = 0; i < nOutputPlane; i++) THVector_(fill) - (THStorage_(data)(THTensor_getStoragePtr(output)) + output->storage_offset() + output->stride(0) * i, - THTensor_(get1d)(bias, i), outputHeight*outputWidth); + (THStorage_(data)(THTensor_getStoragePtr(output)) + output->storage_offset() + output->stride(0) * i, + THTensor_(get1d)(bias, i), outputHeight*outputWidth); } else { THTensor_(zero)(output); } @@ -202,10 +202,10 @@ void THNN_(SpatialConvolutionMM_updateOutput)( THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); THNN_(SpatialConvolutionMM_updateOutput_frame) - (input_t, output_t, weight, bias, finput_t, - kW, kH, dW, dH, padW, padH, - nInputPlane, inputWidth, inputHeight, - nOutputPlane, outputWidth, outputHeight); + (input_t, output_t, weight, bias, finput_t, + kW, kH, dW, dH, padW, padH, + nInputPlane, inputWidth, inputHeight, + nOutputPlane, outputWidth, outputHeight); c10::raw::intrusive_ptr::decref(input_t); c10::raw::intrusive_ptr::decref(output_t); @@ -239,9 +239,9 @@ static void THNN_(SpatialConvolutionMM_updateGradInput_frame)( THTensor_(zero)(gradInput); THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, - padW, padH, - gradInput->size(0), gradInput->size(2), gradInput->size(1), - gradOutput->size(2), gradOutput->size(1)); + padW, padH, + gradInput->size(0), gradInput->size(2), gradInput->size(1), + gradOutput->size(2), gradOutput->size(1)); } void THNN_(SpatialConvolutionMM_updateGradInput)( @@ -280,8 +280,8 @@ void THNN_(SpatialConvolutionMM_updateGradInput)( if(input->dim() == 3) { THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, - tweight, fgradInput, - kW, kH, dW, dH, padW, padH); + tweight, fgradInput, + kW, kH, dW, dH, padW, padH); } else { @@ -296,8 +296,8 @@ void THNN_(SpatialConvolutionMM_updateGradInput)( THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t); THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, - tweight, fgradInput_t, - kW, kH, dW, dH, padW, padH); + tweight, fgradInput_t, + kW, kH, dW, dH, padW, padH); c10::raw::intrusive_ptr::decref(gradInput_t); c10::raw::intrusive_ptr::decref(gradOutput_t); @@ -380,7 +380,7 @@ void THNN_(SpatialConvolutionMM_accGradParameters)( if(input->dim() == 3) { THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, - gradBias, finput, scale); + gradBias, finput, scale); } else { @@ -396,7 +396,7 @@ void THNN_(SpatialConvolutionMM_accGradParameters)( } THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, - gradBias, finput_t, scale); + gradBias, finput_t, scale); c10::raw::intrusive_ptr::decref(gradOutput_t); if (gradWeight) { diff --git a/aten/src/THNN/generic/SpatialDilatedConvolution.c b/aten/src/THNN/generic/SpatialDilatedConvolution.c index 7496e5017082bb..4ad1ffec165543 100644 --- a/aten/src/THNN/generic/SpatialDilatedConvolution.c +++ b/aten/src/THNN/generic/SpatialDilatedConvolution.c @@ -5,10 +5,10 @@ #include static inline void THNN_(SpatialDilatedConvolution_shapeCheck)( - THTensor *input, THTensor *gradOutput, - THTensor *weight, THTensor *bias, - int kH, int kW, int dH, int dW, int padH, int padW, - int dilationH, int dilationW, int weight_nullable) { + THTensor *input, THTensor *gradOutput, + THTensor *weight, THTensor *bias, + int kH, int kW, int dH, int dW, int padH, int padW, + int dilationH, int dilationW, int weight_nullable) { THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); THArgCheck(dW > 0 && dH > 0, 11, @@ -40,7 +40,7 @@ static inline void THNN_(SpatialDilatedConvolution_shapeCheck)( } THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input, - "non-empty 3D or 4D input tensor expected but got: %s"); + "non-empty 3D or 4D input tensor expected but got: %s"); int64_t inputHeight = input->size(dimh); int64_t inputWidth = input->size(dimw); @@ -235,7 +235,7 @@ void THNN_(SpatialDilatedConvolution_updateGradInput)( is_batch = 0; THTensor_(resize4d)(input, 1, input->size(0), input->size(1), input->size(2)); THTensor_(resize4d)(gradOutput, 1, gradOutput->size(0), gradOutput->size(1), - gradOutput->size(2)); + gradOutput->size(2)); } int64_t inputWidth = input->size(3); @@ -342,7 +342,7 @@ void THNN_(SpatialDilatedConvolution_accGradParameters)( is_batch = 0; THTensor_(resize4d)(input, 1, input->size(0), input->size(1), input->size(2)); THTensor_(resize4d)(gradOutput, 1, gradOutput->size(0), - gradOutput->size(1), gradOutput->size(2)); + gradOutput->size(1), gradOutput->size(2)); } int64_t nInputPlane = input->size(1); diff --git a/aten/src/THNN/generic/SpatialDilatedMaxPooling.c b/aten/src/THNN/generic/SpatialDilatedMaxPooling.c index 6f434288a5b3ce..bd788016148332 100644 --- a/aten/src/THNN/generic/SpatialDilatedMaxPooling.c +++ b/aten/src/THNN/generic/SpatialDilatedMaxPooling.c @@ -6,9 +6,9 @@ #include static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)( - THTensor *input, THTensor *gradOutput, THIndexTensor *indices, - int kH, int kW, int dH, int dW, int padH, int padW, - int dilationH, int dilationW, bool ceil_mode) { + THTensor *input, THTensor *gradOutput, THIndexTensor *indices, + int kH, int kW, int dH, int dW, int padH, int padW, + int dilationH, int dilationW, bool ceil_mode) { THArgCheck(kW > 0 && kH > 0, 5, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); @@ -30,12 +30,12 @@ static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)( } THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input, - "non-empty 3D or 4D input tensor expected but got: %s"); + "non-empty 3D or 4D input tensor expected but got: %s"); THArgCheck(kW/2 >= padW && kH/2 >= padH, 2, - "pad should be smaller than half of kernel size, but got " - "padW = %d, padH = %d, kW = %d, kH = %d", - padW, padH, kW, kH); + "pad should be smaller than half of kernel size, but got " + "padW = %d, padH = %d, kW = %d, kH = %d", + padW, padH, kW, kH); int64_t nInputPlane = input->size(dimh-1); int64_t inputHeight = input->size(dimh); @@ -47,7 +47,7 @@ static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)( if (outputWidth < 1 || outputHeight < 1) THError("Given input size: (%dx%dx%d). " - "Calculated output size: (%dx%dx%d). Output size is too small", + "Calculated output size: (%dx%dx%d). Output size is too small", nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth); if (gradOutput != NULL) { @@ -221,16 +221,16 @@ void THNN_(SpatialDilatedMaxPooling_updateOutput)( for (p = 0; p < nbatch; p++) { THNN_(SpatialDilatedMaxPooling_updateOutput_frame) - (input_data+p*nInputPlane*inputWidth*inputHeight, - output_data+p*nInputPlane*outputWidth*outputHeight, - indices_data+p*nInputPlane*outputWidth*outputHeight, - nInputPlane, - inputWidth, inputHeight, - outputWidth, outputHeight, - kW, kH, dW, dH, - padW, padH, - dilationW, dilationH - ); + (input_data+p*nInputPlane*inputWidth*inputHeight, + output_data+p*nInputPlane*outputWidth*outputHeight, + indices_data+p*nInputPlane*outputWidth*outputHeight, + nInputPlane, + inputWidth, inputHeight, + outputWidth, outputHeight, + kW, kH, dW, dH, + padW, padH, + dilationW, dilationH + ); } } @@ -266,10 +266,10 @@ static void THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)( { /* retrieve position of max */ int64_t maxp = ind_p_k[i*outputWidth + j]; - if (maxp != -1) { - /* update gradient */ - gradInput_p_k[maxp] += gradOutput_p_k[i*outputWidth + j]; - } + if (maxp != -1) { + /* update gradient */ + gradInput_p_k[maxp] += gradOutput_p_k[i*outputWidth + j]; + } } } } @@ -350,13 +350,13 @@ void THNN_(SpatialDilatedMaxPooling_updateGradInput)( for (p = 0; p < nbatch; p++) { THNN_(SpatialDilatedMaxPooling_updateGradInput_frame) - (gradInput_data+p*nInputPlane*inputWidth*inputHeight, - gradOutput_data+p*nInputPlane*outputWidth*outputHeight, - indices_data+p*nInputPlane*outputWidth*outputHeight, - nInputPlane, - inputWidth, inputHeight, - outputWidth, outputHeight, - dW, dH); + (gradInput_data+p*nInputPlane*inputWidth*inputHeight, + gradOutput_data+p*nInputPlane*outputWidth*outputHeight, + indices_data+p*nInputPlane*outputWidth*outputHeight, + nInputPlane, + inputWidth, inputHeight, + outputWidth, outputHeight, + dW, dH); } } diff --git a/aten/src/THNN/generic/SpatialFullDilatedConvolution.c b/aten/src/THNN/generic/SpatialFullDilatedConvolution.c index 8994ec6af62f12..89021befa3ef12 100644 --- a/aten/src/THNN/generic/SpatialFullDilatedConvolution.c +++ b/aten/src/THNN/generic/SpatialFullDilatedConvolution.c @@ -3,15 +3,15 @@ #else static inline void THNN_(SpatialFullDilatedConvolution_shapeCheck)( - THTensor *input, THTensor *gradOutput, - THTensor *weight, THTensor *bias, - int kH, int kW, int dH, int dW, int padH, int padW, - int dilationH, int dilationW, int adjH, int adjW, int weight_nullable) { + THTensor *input, THTensor *gradOutput, + THTensor *weight, THTensor *bias, + int kH, int kW, int dH, int dW, int padH, int padW, + int dilationH, int dilationW, int adjH, int adjW, int weight_nullable) { THArgCheck(kW > 0 && kH > 0, 9, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); THArgCheck(dW > 0 && dH > 0, 11, - "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); + "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); THArgCheck(dilationW > 0 && dilationH > 0, 15, "dilation should be greater than zero, but got dilationH: %d, dilationW: %d", dilationH, dilationW); @@ -41,7 +41,7 @@ static inline void THNN_(SpatialFullDilatedConvolution_shapeCheck)( } THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input, - "non-empty 3D or 4D input tensor expected but got: %s"); + "non-empty 3D or 4D input tensor expected but got: %s"); int64_t inputHeight = input->size(dimh); int64_t inputWidth = input->size(dimw); @@ -50,8 +50,8 @@ static inline void THNN_(SpatialFullDilatedConvolution_shapeCheck)( if (outputWidth < 1 || outputHeight < 1) { THError("Given input size per channel: (%ld x %ld). " - "Calculated output size per channel: (%ld x %ld). Output size is too small", - inputHeight, inputWidth, outputHeight, outputWidth); + "Calculated output size per channel: (%ld x %ld). Output size is too small", + inputHeight, inputWidth, outputHeight, outputWidth); } if (weight != NULL) { diff --git a/aten/src/THNN/generic/SpatialMaxUnpooling.c b/aten/src/THNN/generic/SpatialMaxUnpooling.c index 83ca064c10b25e..2a28b837bc2eba 100644 --- a/aten/src/THNN/generic/SpatialMaxUnpooling.c +++ b/aten/src/THNN/generic/SpatialMaxUnpooling.c @@ -111,12 +111,12 @@ void THNN_(SpatialMaxUnpooling_updateOutput)( for (p = 0; p < nbatch; p++) { THNN_(SpatialMaxUnpooling_updateOutput_frame)( - input_data+p*nslices*iwidth*iheight, - output_data+p*nslices*owidth*oheight, - indices_data+p*nslices*iwidth*iheight, - nslices, - iwidth, iheight, - owidth, oheight); + input_data+p*nslices*iwidth*iheight, + output_data+p*nslices*owidth*oheight, + indices_data+p*nslices*iwidth*iheight, + nslices, + iwidth, iheight, + owidth, oheight); } } @@ -196,7 +196,7 @@ void THNN_(SpatialMaxUnpooling_updateGradInput)( if(owidth!=gradOutput->size(dimw) || oheight!=gradOutput->size(dimh)){ THError("Inconsistent gradOutput size. oheight= %d, owidth= %d, gradOutput: %dx%d", - oheight, owidth, gradOutput->size(dimh), gradOutput->size(dimw)); + oheight, owidth, gradOutput->size(dimh), gradOutput->size(dimw)); } /* get raw pointers */ diff --git a/aten/src/THNN/generic/TemporalRowConvolution.c b/aten/src/THNN/generic/TemporalRowConvolution.c index d1870ac18180fa..b9ce1b7339f775 100644 --- a/aten/src/THNN/generic/TemporalRowConvolution.c +++ b/aten/src/THNN/generic/TemporalRowConvolution.c @@ -3,467 +3,467 @@ #else static inline void THNN_(TemporalRowConvolution_shapeCheck)( - THNNState *state, - THTensor *input, - THTensor *gradOutput, - THTensor *weight, - THTensor *bias, - int kW, - int dW, - int padW) { - - THArgCheck(kW > 0, 5, - "kernel size should be greater than zero, but got kW: %d", kW); - THArgCheck(dW > 0, 6, - "stride should be greater than zero, but got dW: %d", dW); - THNN_ARGCHECK(!weight->is_empty() && weight->dim() == 3, 3, weight, - "non-empty 3D weight tensor expected, but got: %s"); + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *weight, + THTensor *bias, + int kW, + int dW, + int padW) { + + THArgCheck(kW > 0, 5, + "kernel size should be greater than zero, but got kW: %d", kW); + THArgCheck(dW > 0, 6, + "stride should be greater than zero, but got dW: %d", dW); + THNN_ARGCHECK(!weight->is_empty() && weight->dim() == 3, 3, weight, + "non-empty 3D weight tensor expected, but got: %s"); THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous"); THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous"); - if (bias != NULL) { - THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size(0)); - } + if (bias != NULL) { + THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size(0)); + } - // we're always looking at (possibly batch) x feats x seq - int ndim = input->dim(); - int dimF = 0; - int dimS = 1; + // we're always looking at (possibly batch) x feats x seq + int ndim = input->dim(); + int dimF = 0; + int dimS = 1; - if (ndim == 3) { - ++dimS; - ++dimF; - } + if (ndim == 3) { + ++dimS; + ++dimF; + } - THNN_ARGCHECK(!input->is_empty() && (ndim == 2 || ndim == 3), 1, input, - "non-empty 2D or 3D (batch mode) input tensor expected, but got :%s"); + THNN_ARGCHECK(!input->is_empty() && (ndim == 2 || ndim == 3), 1, input, + "non-empty 2D or 3D (batch mode) input tensor expected, but got :%s"); - int64_t inputFrameSize = THTensor_sizeLegacyNoScalars(weight, 0); - int64_t nInputFrame = input->size(dimS); - int64_t nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1; + int64_t inputFrameSize = THTensor_sizeLegacyNoScalars(weight, 0); + int64_t nInputFrame = input->size(dimS); + int64_t nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1; - if (nOutputFrame < 1) { - THError("Given input size: (%d x %d). " - "Calculated output size: (%d x %d). Output size is too small", - inputFrameSize, nInputFrame, inputFrameSize, nOutputFrame); - } + if (nOutputFrame < 1) { + THError("Given input size: (%d x %d). " + "Calculated output size: (%d x %d). Output size is too small", + inputFrameSize, nInputFrame, inputFrameSize, nOutputFrame); + } - THNN_CHECK_DIM_SIZE(input, ndim, dimF, inputFrameSize); + THNN_CHECK_DIM_SIZE(input, ndim, dimF, inputFrameSize); - if (gradOutput != NULL) { - THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimF, inputFrameSize); - THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimS, nOutputFrame); - } + if (gradOutput != NULL) { + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimF, inputFrameSize); + THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimS, nOutputFrame); + } } static void THNN_(unfolded_acc_row)( - THTensor *finput, - THTensor *input, - int kW, - int dW, - int padW, - int64_t inputFrameSize, - int64_t nInputFrame, - int64_t nOutputFrame) { - - int64_t c; - scalar_t *input_data = input->data(); - scalar_t *finput_data = finput->data(); + THTensor *finput, + THTensor *input, + int kW, + int dW, + int padW, + int64_t inputFrameSize, + int64_t nInputFrame, + int64_t nOutputFrame) { + + int64_t c; + scalar_t *input_data = input->data(); + scalar_t *finput_data = finput->data(); // #pragma omp parallel for private(c) - for (c = 0; c < inputFrameSize; c++) { - int64_t kw, x; - int64_t ix = 0; - - for (kw = 0; kw < kW; kw++) { - scalar_t *src = finput_data - + c * (kW * nOutputFrame) - + kw * (nOutputFrame); - scalar_t *dst = input_data + c * (nInputFrame); - - ix = (size_t)(kw); - if (dW == 1) { - scalar_t *dst_slice = dst + (size_t)(ix); - THVector_(cadd)(dst_slice, dst_slice, src, 1, nOutputFrame); - } else { - for (x = 0; x < nOutputFrame; x++) { - scalar_t *dst_slice = dst + (size_t)(ix + x * dW); - THVector_(cadd)(dst_slice, dst_slice, - src + (size_t)(x), 1, 1); - } - } - } - } + for (c = 0; c < inputFrameSize; c++) { + int64_t kw, x; + int64_t ix = 0; + + for (kw = 0; kw < kW; kw++) { + scalar_t *src = finput_data + + c * (kW * nOutputFrame) + + kw * (nOutputFrame); + scalar_t *dst = input_data + c * (nInputFrame); + + ix = (size_t)(kw); + if (dW == 1) { + scalar_t *dst_slice = dst + (size_t)(ix); + THVector_(cadd)(dst_slice, dst_slice, src, 1, nOutputFrame); + } else { + for (x = 0; x < nOutputFrame; x++) { + scalar_t *dst_slice = dst + (size_t)(ix + x * dW); + THVector_(cadd)(dst_slice, dst_slice, + src + (size_t)(x), 1, 1); + } + } + } + } } static void THNN_(unfolded_copy_row)( - THTensor *finput, - THTensor *input, - int kW, - int dW, - int padW, - int64_t inputFrameSize, - int64_t nInputFrame, - int64_t nOutputFrame) { - - int64_t k; - scalar_t *input_data = input->data(); - scalar_t *finput_data = finput->data(); + THTensor *finput, + THTensor *input, + int kW, + int dW, + int padW, + int64_t inputFrameSize, + int64_t nInputFrame, + int64_t nOutputFrame) { + + int64_t k; + scalar_t *input_data = input->data(); + scalar_t *finput_data = finput->data(); // #pragma omp parallel for private(k) - for (k = 0; k < inputFrameSize * kW; k++) { - int64_t c = k / kW; - int64_t rest = k % kW; - int64_t kw = rest % kW; - int64_t x; - int64_t ix; - scalar_t *dst = finput_data + c * (kW * nOutputFrame) + kw * (nOutputFrame); - scalar_t *src = input_data + c * (nInputFrame); - - ix = (size_t)(kw); - if (dW == 1) { - memcpy(dst, src+(size_t)(ix), sizeof(scalar_t) * (nOutputFrame)); - } else { - for (x = 0; x < nOutputFrame; x++) { - memcpy(dst + (size_t)(x), src + (size_t)(ix + x * dW), - sizeof(scalar_t) * 1); - } - } - } + for (k = 0; k < inputFrameSize * kW; k++) { + int64_t c = k / kW; + int64_t rest = k % kW; + int64_t kw = rest % kW; + int64_t x; + int64_t ix; + scalar_t *dst = finput_data + c * (kW * nOutputFrame) + kw * (nOutputFrame); + scalar_t *src = input_data + c * (nInputFrame); + + ix = (size_t)(kw); + if (dW == 1) { + memcpy(dst, src+(size_t)(ix), sizeof(scalar_t) * (nOutputFrame)); + } else { + for (x = 0; x < nOutputFrame; x++) { + memcpy(dst + (size_t)(x), src + (size_t)(ix + x * dW), + sizeof(scalar_t) * 1); + } + } + } } static void THNN_(TemporalRowConvolution_updateOutput_frame)( - THTensor *input, - THTensor *output, - THTensor *weight, - THTensor *bias, - THTensor *finput, - int kW, - int dW, - int padW, - int64_t inputFrameSize, - int64_t nInputFrame, - int64_t nOutputFrame) { - - int64_t i; - - THTensor *output3d = THTensor_(newWithStorage3d)( - THTensor_getStoragePtr(output), output->storage_offset(), - inputFrameSize, -1, - 1, -1, - nOutputFrame, -1); - - THNN_(unfolded_copy_row)(finput, input, kW, dW, padW, - inputFrameSize, nInputFrame, nOutputFrame); - - THTensor_(zero)(output); - - if (bias != NULL) { - for (i = 0; i < inputFrameSize; i++) - THVector_(fill) - (THStorage_(data)(THTensor_getStoragePtr(output)) + output->storage_offset() - + output->stride(0) * i, - THTensor_(get1d)(bias, i), nOutputFrame); - } - - THTensor_(baddbmm)(output3d, 1, output3d, 1, weight, finput); - - c10::raw::intrusive_ptr::decref(output3d); + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + int kW, + int dW, + int padW, + int64_t inputFrameSize, + int64_t nInputFrame, + int64_t nOutputFrame) { + + int64_t i; + + THTensor *output3d = THTensor_(newWithStorage3d)( + THTensor_getStoragePtr(output), output->storage_offset(), + inputFrameSize, -1, + 1, -1, + nOutputFrame, -1); + + THNN_(unfolded_copy_row)(finput, input, kW, dW, padW, + inputFrameSize, nInputFrame, nOutputFrame); + + THTensor_(zero)(output); + + if (bias != NULL) { + for (i = 0; i < inputFrameSize; i++) + THVector_(fill) + (THStorage_(data)(THTensor_getStoragePtr(output)) + output->storage_offset() + + output->stride(0) * i, + THTensor_(get1d)(bias, i), nOutputFrame); + } + + THTensor_(baddbmm)(output3d, 1, output3d, 1, weight, finput); + + c10::raw::intrusive_ptr::decref(output3d); } void THNN_(TemporalRowConvolution_updateOutput)( - THNNState *state, - THTensor *input, - THTensor *output, - THTensor *weight, - THTensor *bias, - THTensor *finput, - THTensor *fgradInput, // unused here but needed for Cuda - int kW, - int dW, - int padW, - bool featFirst) { - - int ndim = input->dim(); - - THTensor *tinput = NULL; - if (!featFirst) { - tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2); - input = THTensor_(newContiguous)(tinput); - } else { - input = THTensor_(newContiguous)(input); - } - - THNN_(TemporalRowConvolution_shapeCheck)( - state, input, NULL, weight, bias, kW, dW, padW); - - int64_t inputFrameSize = THTensor_sizeLegacyNoScalars(weight, 0); - int64_t nInputFrame = input->size(ndim - 1); - int64_t nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1; - - if (ndim == 2) { /* non-batch mode */ - - THTensor_(resize3d)(finput, inputFrameSize, kW, nOutputFrame); - THTensor_(resize2d)(output, inputFrameSize, nOutputFrame); - - THTensor_(zero)(finput); - THTensor_(zero)(output); - - THNN_(TemporalRowConvolution_updateOutput_frame) - (input, output, weight, bias, finput, - kW, dW, padW, - inputFrameSize, nInputFrame, nOutputFrame); - - } else { - int64_t T = input->size(0); - int64_t t; - - THTensor_(resize4d)(finput, T, inputFrameSize, kW, nOutputFrame); - THTensor_(resize3d)(output, T, inputFrameSize, nOutputFrame); - - THTensor_(zero)(finput); - THTensor_(zero)(output); + THNNState *state, + THTensor *input, + THTensor *output, + THTensor *weight, + THTensor *bias, + THTensor *finput, + THTensor *fgradInput, // unused here but needed for Cuda + int kW, + int dW, + int padW, + bool featFirst) { + + int ndim = input->dim(); + + THTensor *tinput = NULL; + if (!featFirst) { + tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2); + input = THTensor_(newContiguous)(tinput); + } else { + input = THTensor_(newContiguous)(input); + } + + THNN_(TemporalRowConvolution_shapeCheck)( + state, input, NULL, weight, bias, kW, dW, padW); + + int64_t inputFrameSize = THTensor_sizeLegacyNoScalars(weight, 0); + int64_t nInputFrame = input->size(ndim - 1); + int64_t nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1; + + if (ndim == 2) { /* non-batch mode */ + + THTensor_(resize3d)(finput, inputFrameSize, kW, nOutputFrame); + THTensor_(resize2d)(output, inputFrameSize, nOutputFrame); + + THTensor_(zero)(finput); + THTensor_(zero)(output); + + THNN_(TemporalRowConvolution_updateOutput_frame) + (input, output, weight, bias, finput, + kW, dW, padW, + inputFrameSize, nInputFrame, nOutputFrame); + + } else { + int64_t T = input->size(0); + int64_t t; + + THTensor_(resize4d)(finput, T, inputFrameSize, kW, nOutputFrame); + THTensor_(resize3d)(output, T, inputFrameSize, nOutputFrame); + + THTensor_(zero)(finput); + THTensor_(zero)(output); #pragma omp parallel for private(t) - for (t = 0; t < T; t++) { - THTensor *input_t = THTensor_(newSelect)(input, 0, t); - THTensor *output_t = THTensor_(newSelect)(output, 0, t); - THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); - - THNN_(TemporalRowConvolution_updateOutput_frame) - (input_t, output_t, weight, bias, finput_t, - kW, dW, padW, inputFrameSize, nInputFrame, nOutputFrame); - - c10::raw::intrusive_ptr::decref(input_t); - c10::raw::intrusive_ptr::decref(output_t); - c10::raw::intrusive_ptr::decref(finput_t); - } - } - - if (!featFirst) { // NOTE: output will NOT be contiguous in this case - THTensor_(transpose)(output, output, ndim - 1, ndim - 2); - c10::raw::intrusive_ptr::decref(tinput); - } - - c10::raw::intrusive_ptr::decref(input); + for (t = 0; t < T; t++) { + THTensor *input_t = THTensor_(newSelect)(input, 0, t); + THTensor *output_t = THTensor_(newSelect)(output, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + THNN_(TemporalRowConvolution_updateOutput_frame) + (input_t, output_t, weight, bias, finput_t, + kW, dW, padW, inputFrameSize, nInputFrame, nOutputFrame); + + c10::raw::intrusive_ptr::decref(input_t); + c10::raw::intrusive_ptr::decref(output_t); + c10::raw::intrusive_ptr::decref(finput_t); + } + } + + if (!featFirst) { // NOTE: output will NOT be contiguous in this case + THTensor_(transpose)(output, output, ndim - 1, ndim - 2); + c10::raw::intrusive_ptr::decref(tinput); + } + + c10::raw::intrusive_ptr::decref(input); } static void THNN_(TemporalRowConvolution_updateGradInput_frame)( - THTensor *gradInput, - THTensor *gradOutput, - THTensor *weight, - THTensor *fgradInput, - int kW, - int dW, - int padW, - int64_t inputFrameSize, - int64_t nInputFrame, - int64_t nOutputFrame) { - - THTensor *gradOutput3d = THTensor_(newWithStorage3d)( - THTensor_getStoragePtr(gradOutput), gradOutput->storage_offset(), - inputFrameSize, -1, - 1, -1, - nOutputFrame, -1); - - // weight: inputFrameSize x kW x 1 - // gradOutput3d: inputFrameSize x 1 x nOutputFrame - THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput3d); - // fgradInput: inputFrameSize x kW x nOutputFrame - c10::raw::intrusive_ptr::decref(gradOutput3d); - - THTensor_(zero)(gradInput); - - THNN_(unfolded_acc_row)(fgradInput, gradInput, - kW, dW, padW, - inputFrameSize, nInputFrame, nOutputFrame); + THTensor *gradInput, + THTensor *gradOutput, + THTensor *weight, + THTensor *fgradInput, + int kW, + int dW, + int padW, + int64_t inputFrameSize, + int64_t nInputFrame, + int64_t nOutputFrame) { + + THTensor *gradOutput3d = THTensor_(newWithStorage3d)( + THTensor_getStoragePtr(gradOutput), gradOutput->storage_offset(), + inputFrameSize, -1, + 1, -1, + nOutputFrame, -1); + + // weight: inputFrameSize x kW x 1 + // gradOutput3d: inputFrameSize x 1 x nOutputFrame + THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput3d); + // fgradInput: inputFrameSize x kW x nOutputFrame + c10::raw::intrusive_ptr::decref(gradOutput3d); + + THTensor_(zero)(gradInput); + + THNN_(unfolded_acc_row)(fgradInput, gradInput, + kW, dW, padW, + inputFrameSize, nInputFrame, nOutputFrame); } void THNN_(TemporalRowConvolution_updateGradInput)( - THNNState *state, - THTensor *input, - THTensor *gradOutput, - THTensor *gradInput, - THTensor *weight, - THTensor *finput, - THTensor *fgradInput, - int kW, - int dW, - int padW, - bool featFirst) { + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradInput, + THTensor *weight, + THTensor *finput, + THTensor *fgradInput, + int kW, + int dW, + int padW, + bool featFirst) { - int ndim = input->dim(); + int ndim = input->dim(); - THTensor *tinput, *tgradOutput; + THTensor *tinput, *tgradOutput; - if (!featFirst) { - tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2); - tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2); + if (!featFirst) { + tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2); + tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2); - input = THTensor_(newContiguous)(tinput); - gradOutput = THTensor_(newContiguous)(tgradOutput); + input = THTensor_(newContiguous)(tinput); + gradOutput = THTensor_(newContiguous)(tgradOutput); - } else { - input = THTensor_(newContiguous)(input); - gradOutput = THTensor_(newContiguous)(gradOutput); - } + } else { + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + } - THNN_(TemporalRowConvolution_shapeCheck)(state, input, gradOutput, weight, - NULL, kW, dW, padW); + THNN_(TemporalRowConvolution_shapeCheck)(state, input, gradOutput, weight, + NULL, kW, dW, padW); - int64_t inputFrameSize = THTensor_sizeLegacyNoScalars(weight, 0); - int64_t nInputFrame = input->size(ndim - 1); - int64_t nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1; + int64_t inputFrameSize = THTensor_sizeLegacyNoScalars(weight, 0); + int64_t nInputFrame = input->size(ndim - 1); + int64_t nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1; - THTensor_(resizeAs)(fgradInput, finput); - THTensor_(resizeAs)(gradInput, input); + THTensor_(resizeAs)(fgradInput, finput); + THTensor_(resizeAs)(gradInput, input); - THTensor_(zero)(fgradInput); - THTensor_(zero)(gradInput); + THTensor_(zero)(fgradInput); + THTensor_(zero)(gradInput); THTensor *tweight = THTensor_(new)(); THTensor_(transpose)(tweight, weight, 1, 2); - if (ndim == 2) { - THNN_(TemporalRowConvolution_updateGradInput_frame) - (gradInput, gradOutput, tweight, fgradInput, - kW, dW, padW, - inputFrameSize, nInputFrame, nOutputFrame); - } else { - int64_t T = input->size(0); - int64_t t; + if (ndim == 2) { + THNN_(TemporalRowConvolution_updateGradInput_frame) + (gradInput, gradOutput, tweight, fgradInput, + kW, dW, padW, + inputFrameSize, nInputFrame, nOutputFrame); + } else { + int64_t T = input->size(0); + int64_t t; #pragma omp parallel for private(t) - for (t = 0; t < T; t++) { + for (t = 0; t < T; t++) { - THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t); - THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); - THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t); + THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t); + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t); - THNN_(TemporalRowConvolution_updateGradInput_frame) - (gradInput_t, gradOutput_t, tweight, fgradInput_t, - kW, dW, padW, - inputFrameSize, nInputFrame, nOutputFrame); + THNN_(TemporalRowConvolution_updateGradInput_frame) + (gradInput_t, gradOutput_t, tweight, fgradInput_t, + kW, dW, padW, + inputFrameSize, nInputFrame, nOutputFrame); - c10::raw::intrusive_ptr::decref(gradInput_t); - c10::raw::intrusive_ptr::decref(gradOutput_t); - c10::raw::intrusive_ptr::decref(fgradInput_t); - } - } + c10::raw::intrusive_ptr::decref(gradInput_t); + c10::raw::intrusive_ptr::decref(gradOutput_t); + c10::raw::intrusive_ptr::decref(fgradInput_t); + } + } c10::raw::intrusive_ptr::decref(tweight); - if (!featFirst) { // NOTE: gradInput will NOT be contiguous in this case + if (!featFirst) { // NOTE: gradInput will NOT be contiguous in this case - c10::raw::intrusive_ptr::decref(tinput); - c10::raw::intrusive_ptr::decref(tgradOutput); + c10::raw::intrusive_ptr::decref(tinput); + c10::raw::intrusive_ptr::decref(tgradOutput); - THTensor_(transpose)(gradInput, gradInput, ndim - 1, ndim - 2); - } + THTensor_(transpose)(gradInput, gradInput, ndim - 1, ndim - 2); + } - c10::raw::intrusive_ptr::decref(input); - c10::raw::intrusive_ptr::decref(gradOutput); + c10::raw::intrusive_ptr::decref(input); + c10::raw::intrusive_ptr::decref(gradOutput); } static void THNN_(TemporalRowConvolution_accGradParameters_frame)( - THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, - THTensor *finput, scalar_t scale) { + THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, + THTensor *finput, scalar_t scale) { - int64_t i; - THTensor *gradOutput3d = THTensor_(newWithStorage3d)( - THTensor_getStoragePtr(gradOutput), gradOutput->storage_offset(), - gradOutput->size(0), -1, - 1, -1, - gradOutput->size(1), -1); + int64_t i; + THTensor *gradOutput3d = THTensor_(newWithStorage3d)( + THTensor_getStoragePtr(gradOutput), gradOutput->storage_offset(), + gradOutput->size(0), -1, + 1, -1, + gradOutput->size(1), -1); THTensor *tfinput = THTensor_(new)(); - THTensor_(transpose)(tfinput, finput, 1, 2); - // gradOutput3d: inputFrameSize x 1 x nOutputFrame - // finput: inputFrameSize x nOutputFrame x kW - THTensor_(baddbmm)(gradWeight, 1, gradWeight, scale, gradOutput3d, tfinput); - // gradWeight: inputFrameSize x 1 x kW + THTensor_(transpose)(tfinput, finput, 1, 2); + // gradOutput3d: inputFrameSize x 1 x nOutputFrame + // finput: inputFrameSize x nOutputFrame x kW + THTensor_(baddbmm)(gradWeight, 1, gradWeight, scale, gradOutput3d, tfinput); + // gradWeight: inputFrameSize x 1 x kW c10::raw::intrusive_ptr::decref(tfinput); - if (gradBias != NULL) { - for (i = 0; i < THTensor_sizeLegacyNoScalars(gradBias, 0); i++) { - int64_t k; - scalar_t sum = 0; - scalar_t *data = THStorage_(data)(THTensor_getStoragePtr(gradOutput3d)) - + gradOutput3d->storage_offset() - + i * gradOutput3d->stride(0); - for (k = 0; k < gradOutput3d->size(2); k++) { - sum += data[k]; - } - (THStorage_(data)(THTensor_getStoragePtr(gradBias)) + gradBias->storage_offset())[i] - += scale * sum; - } - } - - c10::raw::intrusive_ptr::decref(gradOutput3d); + if (gradBias != NULL) { + for (i = 0; i < THTensor_sizeLegacyNoScalars(gradBias, 0); i++) { + int64_t k; + scalar_t sum = 0; + scalar_t *data = THStorage_(data)(THTensor_getStoragePtr(gradOutput3d)) + + gradOutput3d->storage_offset() + + i * gradOutput3d->stride(0); + for (k = 0; k < gradOutput3d->size(2); k++) { + sum += data[k]; + } + (THStorage_(data)(THTensor_getStoragePtr(gradBias)) + gradBias->storage_offset())[i] + += scale * sum; + } + } + + c10::raw::intrusive_ptr::decref(gradOutput3d); } void THNN_(TemporalRowConvolution_accGradParameters)( - THNNState *state, - THTensor *input, - THTensor *gradOutput, - THTensor *gradWeight, - THTensor *gradBias, - THTensor *finput, - THTensor *fgradInput, - int kW, - int dW, - int padW, - bool featFirst, - accreal scale_) { + THNNState *state, + THTensor *input, + THTensor *gradOutput, + THTensor *gradWeight, + THTensor *gradBias, + THTensor *finput, + THTensor *fgradInput, + int kW, + int dW, + int padW, + bool featFirst, + accreal scale_) { scalar_t scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); - int ndim = input->dim(); - - THTensor *tinput = NULL; - THTensor *tgradOutput = NULL; - - if (!featFirst) { - tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2); - tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2); - - input = THTensor_(newContiguous)(tinput); - gradOutput = THTensor_(newContiguous)(tgradOutput); - } else { - input = THTensor_(newContiguous)(input); - gradOutput = THTensor_(newContiguous)(gradOutput); - } - - THNN_(TemporalRowConvolution_shapeCheck) - (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW); - - if (ndim == 2) { - THNN_(TemporalRowConvolution_accGradParameters_frame)( - gradOutput, gradWeight, gradBias, finput, scale); - } else { - int64_t T = input->size(0); - int64_t t; - - for (t = 0; t < T; t++) { - THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); - THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); - - THNN_(TemporalRowConvolution_accGradParameters_frame)( - gradOutput_t, gradWeight, gradBias, finput_t, scale); - - c10::raw::intrusive_ptr::decref(gradOutput_t); - c10::raw::intrusive_ptr::decref(finput_t); - } - } - - if (!featFirst) { - c10::raw::intrusive_ptr::decref(tinput); - c10::raw::intrusive_ptr::decref(tgradOutput); - } - - c10::raw::intrusive_ptr::decref(input); - c10::raw::intrusive_ptr::decref(gradOutput); + int ndim = input->dim(); + + THTensor *tinput = NULL; + THTensor *tgradOutput = NULL; + + if (!featFirst) { + tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2); + tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2); + + input = THTensor_(newContiguous)(tinput); + gradOutput = THTensor_(newContiguous)(tgradOutput); + } else { + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + } + + THNN_(TemporalRowConvolution_shapeCheck) + (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW); + + if (ndim == 2) { + THNN_(TemporalRowConvolution_accGradParameters_frame)( + gradOutput, gradWeight, gradBias, finput, scale); + } else { + int64_t T = input->size(0); + int64_t t; + + for (t = 0; t < T; t++) { + THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); + THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); + + THNN_(TemporalRowConvolution_accGradParameters_frame)( + gradOutput_t, gradWeight, gradBias, finput_t, scale); + + c10::raw::intrusive_ptr::decref(gradOutput_t); + c10::raw::intrusive_ptr::decref(finput_t); + } + } + + if (!featFirst) { + c10::raw::intrusive_ptr::decref(tinput); + c10::raw::intrusive_ptr::decref(tgradOutput); + } + + c10::raw::intrusive_ptr::decref(input); + c10::raw::intrusive_ptr::decref(gradOutput); } #endif diff --git a/aten/src/THNN/generic/VolumetricAdaptiveAveragePooling.c b/aten/src/THNN/generic/VolumetricAdaptiveAveragePooling.c index 8ef3007823e432..a0188ae4cd406e 100644 --- a/aten/src/THNN/generic/VolumetricAdaptiveAveragePooling.c +++ b/aten/src/THNN/generic/VolumetricAdaptiveAveragePooling.c @@ -105,7 +105,7 @@ void THNN_(VolumetricAdaptiveAveragePooling_updateOutput)( THNN_ARGCHECK(!input->is_empty() && (input->dim() == 4 || input->dim() == 5), 2, input, - "non-empty 4D or 5D (batch mode) tensor expected for input, but got: %s"); + "non-empty 4D or 5D (batch mode) tensor expected for input, but got: %s"); if (input->dim() == 5) { diff --git a/aten/src/THNN/generic/VolumetricAveragePooling.c b/aten/src/THNN/generic/VolumetricAveragePooling.c index 949a7efb0c0eaa..3eab0aaaa576de 100644 --- a/aten/src/THNN/generic/VolumetricAveragePooling.c +++ b/aten/src/THNN/generic/VolumetricAveragePooling.c @@ -75,7 +75,7 @@ static inline void THNN_(VolumetricAveragePooling_shapeCheck)( if (otime < 1 || owidth < 1 || oheight < 1) THError("Given input size: (%dx%dx%dx%d). " - "Calculated output size: (%dx%dx%dx%d). Output size is too small", + "Calculated output size: (%dx%dx%dx%d). Output size is too small", nslices,itime,iheight,iwidth,nslices,otime,oheight,owidth); if (gradOutput != NULL) { diff --git a/aten/src/THNN/generic/VolumetricConvolutionMM.c b/aten/src/THNN/generic/VolumetricConvolutionMM.c index 14edcb9360e4a1..8bab66f475df95 100644 --- a/aten/src/THNN/generic/VolumetricConvolutionMM.c +++ b/aten/src/THNN/generic/VolumetricConvolutionMM.c @@ -119,7 +119,7 @@ static THTensor* THNN_(newViewWeight)(THTensor *weight) int64_t s2 = weight->size(1) * weight->size(2) * weight->size(3) * weight->size(4); THTensor *old_weight = weight; weight = THTensor_(newWithStorage2d)(THTensor_getStoragePtr(weight), weight->storage_offset(), - s1, -1, s2, -1); + s1, -1, s2, -1); c10::raw::intrusive_ptr::decref(old_weight); } return weight; diff --git a/aten/src/THNN/generic/VolumetricFullDilatedConvolution.c b/aten/src/THNN/generic/VolumetricFullDilatedConvolution.c index d5ceee415f8fa8..058dbf2d614b17 100644 --- a/aten/src/THNN/generic/VolumetricFullDilatedConvolution.c +++ b/aten/src/THNN/generic/VolumetricFullDilatedConvolution.c @@ -274,7 +274,7 @@ void THNN_(VolumetricFullDilatedConvolution_updateOutput)( const int64_t k_ = 1; // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices) - if (bias) { + if (bias) { THBlas_(gemm)( 't', 'n', n_, m_, k_, diff --git a/aten/src/THNN/init.cpp b/aten/src/THNN/init.cpp index 564a5285cad4ff..3eb9ba6a1ae0cf 100644 --- a/aten/src/THNN/init.cpp +++ b/aten/src/THNN/init.cpp @@ -7,13 +7,13 @@ #define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME) #define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME) -#define THNN_CHECK_SHAPE(I1, I2) \ - if (I1 != NULL && I2 != NULL && !THTensor_(isSameSizeAs)(I1, I2)) \ - { \ - THDescBuff s1 = THTensor_(sizeDesc)(I1); \ - THDescBuff s2 = THTensor_(sizeDesc)(I2); \ - THError(#I1 " and " #I2 " shapes do not match: " \ - #I1 " %s, " #I2 " %s", s1.str, s2.str); \ +#define THNN_CHECK_SHAPE(I1, I2) \ + if (I1 != NULL && I2 != NULL && !THTensor_(isSameSizeAs)(I1, I2)) \ + { \ + THDescBuff s1 = THTensor_(sizeDesc)(I1); \ + THDescBuff s2 = THTensor_(sizeDesc)(I2); \ + THError(#I1 " and " #I2 " shapes do not match: " \ + #I1 " %s, " #I2 " %s", s1.str, s2.str); \ } #define THNN_CHECK_SHAPE_INDICES(I1, I2) \ @@ -26,39 +26,39 @@ } #define THNN_CHECK_NELEMENT(I1, I2) \ - if (I1 != NULL && I2 != NULL ) { \ - ptrdiff_t n1 = THTensor_(nElement)(I1); \ - ptrdiff_t n2 = THTensor_(nElement)(I2); \ - if (n1 != n2) \ - { \ - THDescBuff s1 = THTensor_(sizeDesc)(I1); \ - THDescBuff s2 = THTensor_(sizeDesc)(I2); \ - THError(#I1 " and " #I2 " have different number of elements: " \ - #I1 "%s has %ld elements, while " \ - #I2 "%s has %ld elements", s1.str, n1, s2.str, n2); \ - } \ + if (I1 != NULL && I2 != NULL ) { \ + ptrdiff_t n1 = THTensor_(nElement)(I1); \ + ptrdiff_t n2 = THTensor_(nElement)(I2); \ + if (n1 != n2) \ + { \ + THDescBuff s1 = THTensor_(sizeDesc)(I1); \ + THDescBuff s2 = THTensor_(sizeDesc)(I2); \ + THError(#I1 " and " #I2 " have different number of elements: " \ + #I1 "%s has %ld elements, while " \ + #I2 "%s has %ld elements", s1.str, n1, s2.str, n2); \ + } \ } -#define THNN_CHECK_DIM_SIZE(T, DIM, DIM_SIZE, SIZE) \ - if (THTensor_(nDimensionLegacyNoScalars)(T) != DIM || \ - THTensor_sizeLegacyNoScalars(T, DIM_SIZE) != SIZE) { \ - THDescBuff s1 = THTensor_(sizeDesc)(T); \ - THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \ - " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \ +#define THNN_CHECK_DIM_SIZE(T, DIM, DIM_SIZE, SIZE) \ + if (THTensor_(nDimensionLegacyNoScalars)(T) != DIM || \ + THTensor_sizeLegacyNoScalars(T, DIM_SIZE) != SIZE) { \ + THDescBuff s1 = THTensor_(sizeDesc)(T); \ + THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \ + " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \ } -#define THNN_CHECK_DIM_SIZE_INDICES(T, DIM, DIM_SIZE, SIZE) \ - if (THIndexTensor_(nDimensionLegacyNoScalars)(T) != DIM || \ - THTensor_sizeLegacyNoScalars(T, DIM_SIZE) != SIZE) { \ - THDescBuff s1 = THIndexTensor_(sizeDesc)(T); \ - THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \ +#define THNN_CHECK_DIM_SIZE_INDICES(T, DIM, DIM_SIZE, SIZE) \ + if (THIndexTensor_(nDimensionLegacyNoScalars)(T) != DIM || \ + THTensor_sizeLegacyNoScalars(T, DIM_SIZE) != SIZE) { \ + THDescBuff s1 = THIndexTensor_(sizeDesc)(T); \ + THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \ " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \ } -#define THNN_ARGCHECK(COND, ARG, T, FORMAT) \ - if (!(COND)) { \ - THDescBuff s1 = THTensor_(sizeDesc)(T); \ - THArgCheck(COND, ARG, FORMAT, s1.str); \ +#define THNN_ARGCHECK(COND, ARG, T, FORMAT) \ + if (!(COND)) { \ + THDescBuff s1 = THTensor_(sizeDesc)(T); \ + THArgCheck(COND, ARG, FORMAT, s1.str); \ } #include diff --git a/c10/test/util/LeftRight_test.cpp b/c10/test/util/LeftRight_test.cpp index 43189eb36cd92a..3a1fae0cee4a1a 100644 --- a/c10/test/util/LeftRight_test.cpp +++ b/c10/test/util/LeftRight_test.cpp @@ -206,7 +206,7 @@ TEST(LeftRightTest, givenInt_whenWriteThrowsExceptionOnSecondCall_thenKeepsNewSt write_called = true; } }), - MyException + MyException ); // check reading it returns new value diff --git a/c10/util/Half.h b/c10/util/Half.h index 82df870b3f36ff..62421e0cc35fc9 100644 --- a/c10/util/Half.h +++ b/c10/util/Half.h @@ -85,41 +85,41 @@ namespace detail { * @note The implementation doesn't use any floating-point operations. */ inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) { - /* - * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: - * +---+-----+------------+-------------------+ - * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| - * +---+-----+------------+-------------------+ - * Bits 31 26-30 16-25 0-15 - * - * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. - */ - const uint32_t w = (uint32_t) h << 16; - /* - * Extract the sign of the input number into the high bit of the 32-bit word: - * - * +---+----------------------------------+ - * | S |0000000 00000000 00000000 00000000| - * +---+----------------------------------+ - * Bits 31 0-31 - */ - const uint32_t sign = w & UINT32_C(0x80000000); - /* - * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit word: - * - * +---+-----+------------+-------------------+ - * | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| - * +---+-----+------------+-------------------+ - * Bits 30 27-31 17-26 0-16 - */ - const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF); - /* - * Renorm shift is the number of bits to shift mantissa left to make the half-precision number normalized. - * If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit exponent) equals one. - * In this case renorm_shift == 0. If the number is denormalize, renorm_shift > 0. Note that if we shift - * denormalized nonsign by renorm_shift, the unit bit of mantissa will shift into exponent, turning the - * biased exponent into 1, and making mantissa normalized (i.e. without leading 1). - */ + /* + * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: + * +---+-----+------------+-------------------+ + * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| + * +---+-----+------------+-------------------+ + * Bits 31 26-30 16-25 0-15 + * + * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. + */ + const uint32_t w = (uint32_t) h << 16; + /* + * Extract the sign of the input number into the high bit of the 32-bit word: + * + * +---+----------------------------------+ + * | S |0000000 00000000 00000000 00000000| + * +---+----------------------------------+ + * Bits 31 0-31 + */ + const uint32_t sign = w & UINT32_C(0x80000000); + /* + * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit word: + * + * +---+-----+------------+-------------------+ + * | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| + * +---+-----+------------+-------------------+ + * Bits 30 27-31 17-26 0-16 + */ + const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF); + /* + * Renorm shift is the number of bits to shift mantissa left to make the half-precision number normalized. + * If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit exponent) equals one. + * In this case renorm_shift == 0. If the number is denormalize, renorm_shift > 0. Note that if we shift + * denormalized nonsign by renorm_shift, the unit bit of mantissa will shift into exponent, turning the + * biased exponent into 1, and making mantissa normalized (i.e. without leading 1). + */ #ifdef _MSC_VER unsigned long nonsign_bsr; _BitScanReverse(&nonsign_bsr, (unsigned long)nonsign); @@ -176,62 +176,62 @@ namespace detail { * floating-point operations and bitcasts between integer and floating-point variables. */ inline float fp16_ieee_to_fp32_value(uint16_t h) { - /* - * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: - * +---+-----+------------+-------------------+ - * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| - * +---+-----+------------+-------------------+ - * Bits 31 26-30 16-25 0-15 - * - * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. - */ - const uint32_t w = (uint32_t) h << 16; - /* - * Extract the sign of the input number into the high bit of the 32-bit word: - * - * +---+----------------------------------+ - * | S |0000000 00000000 00000000 00000000| - * +---+----------------------------------+ - * Bits 31 0-31 - */ - const uint32_t sign = w & UINT32_C(0x80000000); - /* - * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word: - * - * +-----+------------+---------------------+ - * |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000| - * +-----+------------+---------------------+ - * Bits 27-31 17-26 0-16 - */ - const uint32_t two_w = w + w; - - /* - * Shift mantissa and exponent into bits 23-28 and bits 13-22 so they become mantissa and exponent - * of a single-precision floating-point number: - * - * S|Exponent | Mantissa - * +-+---+-----+------------+----------------+ - * |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000| - * +-+---+-----+------------+----------------+ - * Bits | 23-31 | 0-22 - * - * Next, there are some adjustments to the exponent: - * - The exponent needs to be corrected by the difference in exponent bias between single-precision and half-precision - * formats (0x7F - 0xF = 0x70) - * - Inf and NaN values in the inputs should become Inf and NaN values after conversion to the single-precision number. - * Therefore, if the biased exponent of the half-precision input was 0x1F (max possible value), the biased exponent - * of the single-precision output must be 0xFF (max possible value). We do this correction in two steps: - * - First, we adjust the exponent by (0xFF - 0x1F) = 0xE0 (see exp_offset below) rather than by 0x70 suggested - * by the difference in the exponent bias (see above). - * - Then we multiply the single-precision result of exponent adjustment by 2**(-112) to reverse the effect of - * exponent adjustment by 0xE0 less the necessary exponent adjustment by 0x70 due to difference in exponent bias. - * The floating-point multiplication hardware would ensure than Inf and NaN would retain their value on at least - * partially IEEE754-compliant implementations. - * - * Note that the above operations do not handle denormal inputs (where biased exponent == 0). However, they also do not - * operate on denormal inputs, and do not produce denormal results. - */ - const uint32_t exp_offset = UINT32_C(0xE0) << 23; + /* + * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: + * +---+-----+------------+-------------------+ + * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| + * +---+-----+------------+-------------------+ + * Bits 31 26-30 16-25 0-15 + * + * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. + */ + const uint32_t w = (uint32_t) h << 16; + /* + * Extract the sign of the input number into the high bit of the 32-bit word: + * + * +---+----------------------------------+ + * | S |0000000 00000000 00000000 00000000| + * +---+----------------------------------+ + * Bits 31 0-31 + */ + const uint32_t sign = w & UINT32_C(0x80000000); + /* + * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word: + * + * +-----+------------+---------------------+ + * |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000| + * +-----+------------+---------------------+ + * Bits 27-31 17-26 0-16 + */ + const uint32_t two_w = w + w; + + /* + * Shift mantissa and exponent into bits 23-28 and bits 13-22 so they become mantissa and exponent + * of a single-precision floating-point number: + * + * S|Exponent | Mantissa + * +-+---+-----+------------+----------------+ + * |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000| + * +-+---+-----+------------+----------------+ + * Bits | 23-31 | 0-22 + * + * Next, there are some adjustments to the exponent: + * - The exponent needs to be corrected by the difference in exponent bias between single-precision and half-precision + * formats (0x7F - 0xF = 0x70) + * - Inf and NaN values in the inputs should become Inf and NaN values after conversion to the single-precision number. + * Therefore, if the biased exponent of the half-precision input was 0x1F (max possible value), the biased exponent + * of the single-precision output must be 0xFF (max possible value). We do this correction in two steps: + * - First, we adjust the exponent by (0xFF - 0x1F) = 0xE0 (see exp_offset below) rather than by 0x70 suggested + * by the difference in the exponent bias (see above). + * - Then we multiply the single-precision result of exponent adjustment by 2**(-112) to reverse the effect of + * exponent adjustment by 0xE0 less the necessary exponent adjustment by 0x70 due to difference in exponent bias. + * The floating-point multiplication hardware would ensure than Inf and NaN would retain their value on at least + * partially IEEE754-compliant implementations. + * + * Note that the above operations do not handle denormal inputs (where biased exponent == 0). However, they also do not + * operate on denormal inputs, and do not produce denormal results. + */ + const uint32_t exp_offset = UINT32_C(0xE0) << 23; // const float exp_scale = 0x1.0p-112f; uint32_t scale_bits = (uint32_t) 15 << 23; float exp_scale_val; @@ -239,48 +239,48 @@ namespace detail { const float exp_scale = exp_scale_val; const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; - /* - * Convert denormalized half-precision inputs into single-precision results (always normalized). - * Zero inputs are also handled here. - * - * In a denormalized number the biased exponent is zero, and mantissa has on-zero bits. - * First, we shift mantissa into bits 0-9 of the 32-bit word. - * - * zeros | mantissa - * +---------------------------+------------+ - * |0000 0000 0000 0000 0000 00|MM MMMM MMMM| - * +---------------------------+------------+ - * Bits 10-31 0-9 - * - * Now, remember that denormalized half-precision numbers are represented as: - * FP16 = mantissa * 2**(-24). - * The trick is to construct a normalized single-precision number with the same mantissa and thehalf-precision input - * and with an exponent which would scale the corresponding mantissa bits to 2**(-24). - * A normalized single-precision floating-point number is represented as: - * FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127) - * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision - * number causes a change of the constructud single-precision number by 2**(-24), i.e. the same ammount. - * - * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number - * is zero, the constructed single-precision number has the value of - * FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5 - * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of - * the input half-precision number. - */ - const uint32_t magic_mask = UINT32_C(126) << 23; - const float magic_bias = 0.5f; - const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; - - /* - * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the - * input exponent. The variable two_w contains input exponent in bits 27-31, therefore if its smaller than 2**27, the - * input is either a denormal number, or zero. - * - Combine the result of conversion of exponent and mantissa with the sign of the input number. - */ - const uint32_t denormalized_cutoff = UINT32_C(1) << 27; - const uint32_t result = sign | - (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); - return fp32_from_bits(result); + /* + * Convert denormalized half-precision inputs into single-precision results (always normalized). + * Zero inputs are also handled here. + * + * In a denormalized number the biased exponent is zero, and mantissa has on-zero bits. + * First, we shift mantissa into bits 0-9 of the 32-bit word. + * + * zeros | mantissa + * +---------------------------+------------+ + * |0000 0000 0000 0000 0000 00|MM MMMM MMMM| + * +---------------------------+------------+ + * Bits 10-31 0-9 + * + * Now, remember that denormalized half-precision numbers are represented as: + * FP16 = mantissa * 2**(-24). + * The trick is to construct a normalized single-precision number with the same mantissa and thehalf-precision input + * and with an exponent which would scale the corresponding mantissa bits to 2**(-24). + * A normalized single-precision floating-point number is represented as: + * FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127) + * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision + * number causes a change of the constructud single-precision number by 2**(-24), i.e. the same ammount. + * + * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number + * is zero, the constructed single-precision number has the value of + * FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5 + * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of + * the input half-precision number. + */ + const uint32_t magic_mask = UINT32_C(126) << 23; + const float magic_bias = 0.5f; + const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; + + /* + * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the + * input exponent. The variable two_w contains input exponent in bits 27-31, therefore if its smaller than 2**27, the + * input is either a denormal number, or zero. + * - Combine the result of conversion of exponent and mantissa with the sign of the input number. + */ + const uint32_t denormalized_cutoff = UINT32_C(1) << 27; + const uint32_t result = sign | + (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); + return fp32_from_bits(result); } /* @@ -301,22 +301,22 @@ namespace detail { const float scale_to_inf = scale_to_inf_val; const float scale_to_zero = scale_to_zero_val; - float base = (fabsf(f) * scale_to_inf) * scale_to_zero; - - const uint32_t w = fp32_to_bits(f); - const uint32_t shl1_w = w + w; - const uint32_t sign = w & UINT32_C(0x80000000); - uint32_t bias = shl1_w & UINT32_C(0xFF000000); - if (bias < UINT32_C(0x71000000)) { - bias = UINT32_C(0x71000000); - } - - base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; - const uint32_t bits = fp32_to_bits(base); - const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); - const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); - const uint32_t nonsign = exp_bits + mantissa_bits; - return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); + float base = (fabsf(f) * scale_to_inf) * scale_to_zero; + + const uint32_t w = fp32_to_bits(f); + const uint32_t shl1_w = w + w; + const uint32_t sign = w & UINT32_C(0x80000000); + uint32_t bias = shl1_w & UINT32_C(0xFF000000); + if (bias < UINT32_C(0x71000000)) { + bias = UINT32_C(0x71000000); + } + + base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; + const uint32_t bits = fp32_to_bits(base); + const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); + const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); + const uint32_t nonsign = exp_bits + mantissa_bits; + return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); } } // namespace detail diff --git a/caffe2/operators/assert_op.cc b/caffe2/operators/assert_op.cc index 61a4b5b52e6671..31a3d9770ef1d4 100644 --- a/caffe2/operators/assert_op.cc +++ b/caffe2/operators/assert_op.cc @@ -55,7 +55,7 @@ Assertion Passed! - )DOC") + )DOC") .Arg( "error_msg", "(*string*): custom error message to be thrown when the input does not pass assertion", diff --git a/caffe2/operators/counter_ops.cc b/caffe2/operators/counter_ops.cc index e971aae8766a8e..29a4d8dedc46de 100644 --- a/caffe2/operators/counter_ops.cc +++ b/caffe2/operators/counter_ops.cc @@ -107,17 +107,17 @@ Testing CountUp operator... 'count' value after CountUp test: 10 Testing CountDown operator... -'count' value after CountDown: 9 'done' value: False -'count' value after CountDown: 8 'done' value: False -'count' value after CountDown: 7 'done' value: False -'count' value after CountDown: 6 'done' value: False -'count' value after CountDown: 5 'done' value: False -'count' value after CountDown: 4 'done' value: False -'count' value after CountDown: 3 'done' value: False -'count' value after CountDown: 2 'done' value: False -'count' value after CountDown: 1 'done' value: False -'count' value after CountDown: 0 'done' value: False -'count' value after CountDown: -1 'done' value: True +'count' value after CountDown: 9 'done' value: False +'count' value after CountDown: 8 'done' value: False +'count' value after CountDown: 7 'done' value: False +'count' value after CountDown: 6 'done' value: False +'count' value after CountDown: 5 'done' value: False +'count' value after CountDown: 4 'done' value: False +'count' value after CountDown: 3 'done' value: False +'count' value after CountDown: 2 'done' value: False +'count' value after CountDown: 1 'done' value: False +'count' value after CountDown: 0 'done' value: False +'count' value after CountDown: -1 'done' value: True ``` diff --git a/caffe2/operators/expand_op.cc b/caffe2/operators/expand_op.cc index c0e1201e55ad29..75df6fccb33aa4 100644 --- a/caffe2/operators/expand_op.cc +++ b/caffe2/operators/expand_op.cc @@ -24,11 +24,11 @@ OPERATOR_SCHEMA(Expand) .NumInputs(2) .NumOutputs(1) .SetDoc(R"DOC( - Broadcast the input tensor to a materialized new tensor using given shape. - Broadcast rule is similar to "numpy.array(input) * numpy.ones(shape)": - Dimensions are right alignment; - Two corresponding dimensions must have the same value, or one of them - equals to 1. + Broadcast the input tensor to a materialized new tensor using given shape. + Broadcast rule is similar to "numpy.array(input) * numpy.ones(shape)": + Dimensions are right alignment; + Two corresponding dimensions must have the same value, or one of them + equals to 1. In order to align with PyTorch's `expand`, `shape` is allowed to have entries equal to -1, which means to preserve the size of the corresponding dimension in `X` (so it's actually equivalent to equal to 1). diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 2f2767bfbea895..706bbf7d4d4068 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -758,10 +758,10 @@ if(USE_CUDA) endif() if(CAFFE2_USE_CUDNN) IF(CUDNN_STATIC_LINKAGE) - LIST(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS - caffe2::cudnn "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" "dl") + LIST(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS + caffe2::cudnn "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" "dl") ELSE() - list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn) + list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn) ENDIF() else() caffe2_update_option(USE_CUDNN OFF) @@ -1166,7 +1166,7 @@ if (NOT BUILD_ATEN_MOBILE) CHECK_C_SOURCE_COMPILES("#include static inline void cpuid(uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) + uint32_t *ecx, uint32_t *edx) { uint32_t a = *eax, b, c = *ecx, d; asm volatile ( \"cpuid\" : \"+a\"(a), \"=b\"(b), \"+c\"(c), \"=d\"(d) ); diff --git a/cmake/Modules/FindCUB.cmake b/cmake/Modules/FindCUB.cmake index 8a68d7cf432659..aff82aad4553f4 100644 --- a/cmake/Modules/FindCUB.cmake +++ b/cmake/Modules/FindCUB.cmake @@ -3,16 +3,16 @@ # CUB_INCLUDE_DIRS - the CUB include directory find_path(CUB_INCLUDE_DIR - NAMES cub/cub.cuh - DOC "The directory where CUB includes reside" + NAMES cub/cub.cuh + DOC "The directory where CUB includes reside" ) set(CUB_INCLUDE_DIRS ${CUB_INCLUDE_DIR}) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(CUB - FOUND_VAR CUB_FOUND - REQUIRED_VARS CUB_INCLUDE_DIR + FOUND_VAR CUB_FOUND + REQUIRED_VARS CUB_INCLUDE_DIR ) mark_as_advanced(CUB_FOUND) diff --git a/cmake/Modules/FindMIOpen.cmake b/cmake/Modules/FindMIOpen.cmake index 6a047df7e52f3e..8ed2888b174d92 100644 --- a/cmake/Modules/FindMIOpen.cmake +++ b/cmake/Modules/FindMIOpen.cmake @@ -35,20 +35,20 @@ find_package_handle_standard_args( MIOPEN DEFAULT_MSG MIOPEN_INCLUDE_DIR MIOPEN_LIBRARY) if(MIOPEN_FOUND) - # get MIOpen version + # get MIOpen version file(READ ${MIOPEN_INCLUDE_DIR}/version.h MIOPEN_HEADER_CONTENTS) - string(REGEX MATCH "define MIOPEN_VERSION_MAJOR * +([0-9]+)" - MIOPEN_VERSION_MAJOR "${MIOPEN_HEADER_CONTENTS}") - string(REGEX REPLACE "define MIOPEN_VERSION_MAJOR * +([0-9]+)" "\\1" - MIOPEN_VERSION_MAJOR "${MIOPEN_VERSION_MAJOR}") - string(REGEX MATCH "define MIOPEN_VERSION_MINOR * +([0-9]+)" - MIOPEN_VERSION_MINOR "${MIOPEN_HEADER_CONTENTS}") - string(REGEX REPLACE "define MIOPEN_VERSION_MINOR * +([0-9]+)" "\\1" - MIOPEN_VERSION_MINOR "${MIOPEN_VERSION_MINOR}") - string(REGEX MATCH "define MIOPEN_VERSION_PATCH * +([0-9]+)" - MIOPEN_VERSION_PATCH "${MIOPEN_HEADER_CONTENTS}") - string(REGEX REPLACE "define MIOPEN_VERSION_PATCH * +([0-9]+)" "\\1" - MIOPEN_VERSION_PATCH "${MIOPEN_VERSION_PATCH}") + string(REGEX MATCH "define MIOPEN_VERSION_MAJOR * +([0-9]+)" + MIOPEN_VERSION_MAJOR "${MIOPEN_HEADER_CONTENTS}") + string(REGEX REPLACE "define MIOPEN_VERSION_MAJOR * +([0-9]+)" "\\1" + MIOPEN_VERSION_MAJOR "${MIOPEN_VERSION_MAJOR}") + string(REGEX MATCH "define MIOPEN_VERSION_MINOR * +([0-9]+)" + MIOPEN_VERSION_MINOR "${MIOPEN_HEADER_CONTENTS}") + string(REGEX REPLACE "define MIOPEN_VERSION_MINOR * +([0-9]+)" "\\1" + MIOPEN_VERSION_MINOR "${MIOPEN_VERSION_MINOR}") + string(REGEX MATCH "define MIOPEN_VERSION_PATCH * +([0-9]+)" + MIOPEN_VERSION_PATCH "${MIOPEN_HEADER_CONTENTS}") + string(REGEX REPLACE "define MIOPEN_VERSION_PATCH * +([0-9]+)" "\\1" + MIOPEN_VERSION_PATCH "${MIOPEN_VERSION_PATCH}") # Assemble MIOpen version if(NOT MIOPEN_VERSION_MAJOR) set(MIOPEN_VERSION "?") diff --git a/cmake/Modules/Findpybind11.cmake b/cmake/Modules/Findpybind11.cmake index 056ed1eab0475f..7ff71b80146c20 100644 --- a/cmake/Modules/Findpybind11.cmake +++ b/cmake/Modules/Findpybind11.cmake @@ -3,16 +3,16 @@ # pybind11_INCLUDE_DIRS - the pybind11 include directory find_path(pybind11_INCLUDE_DIR - NAMES pybind11/pybind11.h - DOC "The directory where pybind11 includes reside" + NAMES pybind11/pybind11.h + DOC "The directory where pybind11 includes reside" ) set(pybind11_INCLUDE_DIRS ${pybind11_INCLUDE_DIR}) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(pybind11 - FOUND_VAR pybind11_FOUND - REQUIRED_VARS pybind11_INCLUDE_DIR + FOUND_VAR pybind11_FOUND + REQUIRED_VARS pybind11_INCLUDE_DIR ) mark_as_advanced(pybind11_FOUND) diff --git a/docs/caffe2/stylesheet.css b/docs/caffe2/stylesheet.css index 5b29fa848e08ff..ca916ecc32d027 100644 --- a/docs/caffe2/stylesheet.css +++ b/docs/caffe2/stylesheet.css @@ -1,89 +1,89 @@ /* The standard CSS for doxygen 1.8.14 */ body, table, div, p, dl { - font: 400 14px/22px Roboto,sans-serif; + font: 400 14px/22px Roboto,sans-serif; } p.reference, p.definition { - font: 400 14px/22px Roboto,sans-serif; + font: 400 14px/22px Roboto,sans-serif; } /* @group Heading Levels */ h1.groupheader { - font-size: 150%; + font-size: 150%; } .title { - font: 400 14px/28px Roboto,sans-serif; - font-size: 150%; - font-weight: bold; - margin: 10px 2px; + font: 400 14px/28px Roboto,sans-serif; + font-size: 150%; + font-weight: bold; + margin: 10px 2px; } h2.groupheader { - border-bottom: 1px solid #324770; - color: #223354; - font-size: 150%; - font-weight: normal; - margin-top: 1.75em; - padding-top: 8px; - padding-bottom: 4px; - width: 100%; + border-bottom: 1px solid #324770; + color: #223354; + font-size: 150%; + font-weight: normal; + margin-top: 1.75em; + padding-top: 8px; + padding-bottom: 4px; + width: 100%; } h3.groupheader { - font-size: 100%; + font-size: 100%; } h1, h2, h3, h4, h5, h6 { - -webkit-transition: text-shadow 0.5s linear; - -moz-transition: text-shadow 0.5s linear; - -ms-transition: text-shadow 0.5s linear; - -o-transition: text-shadow 0.5s linear; - transition: text-shadow 0.5s linear; - margin-right: 15px; + -webkit-transition: text-shadow 0.5s linear; + -moz-transition: text-shadow 0.5s linear; + -ms-transition: text-shadow 0.5s linear; + -o-transition: text-shadow 0.5s linear; + transition: text-shadow 0.5s linear; + margin-right: 15px; } h1.glow, h2.glow, h3.glow, h4.glow, h5.glow, h6.glow { - text-shadow: 0 0 15px cyan; + text-shadow: 0 0 15px cyan; } dt { - font-weight: bold; + font-weight: bold; } div.multicol { - -moz-column-gap: 1em; - -webkit-column-gap: 1em; - -moz-column-count: 3; - -webkit-column-count: 3; + -moz-column-gap: 1em; + -webkit-column-gap: 1em; + -moz-column-count: 3; + -webkit-column-count: 3; } p.startli, p.startdd { - margin-top: 2px; + margin-top: 2px; } p.starttd { - margin-top: 0px; + margin-top: 0px; } p.endli { - margin-bottom: 0px; + margin-bottom: 0px; } p.enddd { - margin-bottom: 4px; + margin-bottom: 4px; } p.endtd { - margin-bottom: 2px; + margin-bottom: 2px; } /* @end */ caption { - font-weight: bold; + font-weight: bold; } span.legend { @@ -97,45 +97,45 @@ h3.version { } div.qindex, div.navtab{ - background-color: #EBEFF6; - border: 1px solid #A3B4D7; - text-align: center; + background-color: #EBEFF6; + border: 1px solid #A3B4D7; + text-align: center; } div.qindex, div.navpath { - width: 100%; - line-height: 140%; + width: 100%; + line-height: 140%; } div.navtab { - margin-right: 15px; + margin-right: 15px; } /* @group Link Styling */ a { - color: #3D578C; - font-weight: normal; - text-decoration: none; + color: #3D578C; + font-weight: normal; + text-decoration: none; } .contents a:visited { - color: #4665A2; + color: #4665A2; } a:hover { - text-decoration: underline; + text-decoration: underline; } a.qindex { - font-weight: bold; + font-weight: bold; } a.qindexHL { - font-weight: bold; - background-color: #9CAFD4; - color: #ffffff; - border: 1px double #869DCA; + font-weight: bold; + background-color: #9CAFD4; + color: #ffffff; + border: 1px double #869DCA; } .contents a.qindexHL:visited { @@ -143,24 +143,24 @@ a.qindexHL { } a.el { - font-weight: bold; + font-weight: bold; } a.elRef { } a.code, a.code:visited, a.line, a.line:visited { - color: #4665A2; + color: #4665A2; } a.codeRef, a.codeRef:visited, a.lineRef, a.lineRef:visited { - color: #4665A2; + color: #4665A2; } /* @end */ dl.el { - margin-left: -1cm; + margin-left: -1cm; } pre.fragment { @@ -179,35 +179,35 @@ pre.fragment { div.fragment { padding: 0px; margin: 4px 8px 4px 2px; - background-color: #FBFCFD; - border: 1px solid #C4CFE5; + background-color: #FBFCFD; + border: 1px solid #C4CFE5; } div.line { - font-family: monospace, fixed; + font-family: monospace, fixed; font-size: 13px; - min-height: 13px; - line-height: 1.0; - text-wrap: unrestricted; - white-space: -moz-pre-wrap; /* Moz */ - white-space: -pre-wrap; /* Opera 4-6 */ - white-space: -o-pre-wrap; /* Opera 7 */ - white-space: pre-wrap; /* CSS3 */ - word-wrap: break-word; /* IE 5.5+ */ - text-indent: -53px; - padding-left: 53px; - padding-bottom: 0px; - margin: 0px; - -webkit-transition-property: background-color, box-shadow; - -webkit-transition-duration: 0.5s; - -moz-transition-property: background-color, box-shadow; - -moz-transition-duration: 0.5s; - -ms-transition-property: background-color, box-shadow; - -ms-transition-duration: 0.5s; - -o-transition-property: background-color, box-shadow; - -o-transition-duration: 0.5s; - transition-property: background-color, box-shadow; - transition-duration: 0.5s; + min-height: 13px; + line-height: 1.0; + text-wrap: unrestricted; + white-space: -moz-pre-wrap; /* Moz */ + white-space: -pre-wrap; /* Opera 4-6 */ + white-space: -o-pre-wrap; /* Opera 7 */ + white-space: pre-wrap; /* CSS3 */ + word-wrap: break-word; /* IE 5.5+ */ + text-indent: -53px; + padding-left: 53px; + padding-bottom: 0px; + margin: 0px; + -webkit-transition-property: background-color, box-shadow; + -webkit-transition-duration: 0.5s; + -moz-transition-property: background-color, box-shadow; + -moz-transition-duration: 0.5s; + -ms-transition-property: background-color, box-shadow; + -ms-transition-duration: 0.5s; + -o-transition-property: background-color, box-shadow; + -o-transition-duration: 0.5s; + transition-property: background-color, box-shadow; + transition-duration: 0.5s; } div.line:after { @@ -216,51 +216,51 @@ div.line:after { } div.line.glow { - background-color: cyan; - box-shadow: 0 0 10px cyan; + background-color: cyan; + box-shadow: 0 0 10px cyan; } span.lineno { - padding-right: 4px; - text-align: right; - border-right: 2px solid #0F0; - background-color: #E8E8E8; + padding-right: 4px; + text-align: right; + border-right: 2px solid #0F0; + background-color: #E8E8E8; white-space: pre; } span.lineno a { - background-color: #D8D8D8; + background-color: #D8D8D8; } span.lineno a:hover { - background-color: #C8C8C8; + background-color: #C8C8C8; } .lineno { - -webkit-touch-callout: none; - -webkit-user-select: none; - -khtml-user-select: none; - -moz-user-select: none; - -ms-user-select: none; - user-select: none; + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; } div.ah, span.ah { - background-color: black; - font-weight: bold; - color: #ffffff; - margin-bottom: 3px; - margin-top: 3px; - padding: 0.2em; - border: solid thin #333; - border-radius: 0.5em; - -webkit-border-radius: .5em; - -moz-border-radius: .5em; - box-shadow: 2px 2px 3px #999; - -webkit-box-shadow: 2px 2px 3px #999; - -moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px; - background-image: -webkit-gradient(linear, left top, left bottom, from(#eee), to(#000),color-stop(0.3, #444)); - background-image: -moz-linear-gradient(center top, #eee 0%, #444 40%, #000 110%); + background-color: black; + font-weight: bold; + color: #ffffff; + margin-bottom: 3px; + margin-top: 3px; + padding: 0.2em; + border: solid thin #333; + border-radius: 0.5em; + -webkit-border-radius: .5em; + -moz-border-radius: .5em; + box-shadow: 2px 2px 3px #999; + -webkit-box-shadow: 2px 2px 3px #999; + -moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px; + background-image: -webkit-gradient(linear, left top, left bottom, from(#eee), to(#000),color-stop(0.3, #444)); + background-image: -moz-linear-gradient(center top, #eee 0%, #444 40%, #000 110%); } div.classindex ul { @@ -273,51 +273,51 @@ div.classindex span.ai { } div.groupHeader { - margin-left: 16px; - margin-top: 12px; - font-weight: bold; + margin-left: 16px; + margin-top: 12px; + font-weight: bold; } div.groupText { - margin-left: 16px; - font-style: italic; + margin-left: 16px; + font-style: italic; } body { - background-color: white; - color: black; + background-color: white; + color: black; margin: 0; } div.contents { - margin-top: 10px; - margin-left: 12px; - margin-right: 8px; + margin-top: 10px; + margin-left: 12px; + margin-right: 8px; } td.indexkey { - background-color: #EBEFF6; - font-weight: bold; - border: 1px solid #C4CFE5; - margin: 2px 0px 2px 0; - padding: 2px 10px; + background-color: #EBEFF6; + font-weight: bold; + border: 1px solid #C4CFE5; + margin: 2px 0px 2px 0; + padding: 2px 10px; white-space: nowrap; vertical-align: top; } td.indexvalue { - background-color: #EBEFF6; - border: 1px solid #C4CFE5; - padding: 2px 10px; - margin: 2px 0px; + background-color: #EBEFF6; + border: 1px solid #C4CFE5; + padding: 2px 10px; + margin: 2px 0px; } tr.memlist { - background-color: #EEF1F7; + background-color: #EEF1F7; } p.formulaDsp { - text-align: center; + text-align: center; } img.formulaDsp { @@ -325,74 +325,74 @@ img.formulaDsp { } img.formulaInl { - vertical-align: middle; + vertical-align: middle; } div.center { - text-align: center; + text-align: center; margin-top: 0px; margin-bottom: 0px; padding: 0px; } div.center img { - border: 0px; + border: 0px; } address.footer { - text-align: right; - padding-right: 12px; + text-align: right; + padding-right: 12px; } img.footer { - border: 0px; - vertical-align: middle; + border: 0px; + vertical-align: middle; } /* @group Code Colorization */ span.keyword { - color: #008000 + color: #008000 } span.keywordtype { - color: #604020 + color: #604020 } span.keywordflow { - color: #e08000 + color: #e08000 } span.comment { - color: #800000 + color: #800000 } span.preprocessor { - color: #806020 + color: #806020 } span.stringliteral { - color: #002080 + color: #002080 } span.charliteral { - color: #008080 + color: #008080 } span.vhdldigit { - color: #ff00ff + color: #ff00ff } span.vhdlchar { - color: #000000 + color: #000000 } span.vhdlkeyword { - color: #700070 + color: #700070 } span.vhdllogic { - color: #ff0000 + color: #ff0000 } blockquote { @@ -406,85 +406,85 @@ blockquote { /* .search { - color: #003399; - font-weight: bold; + color: #003399; + font-weight: bold; } form.search { - margin-bottom: 0px; - margin-top: 0px; + margin-bottom: 0px; + margin-top: 0px; } input.search { - font-size: 75%; - color: #000080; - font-weight: normal; - background-color: #e8eef2; + font-size: 75%; + color: #000080; + font-weight: normal; + background-color: #e8eef2; } */ td.tiny { - font-size: 75%; + font-size: 75%; } .dirtab { - padding: 4px; - border-collapse: collapse; - border: 1px solid #A3B4D7; + padding: 4px; + border-collapse: collapse; + border: 1px solid #A3B4D7; } th.dirtab { - background: #EBEFF6; - font-weight: bold; + background: #EBEFF6; + font-weight: bold; } hr { - height: 0px; - border: none; - border-top: 1px solid #4A6AAA; + height: 0px; + border: none; + border-top: 1px solid #4A6AAA; } hr.footer { - height: 1px; + height: 1px; } /* @group Member Descriptions */ table.memberdecls { - border-spacing: 0px; - padding: 0px; + border-spacing: 0px; + padding: 0px; } .memberdecls td, .fieldtable tr { - -webkit-transition-property: background-color, box-shadow; - -webkit-transition-duration: 0.5s; - -moz-transition-property: background-color, box-shadow; - -moz-transition-duration: 0.5s; - -ms-transition-property: background-color, box-shadow; - -ms-transition-duration: 0.5s; - -o-transition-property: background-color, box-shadow; - -o-transition-duration: 0.5s; - transition-property: background-color, box-shadow; - transition-duration: 0.5s; + -webkit-transition-property: background-color, box-shadow; + -webkit-transition-duration: 0.5s; + -moz-transition-property: background-color, box-shadow; + -moz-transition-duration: 0.5s; + -ms-transition-property: background-color, box-shadow; + -ms-transition-duration: 0.5s; + -o-transition-property: background-color, box-shadow; + -o-transition-duration: 0.5s; + transition-property: background-color, box-shadow; + transition-duration: 0.5s; } .memberdecls td.glow, .fieldtable tr.glow { - background-color: cyan; - box-shadow: 0 0 15px cyan; + background-color: cyan; + box-shadow: 0 0 15px cyan; } .mdescLeft, .mdescRight, .memItemLeft, .memItemRight, .memTemplItemLeft, .memTemplItemRight, .memTemplParams { - background-color: #F9FAFC; - border: none; - margin: 4px; - padding: 1px 0 0 8px; + background-color: #F9FAFC; + border: none; + margin: 4px; + padding: 1px 0 0 8px; } .mdescLeft, .mdescRight { - padding: 0px 8px 4px 8px; - color: #555; + padding: 0px 8px 4px 8px; + color: #555; } .memSeparator { @@ -499,13 +499,13 @@ table.memberdecls { } .memItemRight { - width: 100%; + width: 100%; } .memTemplParams { - color: #4665A2; + color: #4665A2; white-space: nowrap; - font-size: 80%; + font-size: 80%; } /* @end */ @@ -515,19 +515,19 @@ table.memberdecls { /* Styles for detailed member documentation */ .memtitle { - padding: 8px; - border-top: 1px solid #A8B8D9; - border-left: 1px solid #A8B8D9; - border-right: 1px solid #A8B8D9; - border-top-right-radius: 4px; - border-top-left-radius: 4px; - margin-bottom: -1px; - background-image: url('nav_f.png'); - background-repeat: repeat-x; - background-color: #E2E8F2; - line-height: 1.25; - font-weight: 300; - float:left; + padding: 8px; + border-top: 1px solid #A8B8D9; + border-left: 1px solid #A8B8D9; + border-right: 1px solid #A8B8D9; + border-top-right-radius: 4px; + border-top-left-radius: 4px; + margin-bottom: -1px; + background-image: url('nav_f.png'); + background-repeat: repeat-x; + background-color: #E2E8F2; + line-height: 1.25; + font-weight: 300; + float:left; } .permalink @@ -538,29 +538,29 @@ table.memberdecls { } .memtemplate { - font-size: 80%; - color: #4665A2; - font-weight: normal; - margin-left: 9px; + font-size: 80%; + color: #4665A2; + font-weight: normal; + margin-left: 9px; } .memnav { - background-color: #EBEFF6; - border: 1px solid #A3B4D7; - text-align: center; - margin: 2px; - margin-right: 15px; - padding: 2px; + background-color: #EBEFF6; + border: 1px solid #A3B4D7; + text-align: center; + margin: 2px; + margin-right: 15px; + padding: 2px; } .mempage { - width: 100%; + width: 100%; } .memitem { - padding: 0; - margin-bottom: 10px; - margin-right: 5px; + padding: 0; + margin-bottom: 10px; + margin-right: 5px; -webkit-transition: box-shadow 0.5s linear; -moz-transition: box-shadow 0.5s linear; -ms-transition: box-shadow 0.5s linear; @@ -580,7 +580,7 @@ table.memberdecls { } .memname td { - vertical-align: bottom; + vertical-align: bottom; } .memproto, dl.reflist dt { @@ -606,7 +606,7 @@ table.memberdecls { .overload { font-family: "courier new",courier,monospace; - font-size: 65%; + font-size: 65%; } .memdoc, dl.reflist dd { @@ -643,19 +643,19 @@ dl.reflist dd { } .paramkey { - text-align: right; + text-align: right; } .paramtype { - white-space: nowrap; + white-space: nowrap; } .paramname { - color: #602020; - white-space: nowrap; + color: #602020; + white-space: nowrap; } .paramname em { - font-style: normal; + font-style: normal; } .paramname code { line-height: 14px; @@ -682,18 +682,18 @@ dl.reflist dd { } table.mlabels { - border-spacing: 0px; + border-spacing: 0px; } td.mlabels-left { - width: 100%; - padding: 0px; + width: 100%; + padding: 0px; } td.mlabels-right { - vertical-align: bottom; - padding: 0px; - white-space: nowrap; + vertical-align: bottom; + padding: 0px; + white-space: nowrap; } span.mlabels { @@ -706,14 +706,14 @@ span.mlabel { border-left:1px solid #5373B4; border-right:1px solid #C4CFE5; border-bottom:1px solid #C4CFE5; - text-shadow: none; - color: white; - margin-right: 4px; - padding: 2px 3px; - border-radius: 3px; - font-size: 7pt; - white-space: nowrap; - vertical-align: middle; + text-shadow: none; + color: white; + margin-right: 4px; + padding: 2px 3px; + border-radius: 3px; + font-size: 7pt; + white-space: nowrap; + vertical-align: middle; } @@ -736,13 +736,13 @@ div.directory { .directory td { margin: 0px; padding: 0px; - vertical-align: top; + vertical-align: top; } .directory td.entry { white-space: nowrap; padding-right: 6px; - padding-top: 3px; + padding-top: 3px; } .directory td.entry a { @@ -756,18 +756,18 @@ div.directory { .directory td.desc { padding-left: 6px; - padding-right: 6px; - padding-top: 3px; - border-left: 1px solid rgba(0,0,0,0.05); + padding-right: 6px; + padding-top: 3px; + border-left: 1px solid rgba(0,0,0,0.05); } .directory tr.even { - padding-left: 6px; - background-color: #F7F8FB; + padding-left: 6px; + background-color: #F7F8FB; } .directory img { - vertical-align: -30%; + vertical-align: -30%; } .directory .levels { @@ -781,7 +781,7 @@ div.directory { cursor: pointer; padding-left: 2px; padding-right: 2px; - color: #3D578C; + color: #3D578C; } .arrow { @@ -860,40 +860,40 @@ table.directory { div.dynheader { margin-top: 8px; - -webkit-touch-callout: none; - -webkit-user-select: none; - -khtml-user-select: none; - -moz-user-select: none; - -ms-user-select: none; - user-select: none; + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; } address { - font-style: normal; - color: #2A3D61; + font-style: normal; + color: #2A3D61; } table.doxtable caption { - caption-side: top; + caption-side: top; } table.doxtable { - border-collapse:collapse; + border-collapse:collapse; margin-top: 4px; margin-bottom: 4px; } table.doxtable td, table.doxtable th { - border: 1px solid #2D4068; - padding: 3px 7px 2px; + border: 1px solid #2D4068; + padding: 3px 7px 2px; } table.doxtable th { - background-color: #374F7F; - color: #FFFFFF; - font-size: 110%; - padding-bottom: 4px; - padding-top: 5px; + background-color: #374F7F; + color: #FFFFFF; + font-size: 110%; + padding-bottom: 4px; + padding-top: 5px; } table.fieldtable { @@ -962,57 +962,57 @@ table.fieldtable { .tabsearch { - top: 0px; - left: 10px; - height: 36px; - background-image: url('tab_b.png'); - z-index: 101; - overflow: hidden; - font-size: 13px; + top: 0px; + left: 10px; + height: 36px; + background-image: url('tab_b.png'); + z-index: 101; + overflow: hidden; + font-size: 13px; } .navpath ul { - font-size: 11px; - background-image:url('tab_b.png'); - background-repeat:repeat-x; - background-position: 0 -5px; - height:30px; - line-height:30px; - color:#8AA0CC; - border:solid 1px #C2CDE4; - overflow:hidden; - margin:0px; - padding:0px; + font-size: 11px; + background-image:url('tab_b.png'); + background-repeat:repeat-x; + background-position: 0 -5px; + height:30px; + line-height:30px; + color:#8AA0CC; + border:solid 1px #C2CDE4; + overflow:hidden; + margin:0px; + padding:0px; } .navpath li { - list-style-type:none; - float:left; - padding-left:10px; - padding-right:15px; - background-image:url('bc_s.png'); - background-repeat:no-repeat; - background-position:right; - color:#364D7C; + list-style-type:none; + float:left; + padding-left:10px; + padding-right:15px; + background-image:url('bc_s.png'); + background-repeat:no-repeat; + background-position:right; + color:#364D7C; } .navpath li.navelem a { - height:32px; - display:block; - text-decoration: none; - outline: none; - color: #283A5D; - font-family: 'Lucida Grande',Geneva,Helvetica,Arial,sans-serif; - text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); - text-decoration: none; + height:32px; + display:block; + text-decoration: none; + outline: none; + color: #283A5D; + font-family: 'Lucida Grande',Geneva,Helvetica,Arial,sans-serif; + text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); + text-decoration: none; } .navpath li.navelem a:hover { - color:#6884BD; + color:#6884BD; } .navpath li.footer @@ -1031,16 +1031,16 @@ table.fieldtable { div.summary { - float: right; - font-size: 8pt; - padding-right: 5px; - width: 50%; - text-align: right; + float: right; + font-size: 8pt; + padding-right: 5px; + width: 50%; + text-align: right; } div.summary a { - white-space: nowrap; + white-space: nowrap; } table.classindex @@ -1057,28 +1057,28 @@ table.classindex div.ingroups { - font-size: 8pt; - width: 50%; - text-align: left; + font-size: 8pt; + width: 50%; + text-align: left; } div.ingroups a { - white-space: nowrap; + white-space: nowrap; } div.header { background-image:url('nav_h.png'); background-repeat:repeat-x; - background-color: #F9FAFC; - margin: 0px; - border-bottom: 1px solid #C4CFE5; + background-color: #F9FAFC; + margin: 0px; + border-bottom: 1px solid #C4CFE5; } div.headertitle { - padding: 5px 5px 5px 10px; + padding: 5px 5px 5px 10px; } dl @@ -1089,8 +1089,8 @@ dl /* dl.note, dl.warning, dl.attention, dl.pre, dl.post, dl.invariant, dl.deprecated, dl.todo, dl.test, dl.bug */ dl.section { - margin-left: 0px; - padding-left: 0px; + margin-left: 0px; + padding-left: 0px; } dl.note @@ -1150,19 +1150,19 @@ dl.bug } dl.section dd { - margin-bottom: 6px; + margin-bottom: 6px; } #projectlogo { - vertical-align: bottom; - border-collapse: separate; + vertical-align: bottom; + border-collapse: separate; } #projectlogo img { - border: 0px none; + border: 0px none; } #projectalign @@ -1172,31 +1172,31 @@ dl.section dd { #projectname { - font: 300% Tahoma, Arial,sans-serif; - margin: 0px; - padding: 2px 0px; + font: 300% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 2px 0px; } #projectbrief { - font: 120% Tahoma, Arial,sans-serif; - margin: 0px; - padding: 0px; + font: 120% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 0px; } #projectnumber { - font: 50% Tahoma, Arial,sans-serif; - margin: 0px; - padding: 0px; + font: 50% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 0px; } #titlearea { - padding: 0px; - margin: 0px; - width: 100%; - border-bottom: 1px solid #5373B4; + padding: 0px; + margin: 0px; + width: 100%; + border-bottom: 1px solid #5373B4; } .image @@ -1226,12 +1226,12 @@ dl.section dd { .caption { - font-weight: bold; + font-weight: bold; } div.zoom { - border: 1px solid #90A5CE; + border: 1px solid #90A5CE; } dl.citelist { @@ -1272,7 +1272,7 @@ div.toc li { div.toc h3 { font: bold 12px/1.2 Arial,FreeSans,sans-serif; - color: #4665A2; + color: #4665A2; border-bottom: 0 none; margin: 0; } @@ -1303,12 +1303,12 @@ div.toc li.level4 { font-weight: bold; color: gray; cursor: pointer; - -webkit-touch-callout: none; - -webkit-user-select: none; - -khtml-user-select: none; - -moz-user-select: none; - -ms-user-select: none; - user-select: none; + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; } .inherit_header td { @@ -1332,24 +1332,24 @@ tr.heading h2 { } #powerTip { - cursor: default; - white-space: nowrap; - background-color: white; - border: 1px solid gray; - border-radius: 4px 4px 4px 4px; - box-shadow: 1px 1px 7px gray; - display: none; - font-size: smaller; - max-width: 80%; - opacity: 0.9; - padding: 1ex 1em 1em; - position: absolute; - z-index: 2147483647; + cursor: default; + white-space: nowrap; + background-color: white; + border: 1px solid gray; + border-radius: 4px 4px 4px 4px; + box-shadow: 1px 1px 7px gray; + display: none; + font-size: smaller; + max-width: 80%; + opacity: 0.9; + padding: 1ex 1em 1em; + position: absolute; + z-index: 2147483647; } #powerTip div.ttdoc { color: grey; - font-style: italic; + font-style: italic; } #powerTip div.ttname a { @@ -1371,9 +1371,9 @@ tr.heading h2 { } #powerTip:before, #powerTip:after { - content: ""; - position: absolute; - margin: 0px; + content: ""; + position: absolute; + margin: 0px; } #powerTip.n:after, #powerTip.n:before, @@ -1384,115 +1384,115 @@ tr.heading h2 { #powerTip.se:after, #powerTip.se:before, #powerTip.nw:after, #powerTip.nw:before, #powerTip.sw:after, #powerTip.sw:before { - border: solid transparent; - content: " "; - height: 0; - width: 0; - position: absolute; + border: solid transparent; + content: " "; + height: 0; + width: 0; + position: absolute; } #powerTip.n:after, #powerTip.s:after, #powerTip.w:after, #powerTip.e:after, #powerTip.nw:after, #powerTip.ne:after, #powerTip.sw:after, #powerTip.se:after { - border-color: rgba(255, 255, 255, 0); + border-color: rgba(255, 255, 255, 0); } #powerTip.n:before, #powerTip.s:before, #powerTip.w:before, #powerTip.e:before, #powerTip.nw:before, #powerTip.ne:before, #powerTip.sw:before, #powerTip.se:before { - border-color: rgba(128, 128, 128, 0); + border-color: rgba(128, 128, 128, 0); } #powerTip.n:after, #powerTip.n:before, #powerTip.ne:after, #powerTip.ne:before, #powerTip.nw:after, #powerTip.nw:before { - top: 100%; + top: 100%; } #powerTip.n:after, #powerTip.ne:after, #powerTip.nw:after { - border-top-color: #ffffff; - border-width: 10px; - margin: 0px -10px; + border-top-color: #ffffff; + border-width: 10px; + margin: 0px -10px; } #powerTip.n:before { - border-top-color: #808080; - border-width: 11px; - margin: 0px -11px; + border-top-color: #808080; + border-width: 11px; + margin: 0px -11px; } #powerTip.n:after, #powerTip.n:before { - left: 50%; + left: 50%; } #powerTip.nw:after, #powerTip.nw:before { - right: 14px; + right: 14px; } #powerTip.ne:after, #powerTip.ne:before { - left: 14px; + left: 14px; } #powerTip.s:after, #powerTip.s:before, #powerTip.se:after, #powerTip.se:before, #powerTip.sw:after, #powerTip.sw:before { - bottom: 100%; + bottom: 100%; } #powerTip.s:after, #powerTip.se:after, #powerTip.sw:after { - border-bottom-color: #ffffff; - border-width: 10px; - margin: 0px -10px; + border-bottom-color: #ffffff; + border-width: 10px; + margin: 0px -10px; } #powerTip.s:before, #powerTip.se:before, #powerTip.sw:before { - border-bottom-color: #808080; - border-width: 11px; - margin: 0px -11px; + border-bottom-color: #808080; + border-width: 11px; + margin: 0px -11px; } #powerTip.s:after, #powerTip.s:before { - left: 50%; + left: 50%; } #powerTip.sw:after, #powerTip.sw:before { - right: 14px; + right: 14px; } #powerTip.se:after, #powerTip.se:before { - left: 14px; + left: 14px; } #powerTip.e:after, #powerTip.e:before { - left: 100%; + left: 100%; } #powerTip.e:after { - border-left-color: #ffffff; - border-width: 10px; - top: 50%; - margin-top: -10px; + border-left-color: #ffffff; + border-width: 10px; + top: 50%; + margin-top: -10px; } #powerTip.e:before { - border-left-color: #808080; - border-width: 11px; - top: 50%; - margin-top: -11px; + border-left-color: #808080; + border-width: 11px; + top: 50%; + margin-top: -11px; } #powerTip.w:after, #powerTip.w:before { - right: 100%; + right: 100%; } #powerTip.w:after { - border-right-color: #ffffff; - border-width: 10px; - top: 50%; - margin-top: -10px; + border-right-color: #ffffff; + border-width: 10px; + top: 50%; + margin-top: -10px; } #powerTip.w:before { - border-right-color: #808080; - border-width: 11px; - top: 50%; - margin-top: -11px; + border-right-color: #808080; + border-width: 11px; + top: 50%; + margin-top: -11px; } @media print @@ -1518,77 +1518,77 @@ tr.heading h2 { /* table.markdownTable { - border-collapse:collapse; + border-collapse:collapse; margin-top: 4px; margin-bottom: 4px; } table.markdownTable td, table.markdownTable th { - border: 1px solid #2D4068; - padding: 3px 7px 2px; + border: 1px solid #2D4068; + padding: 3px 7px 2px; } table.markdownTableHead tr { } table.markdownTableBodyLeft td, table.markdownTable th { - border: 1px solid #2D4068; - padding: 3px 7px 2px; + border: 1px solid #2D4068; + padding: 3px 7px 2px; } th.markdownTableHeadLeft th.markdownTableHeadRight th.markdownTableHeadCenter th.markdownTableHeadNone { - background-color: #374F7F; - color: #FFFFFF; - font-size: 110%; - padding-bottom: 4px; - padding-top: 5px; + background-color: #374F7F; + color: #FFFFFF; + font-size: 110%; + padding-bottom: 4px; + padding-top: 5px; } th.markdownTableHeadLeft { - text-align: left + text-align: left } th.markdownTableHeadRight { - text-align: right + text-align: right } th.markdownTableHeadCenter { - text-align: center + text-align: center } */ table.markdownTable { - border-collapse:collapse; + border-collapse:collapse; margin-top: 4px; margin-bottom: 4px; } table.markdownTable td, table.markdownTable th { - border: 1px solid #2D4068; - padding: 3px 7px 2px; + border: 1px solid #2D4068; + padding: 3px 7px 2px; } table.markdownTable tr { } th.markdownTableHeadLeft, th.markdownTableHeadRight, th.markdownTableHeadCenter, th.markdownTableHeadNone { - background-color: #374F7F; - color: #FFFFFF; - font-size: 110%; - padding-bottom: 4px; - padding-top: 5px; + background-color: #374F7F; + color: #FFFFFF; + font-size: 110%; + padding-bottom: 4px; + padding-top: 5px; } th.markdownTableHeadLeft, td.markdownTableBodyLeft { - text-align: left + text-align: left } th.markdownTableHeadRight, td.markdownTableBodyRight { - text-align: right + text-align: right } th.markdownTableHeadCenter, td.markdownTableBodyCenter { - text-align: center + text-align: center } diff --git a/docs/cpp/source/notes/tensor_creation.rst b/docs/cpp/source/notes/tensor_creation.rst index 8d0fbcccb26990..86c98b9f28caa4 100644 --- a/docs/cpp/source/notes/tensor_creation.rst +++ b/docs/cpp/source/notes/tensor_creation.rst @@ -145,10 +145,10 @@ allowed values for these axes at the moment are: .. tip:: - There exist "Rust-style" shorthands for dtypes, like ``kF32`` instead of - ``kFloat32``. See `here - `_ - for the full list. + There exist "Rust-style" shorthands for dtypes, like ``kF32`` instead of + ``kFloat32``. See `here + `_ + for the full list. An instance of ``TensorOptions`` stores a concrete value for each of these @@ -314,8 +314,8 @@ we can convert it from ``int64`` to ``float32``: .. attention:: - The result of the conversion, ``float_tensor``, is a new tensor pointing to - new memory, unrelated to the source ``source_tensor``. + The result of the conversion, ``float_tensor``, is a new tensor pointing to + new memory, unrelated to the source ``source_tensor``. We can then move it from CPU memory to GPU memory: diff --git a/docs/make.bat b/docs/make.bat index a4dc68ef02765f..aa4a7a4141260e 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -5,7 +5,7 @@ pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build + set SPHINXBUILD=sphinx-build ) set SOURCEDIR=source set BUILDDIR=build @@ -15,15 +15,15 @@ if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% diff --git a/docs/source/jit.rst b/docs/source/jit.rst index 53c2ff44e3e7e3..332173896f1ee1 100644 --- a/docs/source/jit.rst +++ b/docs/source/jit.rst @@ -770,34 +770,34 @@ Interpreting Graphs The example script above produces the graph:: - graph(%len : int) { - %15 : int = prim::Constant[value=1]() - %9 : bool = prim::Constant[value=1]() - %7 : Device = prim::Constant[value="cpu"]() - %6 : int = prim::Constant[value=0]() - %5 : int = prim::Constant[value=6]() - %1 : int = prim::Constant[value=3]() - %2 : int = prim::Constant[value=4]() - %11 : int = prim::Constant[value=10]() - %14 : float = prim::Constant[value=1]() - %4 : int[] = prim::ListConstruct(%1, %2) - %rv.1 : Tensor = aten::zeros(%4, %5, %6, %7) - %rv : Tensor = prim::Loop(%len, %9, %rv.1) - block0(%i : int, %13 : Tensor) { - %12 : bool = aten::lt(%i, %11) - %rv.4 : Tensor = prim::If(%12) - block0() { - %rv.2 : Tensor = aten::sub(%13, %14, %15) - -> (%rv.2) - } - block1() { - %rv.3 : Tensor = aten::add(%13, %14, %15) - -> (%rv.3) - } - -> (%9, %rv.4) - } - return (%rv); - } + graph(%len : int) { + %15 : int = prim::Constant[value=1]() + %9 : bool = prim::Constant[value=1]() + %7 : Device = prim::Constant[value="cpu"]() + %6 : int = prim::Constant[value=0]() + %5 : int = prim::Constant[value=6]() + %1 : int = prim::Constant[value=3]() + %2 : int = prim::Constant[value=4]() + %11 : int = prim::Constant[value=10]() + %14 : float = prim::Constant[value=1]() + %4 : int[] = prim::ListConstruct(%1, %2) + %rv.1 : Tensor = aten::zeros(%4, %5, %6, %7) + %rv : Tensor = prim::Loop(%len, %9, %rv.1) + block0(%i : int, %13 : Tensor) { + %12 : bool = aten::lt(%i, %11) + %rv.4 : Tensor = prim::If(%12) + block0() { + %rv.2 : Tensor = aten::sub(%13, %14, %15) + -> (%rv.2) + } + block1() { + %rv.3 : Tensor = aten::add(%13, %14, %15) + -> (%rv.3) + } + -> (%9, %rv.4) + } + return (%rv); + } Take the instruction ``%rv.1 : Dynamic = aten::zeros(%3, %4, %5, %6)`` for @@ -850,39 +850,39 @@ Automatic Trace Checking traced = torch.jit.trace(loop_in_traced_fn, inputs, check_inputs=check_inputs) Gives us the following diagnostic information:: - ERROR: Graphs differed across invocations! - Graph diff:: - - graph(%x : Tensor) { - %1 : int = prim::Constant[value=0]() - %2 : int = prim::Constant[value=0]() - %result.1 : Tensor = aten::select(%x, %1, %2) - %4 : int = prim::Constant[value=0]() - %5 : int = prim::Constant[value=0]() - %6 : Tensor = aten::select(%x, %4, %5) - %result.2 : Tensor = aten::mul(%result.1, %6) - %8 : int = prim::Constant[value=0]() - %9 : int = prim::Constant[value=1]() - %10 : Tensor = aten::select(%x, %8, %9) - - %result : Tensor = aten::mul(%result.2, %10) - + %result.3 : Tensor = aten::mul(%result.2, %10) - ? ++ - %12 : int = prim::Constant[value=0]() - %13 : int = prim::Constant[value=2]() - %14 : Tensor = aten::select(%x, %12, %13) - + %result : Tensor = aten::mul(%result.3, %14) - + %16 : int = prim::Constant[value=0]() - + %17 : int = prim::Constant[value=3]() - + %18 : Tensor = aten::select(%x, %16, %17) - - %15 : Tensor = aten::mul(%result, %14) - ? ^ ^ - + %19 : Tensor = aten::mul(%result, %18) - ? ^ ^ - - return (%15); - ? ^ - + return (%19); - ? ^ - } + ERROR: Graphs differed across invocations! + Graph diff:: + + graph(%x : Tensor) { + %1 : int = prim::Constant[value=0]() + %2 : int = prim::Constant[value=0]() + %result.1 : Tensor = aten::select(%x, %1, %2) + %4 : int = prim::Constant[value=0]() + %5 : int = prim::Constant[value=0]() + %6 : Tensor = aten::select(%x, %4, %5) + %result.2 : Tensor = aten::mul(%result.1, %6) + %8 : int = prim::Constant[value=0]() + %9 : int = prim::Constant[value=1]() + %10 : Tensor = aten::select(%x, %8, %9) + - %result : Tensor = aten::mul(%result.2, %10) + + %result.3 : Tensor = aten::mul(%result.2, %10) + ? ++ + %12 : int = prim::Constant[value=0]() + %13 : int = prim::Constant[value=2]() + %14 : Tensor = aten::select(%x, %12, %13) + + %result : Tensor = aten::mul(%result.3, %14) + + %16 : int = prim::Constant[value=0]() + + %17 : int = prim::Constant[value=3]() + + %18 : Tensor = aten::select(%x, %16, %17) + - %15 : Tensor = aten::mul(%result, %14) + ? ^ ^ + + %19 : Tensor = aten::mul(%result, %18) + ? ^ ^ + - return (%15); + ? ^ + + return (%19); + ? ^ + } This message indicates to us that the computation differed between when @@ -912,19 +912,19 @@ Automatic Trace Checking Which produces:: - graph(%x : Tensor) { - %5 : bool = prim::Constant[value=1]() - %1 : int = prim::Constant[value=0]() - %result.1 : Tensor = aten::select(%x, %1, %1) - %4 : int = aten::size(%x, %1) - %result : Tensor = prim::Loop(%4, %5, %result.1) - block0(%i : int, %7 : Tensor) { - %10 : Tensor = aten::select(%x, %1, %i) - %result.2 : Tensor = aten::mul(%7, %10) - -> (%5, %result.2) - } - return (%result); - } + graph(%x : Tensor) { + %5 : bool = prim::Constant[value=1]() + %1 : int = prim::Constant[value=0]() + %result.1 : Tensor = aten::select(%x, %1, %1) + %4 : int = aten::size(%x, %1) + %result : Tensor = prim::Loop(%4, %5, %result.1) + block0(%i : int, %7 : Tensor) { + %10 : Tensor = aten::select(%x, %1, %i) + %result.2 : Tensor = aten::mul(%7, %10) + -> (%5, %result.2) + } + return (%result); + } Tracer Warnings ^^^^^^^^^^^^^^^ diff --git a/docs/source/notes/windows.rst b/docs/source/notes/windows.rst index e9f7acd305899a..26f1167fa72c83 100644 --- a/docs/source/notes/windows.rst +++ b/docs/source/notes/windows.rst @@ -213,8 +213,8 @@ Multiprocessing error without if-clause protection .. code-block:: py3tb RuntimeError: - An attempt has been made to start a new process before the - current process has finished its bootstrapping phase. + An attempt has been made to start a new process before the + current process has finished its bootstrapping phase. This probably means that you are not using fork to start your child processes and you have forgotten to use the proper idiom diff --git a/tools/pytorch.version b/tools/pytorch.version index a7e3617967eeb5..3488ccfc56b1df 100644 --- a/tools/pytorch.version +++ b/tools/pytorch.version @@ -8,24 +8,24 @@ PyInit*; init*; state; - _ZGVZN2at*; + _ZGVZN2at*; _ZN2at*; - _ZNK2at*Type*; - _ZNK2at*Tensor*; - _ZNK2at*Storage*; - _ZNK2at*Scalar*; - _ZNK2at*CUDA*; - *2at7Context*; - _ZTIN2at*; - _ZTIZN2at*; - _ZTSN2at*; - _ZTSPN2at*; - _ZTSZN2at*; - _ZTVN2at*; - _ZZN2at*; - _Z*torch*; - _Z*Tensor*; - _Z*tensor*; + _ZNK2at*Type*; + _ZNK2at*Tensor*; + _ZNK2at*Storage*; + _ZNK2at*Scalar*; + _ZNK2at*CUDA*; + *2at7Context*; + _ZTIN2at*; + _ZTIZN2at*; + _ZTSN2at*; + _ZTSPN2at*; + _ZTSZN2at*; + _ZTVN2at*; + _ZZN2at*; + _Z*torch*; + _Z*Tensor*; + _Z*tensor*; local: *; }; diff --git a/torch/csrc/api/include/torch/nn/modules/conv.h b/torch/csrc/api/include/torch/nn/modules/conv.h index 38cf9b64269917..4e8a3ff281b37c 100644 --- a/torch/csrc/api/include/torch/nn/modules/conv.h +++ b/torch/csrc/api/include/torch/nn/modules/conv.h @@ -18,9 +18,9 @@ struct ConvOptions { int64_t input_channels, int64_t output_channels, ExpandingArray kernel_size) : - input_channels_(input_channels), - output_channels_(output_channels), - kernel_size_(std::move(kernel_size)) {} + input_channels_(input_channels), + output_channels_(output_channels), + kernel_size_(std::move(kernel_size)) {} /// The number of channels the input volumes will have. /// Changing this parameter after construction __has no effect__. diff --git a/torch/csrc/jit/README.md b/torch/csrc/jit/README.md index 0a403bde94f6a6..f917c136ba451d 100644 --- a/torch/csrc/jit/README.md +++ b/torch/csrc/jit/README.md @@ -370,21 +370,21 @@ As the trace runs, individual operators create Nodes in the Graph being traced t torch::jit::Node* node = nullptr; std::shared_ptr tracer_state; if (jit::tracer::isTracing()) { - tracer_state = jit::tracer::getTracingState(); - at::Symbol op_name; - op_name = jit::Symbol::fromQualString("aten::__ilshift__"); - node = tracer_state->graph->create(op_name, /*num_outputs=*/0); - jit::tracer::recordSourceLocation(node); - jit::tracer::addInputs(node, "self", self); - jit::tracer::addInputs(node, "other", other); - tracer_state->graph->insertNode(node); - - jit::tracer::setTracingState(nullptr); + tracer_state = jit::tracer::getTracingState(); + at::Symbol op_name; + op_name = jit::Symbol::fromQualString("aten::__ilshift__"); + node = tracer_state->graph->create(op_name, /*num_outputs=*/0); + jit::tracer::recordSourceLocation(node); + jit::tracer::addInputs(node, "self", self); + jit::tracer::addInputs(node, "other", other); + tracer_state->graph->insertNode(node); + + jit::tracer::setTracingState(nullptr); } TypeDefault::__ilshift__(self, other); if (tracer_state) { - jit::tracer::setTracingState(std::move(tracer_state)); - jit::tracer::addOutput(node, self); + jit::tracer::setTracingState(std::move(tracer_state)); + jit::tracer::addOutput(node, self); } ``` @@ -412,15 +412,15 @@ Our frontends produce ASTs in the form of Tree objects. Trees are similar to [s- ``` (- - (+ - (variable (ident x)) - (variable (ident y))) - (apply - (. - (variable (ident z)) - (ident sigmoid)) - (list) - (list)))) + (+ + (variable (ident x)) + (variable (ident y))) + (apply + (. + (variable (ident z)) + (ident sigmoid)) + (list) + (list)))) ``` This is printed in s-expression style with `(kind ...)` representing compound trees and `string_value` representing strings. @@ -454,16 +454,16 @@ The typical way to traverse a tree is to `switch` on the kind and then construct ```cpp switch (tree.kind()) { case TK_VAR: - auto var = Var(tree); // construct tree-view - return environment_stack->getSugaredVar(var.name()); + auto var = Var(tree); // construct tree-view + return environment_stack->getSugaredVar(var.name()); case '.': { - auto select = Select(tree); // construct tree-view - auto sv = emitSugaredExpr(select.value(), 1); - return sv->attr(select.range(), method, select.selector().name()); + auto select = Select(tree); // construct tree-view + auto sv = emitSugaredExpr(select.value(), 1); + return sv->attr(select.range(), method, select.selector().name()); } case TK_APPLY: { - auto apply = Apply(tree); // construct tree-view - return emitApplyExpr(apply, n_binders); + auto apply = Apply(tree); // construct tree-view + return emitApplyExpr(apply, n_binders); } break; ``` @@ -507,7 +507,7 @@ Tokens are either keywords (`def`), operators (`+`), literals (`3.4`), or identi ```cpp if (lexer.nextIf('+')) { - // handle + ... + // handle + ... } ``` @@ -650,10 +650,10 @@ using Operation = std::function; // schema: example_add(Tensor a, Tensor b) -> Tensor int example_add(Stack& stack) { - Tensor a, b; - // stack before: ? ? ? a b <- back - pop(stack, a, b); //Templated helper function - // that pops a, b and converts them to tensor + Tensor a, b; + // stack before: ? ? ? a b <- back + pop(stack, a, b); //Templated helper function + // that pops a, b and converts them to tensor push(stack, a + b); // stack after: // ? ? ? c <- back @@ -1126,7 +1126,7 @@ As a more involved example, the following TorchScript snippet: ```python @torch.jit.script def foo(a : Tensor, b : Tensor): - c = 2 * b + c = 2 * b a += 1 if a.max() > 4: r = a[0] diff --git a/torch/csrc/utils/pybind.h b/torch/csrc/utils/pybind.h index 54bb1c2ea0c383..0184449d565e0d 100644 --- a/torch/csrc/utils/pybind.h +++ b/torch/csrc/utils/pybind.h @@ -71,15 +71,15 @@ template<> struct type_caster { auto size = tuple ? PyTuple_GET_SIZE(source) : PyList_GET_SIZE(source); v_value.resize(size); for (int idx = 0; idx < size; idx++) { - PyObject* obj = tuple ? PyTuple_GET_ITEM(source, idx) : PyList_GET_ITEM(source, idx); - if (THPVariable_Check(obj)) { - v_value[idx] = THPVariable_Unpack(obj).item(); - } else if (PyLong_Check(obj)) { - // use THPUtils_unpackLong after it is safe to include python_numbers.h - v_value[idx] = THPUtils_unpackLong(obj); - } else { - return false; - } + PyObject* obj = tuple ? PyTuple_GET_ITEM(source, idx) : PyList_GET_ITEM(source, idx); + if (THPVariable_Check(obj)) { + v_value[idx] = THPVariable_Unpack(obj).item(); + } else if (PyLong_Check(obj)) { + // use THPUtils_unpackLong after it is safe to include python_numbers.h + v_value[idx] = THPUtils_unpackLong(obj); + } else { + return false; + } } value = v_value; return true; diff --git a/torch/csrc/utils/tensor_numpy.cpp b/torch/csrc/utils/tensor_numpy.cpp index 675c41c2a7f2ee..235c0cfc447f47 100644 --- a/torch/csrc/utils/tensor_numpy.cpp +++ b/torch/csrc/utils/tensor_numpy.cpp @@ -195,7 +195,7 @@ ScalarType numpy_dtype_to_aten(int dtype) { bool is_numpy_scalar(PyObject* obj) { return (PyArray_IsIntegerScalar(obj) || - PyArray_IsScalar(obj, Floating)); + PyArray_IsScalar(obj, Floating)); } }} // namespace torch::utils