From: Hyung-Kyu Choi Date: Thu, 29 Mar 2018 07:33:12 +0000 (+0900) Subject: Introduce CPU operation of NN runtime X-Git-Tag: 0.1~515 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=804f8a002d19b65abab23ed623a68cb5bb82fbb5;p=platform%2Fcore%2Fml%2Fnnfw.git Introduce CPU operation of NN runtime - Introduce CPU operation of NN runtime - Also introduce eigen and gemmlowp in depend/external Signed-off-by: Hyung-Kyu Choi --- diff --git a/src/runtime/ref/nn/common/CMakeLists.txt b/src/runtime/ref/nn/common/CMakeLists.txt index 4223f7f..5a97f04 100644 --- a/src/runtime/ref/nn/common/CMakeLists.txt +++ b/src/runtime/ref/nn/common/CMakeLists.txt @@ -1,5 +1,6 @@ SET (CUR_INCS ${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/operations ${CMAKE_CURRENT_SOURCE_DIR} ) SET (INC_DIRS @@ -12,6 +13,7 @@ SET (CUR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/CpuExecutor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/OperationsUtils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/operations/Activation.cpp ) SET (SRCS ${SRCS} diff --git a/src/runtime/ref/nn/common/include/ActivationFunctor.h b/src/runtime/ref/nn/common/include/ActivationFunctor.h new file mode 100644 index 0000000..7e6600e --- /dev/null +++ b/src/runtime/ref/nn/common/include/ActivationFunctor.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_ACTIVATION_FUNCTOR_H +#define ANDROID_ML_NN_ACTIVATION_FUNCTOR_H + +#if 0 // REF-ANN +#include "android/log.h" +#endif + +#include +#include +#include +#include + +enum ActivationFn { + kActivationNone = 0, + kActivationRelu, + kActivationRelu1, + kActivationRelu6, + kActivationTanh, + kActivationSignBit, + kActivationSigmoid, +}; + +class ActivationFunctor { + public: + explicit ActivationFunctor(ActivationFn act) : act_(act) {} + + float operator()(float a) const { + switch (act_) { + case kActivationNone: + return a; + case kActivationRelu: + return a < 0.f ? 0.f : a; + case kActivationRelu6: + return std::max(0.f, std::min(a, 6.f)); + case kActivationTanh: + return std::tanh(a); + case kActivationSigmoid: + return 1.0f / (1.0f + std::exp(-a)); + default: +#if 0 // REF-ANN + __android_log_print(ANDROID_LOG_ERROR, "NN API", + "Invalid enum value for activation function: 0x%0X", + act_); +#endif + exit(1); + } + } + + private: + ActivationFn act_; +}; + +#endif // ANDROID_ML_NN_ACTIVATION_FUNCTOR_H diff --git a/src/runtime/ref/nn/common/operations/Activation.cpp b/src/runtime/ref/nn/common/operations/Activation.cpp new file mode 100644 index 0000000..b40b34b --- /dev/null +++ b/src/runtime/ref/nn/common/operations/Activation.cpp @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Operations.h" +#include "OperationsUtils.h" + +#include "internal/optimized/optimized_ops.h" + +// TODO-NNRT: There was no inlcude "ActivationFunctor.h" in Android NN. +// This may be included from some other header files. +#include "ActivationFunctor.h" + +namespace android { +namespace nn { + +bool reluFloat32(const float* inputData, const Shape& inputShape, + float* outputData, const Shape& outputShape) { + int numElements = getNumberOfElements(inputShape); + for (int i=0; i dim; + if (getNumberOfDimensions(inputShape) == 2) { + uint32_t batch_size = getSizeOfDimension(inputShape, 0); + uint32_t input_size = getNumberOfElements(inputShape) / batch_size; + + Shape shapeIn4D; + shapeIn4D.dimensions = {batch_size, 1, 1, input_size}; + dim = convertShapeToDims(shapeIn4D); + } else if (getNumberOfDimensions(inputShape) == 4) { + dim = convertShapeToDims(inputShape); + } else { + LOG(ERROR) << "only 2D and 4D tensors supported"; + return false; + } + + optimized_ops::Softmax(inputData, dim, beta, + outputData, dim); + return true; +} + +#define ANDROID_NN_RELUX_QUANT8(activation) \ + int numElements = getNumberOfElements(inputShape); \ + int32_t output_activation_min = 0; \ + int32_t output_activation_max = 0; \ + \ + CalculateActivationRangeUint8(activation, inputShape, \ + &output_activation_min, \ + &output_activation_max); \ + \ + for (int i=0; i(1 << (31 - kInputIntegerBits)); + + int32_t input_multiplier = 0; + int32_t input_left_shift = 0; + if (!QuantizeMultiplierGreaterThanOne(input_real_multiplier, + &input_multiplier, + &input_left_shift)) { + return false; + } + int32_t input_range_radius = + CalculateInputRadius(kInputIntegerBits, input_left_shift); + + optimized_ops::Logistic( + inputData, convertShapeToDims(inputShape), + inputShape.offset, input_range_radius, + input_multiplier, input_left_shift, + outputData, convertShapeToDims(outputShape)); + + return true; +} + +bool softmaxQuant8(const uint8_t* inputData, const Shape& inputShape, + const float beta, + uint8_t* outputData, const Shape& outputShape) { + Dims<4> dim; + if (getNumberOfDimensions(inputShape) == 2) { + uint32_t batch_size = getSizeOfDimension(inputShape, 0); + uint32_t input_size = getNumberOfElements(inputShape) / batch_size; + + Shape shapeIn4D; + shapeIn4D.dimensions = {batch_size, 1, 1, input_size}; + dim = convertShapeToDims(shapeIn4D); + } else if (getNumberOfDimensions(inputShape) == 4) { + dim = convertShapeToDims(inputShape); + } else { + LOG(ERROR) << "only 2D and 4D tensors supported"; + return false; + } + + if (outputShape.offset != 0 || outputShape.scale != 1.f / 256) { + LOG(ERROR) << "incorrect scale / offset for output"; + return false; + } + + static const int32_t kScaledDiffIntegerBits = 5; + const double input_beta_real_multiplier = std::min( + 1.0 * beta * inputShape.scale * (1 << (31 - kScaledDiffIntegerBits)), + (1ll << 31) - 1.0); + + int32_t input_multiplier = 0; + int32_t input_left_shift = 0; + if (!QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, + &input_multiplier, + &input_left_shift)) { + return false; + } + float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, + input_left_shift); + + optimized_ops::Softmax(inputData, dim, input_multiplier, + input_left_shift, diff_min, + outputData, dim); + return true; +} + + +} // namespace nn +} // namespace android diff --git a/src/runtime/ref/nn/common/operations/internal/common.h b/src/runtime/ref/nn/common/operations/internal/common.h new file mode 100644 index 0000000..c54ee6e --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/common.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMMON_H_ +#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMMON_H_ + +#ifndef USE_NEON +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define USE_NEON +#include +#endif +#endif + +#include "gemmlowp.h" +#include "types.h" + +namespace android { +namespace nn { + +template +struct ActivationFunctionImpl {}; + +template <> +struct ActivationFunctionImpl { + static float Eval(float x) { return x; } +}; + +template <> +struct ActivationFunctionImpl { + static float Eval(float x) { return x < 0.f ? 0.f : x; } +}; + +template <> +struct ActivationFunctionImpl { + static float Eval(float x) { return x > 1.f ? 1.f : x < -1.f ? -1.f : x; } +}; + +template <> +struct ActivationFunctionImpl { + static float Eval(float x) { return x > 6.f ? 6.f : x < 0.f ? 0.f : x; } +}; + +template +float ActivationFunction(float x) { + return ActivationFunctionImpl::Eval(x); +} + +inline int32 MultiplyByQuantizedMultiplierSmallerThanOne( + int32 x, int32 quantized_multiplier, int right_shift) { + using gemmlowp::RoundingDivideByPOT; + using gemmlowp::SaturatingRoundingDoublingHighMul; + return RoundingDivideByPOT( + SaturatingRoundingDoublingHighMul(x, quantized_multiplier), right_shift); +} + +inline int32 MultiplyByQuantizedMultiplierGreaterThanOne( + int32 x, int32 quantized_multiplier, int left_shift) { + using gemmlowp::SaturatingRoundingDoublingHighMul; + return SaturatingRoundingDoublingHighMul(x * (1 << left_shift), + quantized_multiplier); +} + +} // namespace nn +} // namespace android + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMMON_H_ diff --git a/src/runtime/ref/nn/common/operations/internal/compatibility.h b/src/runtime/ref/nn/common/operations/internal/compatibility.h new file mode 100644 index 0000000..9d9cdb7 --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/compatibility.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMPATIBILITY_H_ +#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMPATIBILITY_H_ + +#ifndef ANDROID_ML_NN_COMPATIBILITY +#define ANDROID_ML_NN_COMPATIBILITY + +#include +#include + +#ifndef DCHECK +#define DCHECK(condition) (condition) ? (void)0 : assert(false) +#endif + +#ifndef DCHECK_EQ +#define DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : assert(false) +#endif + +#ifndef DCHECK_GE +#define DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : assert(false) +#endif + +#ifndef DCHECK_GT +#define DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : assert(false) +#endif + +#ifndef DCHECK_LE +#define DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : assert(false) +#endif + +#ifndef DCHECK_LT +#define DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : assert(false) +#endif + +#ifndef CHECK_EQ +#define CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : assert(false) +#endif + +using uint8 = std::uint8_t; +using int16 = std::int16_t; +using uint16 = std::uint16_t; +using int32 = std::int32_t; +using uint32 = std::uint32_t; + +#endif + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_COMPATIBILITY_H_ diff --git a/src/runtime/ref/nn/common/operations/internal/optimized/cpu_check.h b/src/runtime/ref/nn/common/operations/internal/optimized/cpu_check.h new file mode 100644 index 0000000..02f42fd --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/optimized/cpu_check.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_CPU_CHECK_ +#define FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_CPU_CHECK_ + +// NEON_OR_PORTABLE(SomeFunc, arcs) calls NeonSomeFunc(args) if NEON is +// enabled at build time, or PortableSomeFunc(args) otherwise. +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__) +#else +#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__) +#endif + +#endif // FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_CPU_CHECK_ diff --git a/src/runtime/ref/nn/common/operations/internal/optimized/neon_tensor_utils.cc b/src/runtime/ref/nn/common/operations/internal/optimized/neon_tensor_utils.cc new file mode 100644 index 0000000..f17730f --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/optimized/neon_tensor_utils.cc @@ -0,0 +1,216 @@ +/* + * Copyright 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "ActivationFunctor.h" +#include "tensor_utils_impl.h" + +#ifdef USE_NEON + +#include +#define kFloatWeightsPerNeonLane 4 + +namespace android { +namespace nn { +namespace tensor_utils { + +void NeonMatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result, + int result_stride) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + m_cols - (m_cols & (kFloatWeightsPerNeonLane - 1)); + + // The arrays used to cache the vector. + float32x4_t* vector_cache_float32x4 = + new float32x4_t[(m_cols / kFloatWeightsPerNeonLane) * + sizeof(float32x4_t)]; + + for (int b = 0; b < n_batch; b++) { + float* result_in_batch = result + b * m_rows; + const float* vector_in_batch = vector + b * m_cols; + const float* matrix_ptr = matrix; + for (int c = 0; c < postamble_start; c += kFloatWeightsPerNeonLane) { + vector_cache_float32x4[c >> 2] = vld1q_f32(vector_in_batch + c); + } + for (int r = 0; r < m_rows; r++) { + float32x4_t acc_32x4 = vmovq_n_f32(0.0); + for (int c = 0; c < postamble_start; c += kFloatWeightsPerNeonLane) { + float32x4_t temp = vector_cache_float32x4[c >> 2]; + // Load 4 float values from vector1 and vector2 and accumulator. + float32x4_t v1_f32x4 = vld1q_f32(matrix_ptr + c); + // Vector multiply-accumulate 4 float + acc_32x4 = vmlaq_f32(acc_32x4, v1_f32x4, temp); + } + // Add the 4 intermediate sum values to get the final dot-prod value for + // this column. + *result_in_batch += + (vgetq_lane_f32(acc_32x4, 0) + vgetq_lane_f32(acc_32x4, 1) + + vgetq_lane_f32(acc_32x4, 2) + vgetq_lane_f32(acc_32x4, 3)); + for (int c = postamble_start; c < m_cols; c++) { + *result_in_batch += matrix_ptr[c] * vector_in_batch[c]; + } + matrix_ptr += m_cols; + result_in_batch += result_stride; + } + } + delete[] vector_cache_float32x4; +} + +void NeonVectorVectorCwiseProduct(const float* vector1, const float* vector2, + int v_size, float* result) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load 4 float values from vector1 and vector2. + float32x4_t v1_f32x4 = vld1q_f32(vector1 + v); + float32x4_t v2_f32x4 = vld1q_f32(vector2 + v); + // Vector multiply 4 float + float32x4_t mul_32x4 = vmulq_f32(v1_f32x4, v2_f32x4); + // Save to result array. + vst1q_f32(&result[v], mul_32x4); + } + for (int v = postamble_start; v < v_size; v++) { + result[v] = vector1[v] * vector2[v]; + } +} + +void NeonVectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, int v_size, + float* result) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load 4 float values from vector1 and vector2 and accumulator. + float32x4_t v1_f32x4 = vld1q_f32(vector1 + v); + float32x4_t v2_f32x4 = vld1q_f32(vector2 + v); + float32x4_t acc_32x4 = vld1q_f32(result + v); + // Vector multiply-accumulate 4 float + acc_32x4 = vmlaq_f32(acc_32x4, v1_f32x4, v2_f32x4); + // Save to result array. + vst1q_f32(&result[v], acc_32x4); + } + for (int v = postamble_start; v < v_size; v++) { + result[v] += vector1[v] * vector2[v]; + } +} + +void NeonVectorBatchVectorCwiseProductAccumulate(const float* vector, + int v_size, + const float* batch_vector, + int n_batch, float* result) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + + // The arrays used to cache the vector. + float32x4_t* vector_cache_float32x4 = + new float32x4_t[(v_size / kFloatWeightsPerNeonLane) * + sizeof(float32x4_t)]; + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + vector_cache_float32x4[v >> 2] = vld1q_f32(vector + v); + } + + float* result_ptr = result; + const float* batch_vector_ptr = batch_vector; + for (int b = 0; b < n_batch; b++) { + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load from memory to vectors. + float32x4_t result_f32x4 = vld1q_f32(result_ptr + v); + float32x4_t batch_vector_f32x4 = vld1q_f32(batch_vector_ptr + v); + // Multiply-accumulate. + result_f32x4 = vmlaq_f32(result_f32x4, batch_vector_f32x4, + vector_cache_float32x4[v >> 2]); + // Store. + vst1q_f32(result_ptr + v, result_f32x4); + } + // Postamble loop + for (int v = postamble_start; v < v_size; v++) { + result_ptr[v] += vector[v] * batch_vector_ptr[v]; + } + // Update the pointers. + result_ptr += v_size; + batch_vector_ptr += v_size; + } + delete[] vector_cache_float32x4; +} + +void NeonSub1Vector(const float* vector, int v_size, float* result) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + + float32x4_t one_f32x4 = vmovq_n_f32(1.0); + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load 4 float values from the current pointers of the input column and + // subtract from 1. + float32x4_t v_f32x4 = vld1q_f32(vector + v); + float32x4_t result_f32x4 = vsubq_f32(one_f32x4, v_f32x4); + // Save to output. + vst1q_f32(result + v, result_f32x4); + } + for (int v = postamble_start; v < v_size; v++) { + result[v] = 1.0f - vector[v]; + } +} + +void NeonClipVector(const float* vector, int v_size, float abs_limit, + float* result) { + // If v_size is not divisible by kWeightsPerNeonLane, we cannot use the main + // vectorized loop, and we need to process sequentially. postamble_start shows + // the start index where this should happen. + const int postamble_start = + v_size - (v_size & (kFloatWeightsPerNeonLane - 1)); + + // Replicate abs_limit and -abs_limit in two vectors. + const float32x4_t abs_limit_f32x4 = vmovq_n_f32(abs_limit); + const float32x4_t neg_abs_limit_f32x4 = vmovq_n_f32(-abs_limit); + + for (int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane) { + // Load from memory to vector. + float32x4_t v_f32x4 = vld1q_f32(vector + v); + // Clip between abs_limit and -abs_limit. + float32x4_t result_f32x4 = vminq_f32(abs_limit_f32x4, v_f32x4); + result_f32x4 = vmaxq_f32(neg_abs_limit_f32x4, result_f32x4); + // Save to output. + vst1q_f32(result + v, result_f32x4); + } + // Postamble loop. + for (int v = postamble_start; v < v_size; v++) { + result[v] = (abs_limit < vector[v]) ? abs_limit : vector[v]; + result[v] = (-abs_limit > result[v]) ? -abs_limit : result[v]; + } +} + +} // namespace tensor_utils +} // namespace nn +} // namespace android + +#endif // USE_NEON diff --git a/src/runtime/ref/nn/common/operations/internal/optimized/neon_tensor_utils.h b/src/runtime/ref/nn/common/operations/internal/optimized/neon_tensor_utils.h new file mode 100644 index 0000000..92a151c --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/optimized/neon_tensor_utils.h @@ -0,0 +1,118 @@ +/* + * Copyright 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_NEON_TENSOR_UTILS_H_ +#define FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_NEON_TENSOR_UTILS_H_ + +#include "ActivationFunctor.h" +#include "cpu_check.h" +#include "tensor_utils_impl.h" + +namespace android { +namespace nn { +namespace tensor_utils { + +void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result, + int result_stride) { + NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, + vector, n_batch, result, result_stride); +} + +void VectorVectorCwiseProduct(const float* vector1, const float* vector2, + int v_size, float* result) { + NEON_OR_PORTABLE(VectorVectorCwiseProduct, vector1, vector2, v_size, result); +} + +void VectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, int v_size, + float* result) { + NEON_OR_PORTABLE(VectorVectorCwiseProductAccumulate, vector1, vector2, v_size, + result); +} + +void VectorBatchVectorCwiseProductAccumulate(const float* vector, int v_size, + const float* batch_vector, + int n_batch, float* result) { + NEON_OR_PORTABLE(VectorBatchVectorCwiseProductAccumulate, vector, v_size, + batch_vector, n_batch, result); +} + +float VectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size) { + return PortableVectorVectorDotProduct(vector1, vector2, v_size); +} + +void BatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride) { + PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch, + result, result_stride); +} + +void VectorBatchVectorAssign(const float* vector, int v_size, int n_batch, + float* batch_vector) { + PortableVectorBatchVectorAssign(vector, v_size, n_batch, batch_vector); +} + +void ApplySigmoidToVector(const float* vector, int v_size, float* result) { + PortableApplySigmoidToVector(vector, v_size, result); +} + +void ApplyActivationToVector(const float* vector, int v_size, + ActivationFn activation, float* result) { + PortableApplyActivationToVector(vector, v_size, activation, result); +} + +void CopyVector(const float* vector, int v_size, float* result) { + PortableCopyVector(vector, v_size, result); +} + +void Sub1Vector(const float* vector, int v_size, float* result) { + NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result); +} + +void ZeroVector(float* vector, int v_size) { + PortableZeroVector(vector, v_size); +} + +float Clip(float f, float abs_limit) { return PortableClip(f, abs_limit); } + +void ClipVector(const float* vector, int v_size, float abs_limit, + float* result) { + NEON_OR_PORTABLE(ClipVector, vector, v_size, abs_limit, result); +} + +// TODO(ghodrat): Implement Neon version. +void VectorShiftLeft(float* vector, int v_size, float shift_value) { + PortableVectorShiftLeft(vector, v_size, shift_value); +} + +// TODO(ghodrat): Implement Neon version. +void ReductionSumVector(const float* input_vector, int input_stride, + float* output_vector, int output_size, + int reduction_size) { + PortableReductionSumVector(input_vector, input_stride, output_vector, + output_size, reduction_size); +} + +} // namespace tensor_utils +} // namespace nn +} // namespace android + +#endif // FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_NEON_TENSOR_UTILS_H_ diff --git a/src/runtime/ref/nn/common/operations/internal/optimized/optimized_ops.h b/src/runtime/ref/nn/common/operations/internal/optimized/optimized_ops.h new file mode 100644 index 0000000..5bbec3d --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/optimized/optimized_ops.h @@ -0,0 +1,2712 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_ +#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "fixedpoint.h" +#include "gemmlowp.h" +#include "../common.h" +#include "../types.h" + +namespace android { +namespace nn { +namespace optimized_ops { + +// Make a local VectorMap typedef allowing to map a float array +// as a Eigen vector expression. The std::conditional here is to +// construct the suitable Eigen type for the constness of the +// data. Indeed, for const data, we need to produce +// Eigen::Map> +// and not the more straightforward +// Eigen::Map> +template +using VectorMap = typename std::conditional< + std::is_const::value, + Eigen::Map::type, + Eigen::Dynamic, 1>>, + Eigen::Map>>::type; + +template +VectorMap MapAsVector(Scalar* data, const Dims& dims) { + const int size = RequiredBufferSizeForDims(dims); + return VectorMap(data, size, 1); +} + +// Make a local VectorMap typedef allowing to map a float array +// as a Eigen matrix expression. The same explanation as for VectorMap +// above also applies here. +template +using MatrixMap = typename std::conditional< + std::is_const::value, + Eigen::Map::type, + Eigen::Dynamic, Eigen::Dynamic>>, + Eigen::Map>>::type; + +template +MatrixMap MapAsMatrixWithFirstDimAsRows(Scalar* data, + const Dims& dims) { + const int rows = dims.sizes[0]; + int cols = 1; + for (int d = 1; d < N; d++) { + cols *= dims.sizes[d]; + } + return MatrixMap(data, rows, cols); +} + +template +MatrixMap MapAsMatrixWithLastDimAsCols(Scalar* data, + const Dims& dims) { + const int cols = dims.sizes[N - 1]; + int rows = 1; + for (int d = 0; d < N - 1; d++) { + rows *= dims.sizes[d]; + } + return MatrixMap(data, rows, cols); +} + +template +using ArrayMap = typename std::conditional< + std::is_const::value, + Eigen::Map::type, + Eigen::Dynamic, Eigen::Dynamic>>, + Eigen::Map>>::type; + +template +ArrayMap MapAsArrayWithFirstDimAsRows(Scalar* data, + const Dims& dims) { + const int rows = dims.sizes[0]; + int cols = 1; + for (int d = 1; d < N; d++) { + cols *= dims.sizes[d]; + } + return ArrayMap(data, rows, cols); +} + +// TODO(b/62193649): this function is only needed as long +// as we have the --variable_batch hack. +template +MatrixMap MapAsMatrixWithGivenNumberOfRows(Scalar* data, + const Dims& dims, + int rows) { + int cols = 1; + bool matched_rows = false; + for (int d = 0; d < N; d++) { + cols *= dims.sizes[d]; + if (cols == rows) { + matched_rows = true; + cols = 1; + } + } + DCHECK(matched_rows); + return MatrixMap(data, rows, cols); +} + +// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING ELEMENT-WISE +// BROADCASTING. +// +// NdArrayDesc describes the shape and memory layout of an N-dimensional +// rectangular array of numbers. +// +// NdArrayDesc is basically identical to Dims defined in types.h. +// However, as Dims is to be deprecated, this class exists as an adaptor +// to enable simple unoptimized implementations of element-wise broadcasting +// operations. +template +struct NdArrayDesc { + // The "extent" of each dimension. Indices along dimension d must be in the + // half-open interval [0, extents[d]). + int extents[N]; + + // The number of *elements* (not bytes) between consecutive indices of each + // dimension. + int strides[N]; +}; + +// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING +// ELEMENT-WISE BROADCASTING. +// +// Same as Offset(), except takes as NdArrayDesc instead of Dims. +inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2, + int i3) { + DCHECK(i0 >= 0 && i0 < desc.extents[0]); + DCHECK(i1 >= 0 && i1 < desc.extents[1]); + DCHECK(i2 >= 0 && i2 < desc.extents[2]); + DCHECK(i3 >= 0 && i3 < desc.extents[3]); + return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + + i3 * desc.strides[3]; +} + +// Given the dimensions of the operands for an element-wise binary broadcast, +// adjusts them so that they can be directly iterated over with simple loops. +// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and +// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr. +// +// This function assumes that the two input shapes are compatible up to +// broadcasting and the shorter one has already been prepended with 1s to be the +// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64), +// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that +// Dims refer to shapes in reverse order. In this case, input0_dims will be +// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1). +// +// When two shapes are compatible up to broadcasting, for each dimension d, +// the input extents are either equal, or one of them is 1. +// +// This function performs the following for each dimension d: +// - If the extents are equal, then do nothing since the loop that walks over +// both of the input arrays is correct. +// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1 +// and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows +// array0 to be referenced *at any index* in dimension d and still access the +// same slice. +template +inline void NdArrayDescsForElementwiseBroadcast(const Dims& input0_dims, + const Dims& input1_dims, + NdArrayDesc* desc0_out, + NdArrayDesc* desc1_out) { + DCHECK(desc0_out != nullptr); + DCHECK(desc1_out != nullptr); + + // Copy dims to desc. + for (int i = 0; i < N; ++i) { + desc0_out->extents[i] = input0_dims.sizes[i]; + desc0_out->strides[i] = input0_dims.strides[i]; + desc1_out->extents[i] = input1_dims.sizes[i]; + desc1_out->strides[i] = input1_dims.strides[i]; + } + + // Walk over each dimension. If the extents are equal do nothing. + // Otherwise, set the desc with extent 1 to have extent equal to the other and + // stride 0. + for (int i = 0; i < N; ++i) { + const int extent0 = ArraySize(input0_dims, i); + const int extent1 = ArraySize(input1_dims, i); + if (extent0 != extent1) { + if (extent0 == 1) { + desc0_out->strides[i] = 0; + desc0_out->extents[i] = extent1; + } else { + DCHECK_EQ(extent1, 1); + desc1_out->strides[i] = 0; + desc1_out->extents[i] = extent0; + } + } + } +} + +#ifdef USE_NEON +template +void AddBiasAndEvalActivationFunction(const float* bias_data, + const Dims<4>& bias_dims, + float* array_data, + const Dims<4>& array_dims) { + gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction"); + const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3]; + const int array_size = array_dims.sizes[3] * array_dims.strides[3]; + DCHECK_EQ((array_size % bias_size), 0); + float* array_ptr = array_data; + float* array_end_ptr = array_ptr + array_size; + const auto zero = vdupq_n_f32(0); + const auto six = vdupq_n_f32(6); + const auto neg_one = vdupq_n_f32(-1); + const auto one = vdupq_n_f32(1); + for (; array_ptr != array_end_ptr; array_ptr += bias_size) { + int i = 0; + for (; i <= bias_size - 16; i += 16) { + auto b0 = vld1q_f32(bias_data + i); + auto b1 = vld1q_f32(bias_data + i + 4); + auto b2 = vld1q_f32(bias_data + i + 8); + auto b3 = vld1q_f32(bias_data + i + 12); + auto a0 = vld1q_f32(array_ptr + i); + auto a1 = vld1q_f32(array_ptr + i + 4); + auto a2 = vld1q_f32(array_ptr + i + 8); + auto a3 = vld1q_f32(array_ptr + i + 12); + auto x0 = vaddq_f32(a0, b0); + auto x1 = vaddq_f32(a1, b1); + auto x2 = vaddq_f32(a2, b2); + auto x3 = vaddq_f32(a3, b3); + if (Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6) { + x0 = vmaxq_f32(zero, x0); + x1 = vmaxq_f32(zero, x1); + x2 = vmaxq_f32(zero, x2); + x3 = vmaxq_f32(zero, x3); + if (Ac == FusedActivationFunctionType::kRelu6) { + x0 = vminq_f32(six, x0); + x1 = vminq_f32(six, x1); + x2 = vminq_f32(six, x2); + x3 = vminq_f32(six, x3); + } + } else if (Ac == FusedActivationFunctionType::kRelu1) { + x0 = vmaxq_f32(neg_one, x0); + x1 = vmaxq_f32(neg_one, x1); + x2 = vmaxq_f32(neg_one, x2); + x3 = vmaxq_f32(neg_one, x3); + x0 = vminq_f32(one, x0); + x1 = vminq_f32(one, x1); + x2 = vminq_f32(one, x2); + x3 = vminq_f32(one, x3); + } + vst1q_f32(array_ptr + i, x0); + vst1q_f32(array_ptr + i + 4, x1); + vst1q_f32(array_ptr + i + 8, x2); + vst1q_f32(array_ptr + i + 12, x3); + } + for (; i <= bias_size - 4; i += 4) { + auto b = vld1q_f32(bias_data + i); + auto a = vld1q_f32(array_ptr + i); + auto x = vaddq_f32(a, b); + if (Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6) { + x = vmaxq_f32(zero, x); + if (Ac == FusedActivationFunctionType::kRelu6) { + x = vminq_f32(six, x); + } + } else if (Ac == FusedActivationFunctionType::kRelu1) { + x = vmaxq_f32(neg_one, x); + x = vminq_f32(one, x); + } + vst1q_f32(array_ptr + i, x); + } + for (; i < bias_size; i++) { + array_ptr[i] = ActivationFunction(array_ptr[i] + bias_data[i]); + } + } +} +#else // not NEON +template +void AddBiasAndEvalActivationFunction(const float* bias_data, + const Dims<4>& bias_dims, + float* array_data, + const Dims<4>& array_dims) { + gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction"); + const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3]; + const int array_size = array_dims.sizes[3] * array_dims.strides[3]; + DCHECK_EQ((array_size % bias_size), 0); + for (int array_offset = 0; array_offset < array_size; + array_offset += bias_size) { + for (int i = 0; i < bias_size; i++) { + array_data[array_offset + i] = + ActivationFunction(array_data[array_offset + i] + bias_data[i]); + } + } +} +#endif + +template +void Gemm(const Eigen::MatrixBase& lhs, const Eigen::MatrixBase& rhs, + Eigen::MatrixBase* result) { + if (rhs.cols() == 1) { + gemmlowp::ScopedProfilingLabel label("GEMV"); + result->col(0).noalias() = lhs * rhs.col(0); + } else { + gemmlowp::ScopedProfilingLabel label("GEMM"); + result->noalias() = lhs * rhs; + } +} + +template +void FullyConnected(const float* input_data, const Dims<4>& input_dims, + const float* weights_data, const Dims<4>& weights_dims, + const float* bias_data, const Dims<4>& bias_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("FullyConnected"); + // TODO(b/62193649): this convoluted shape computation (determining + // input_rows from the weights_dims, then MapAsMatrixWithGivenNumberOfRows) + // is because the current --variable_batch hack consists in overwriting the + // 3rd dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + // When that is fixed, this should become: + // const auto input_matrix_map = + // MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + const int input_rows = ArraySize(weights_dims, 0); + const auto input_matrix_map = + MapAsMatrixWithGivenNumberOfRows(input_data, input_dims, input_rows); + const auto filter_matrix_map = + MapAsMatrixWithFirstDimAsRows(weights_data, weights_dims); + auto output_matrix_map = + MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + + Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map); + AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data, + output_dims); +} + +inline void preload_l1_stream(const uint8* ptr) { +#ifdef GEMMLOWP_ARM_64 + asm volatile("prfm pldl1strm, [%[ptr]]\n" ::[ptr] "r"(ptr) :); +#else + gemmlowp::Prefetch(ptr); +#endif +} + +#ifdef USE_NEON +template +void FullyConnectedAsGEMV(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("FullyConnectedAsGEMV/8bit"); + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + DCHECK(IsPackedWithoutStrides(input_dims)); + DCHECK(IsPackedWithoutStrides(filter_dims)); + DCHECK(IsPackedWithoutStrides(bias_dims)); + DCHECK(IsPackedWithoutStrides(output_dims)); + DCHECK_EQ(ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3), + 1); + const int input_size = input_dims.strides[3]; + const int output_size = MatchingArraySize(filter_dims, 1, output_dims, 0); + static constexpr int kPeel = 4; + for (int k = 0; k < input_size; k += 64) { + preload_l1_stream(input_data + k); + } + for (int k = 0; k < kPeel * input_size; k += 64) { + preload_l1_stream(filter_data + k); + } + DCHECK(!(output_size % kPeel)); + const int32* bias_ptr = bias_data; + uint8* output_ptr = output_data; + for (int out = 0; out < output_size; out += kPeel) { + int32x4_t acc[kPeel]; + for (int k = 0; k < kPeel; k++) { + acc[k] = vdupq_n_s32(0); + } + const int16x8_t input_offset_vec = vdupq_n_s16(input_offset); + const int16x8_t filter_offset_vec = vdupq_n_s16(filter_offset); + int in = 0; + for (; in <= input_size - 16; in += 16) { + const uint8x16_t input_val_u8 = vld1q_u8(input_data + in); + uint8x16_t filter_val_u8[kPeel]; + for (int k = 0; k < kPeel; k++) { + const uint8* filter_ptr = filter_data + in + (out + k) * input_size; + filter_val_u8[k] = vld1q_u8(filter_ptr); + preload_l1_stream(filter_ptr + 64); + } + int16x8_t input_val[2]; + const uint8x8_t low = vget_low_u8(input_val_u8); + const uint8x8_t high = vget_high_u8(input_val_u8); + input_val[0] = vreinterpretq_s16_u16(vmovl_u8(low)); + input_val[1] = vreinterpretq_s16_u16(vmovl_u8(high)); + input_val[0] = vaddq_s16(input_val[0], input_offset_vec); + input_val[1] = vaddq_s16(input_val[1], input_offset_vec); + int16x8_t filter_val[kPeel][2]; + for (int k = 0; k < kPeel; k++) { + const uint8x8_t low = vget_low_u8(filter_val_u8[k]); + const uint8x8_t high = vget_high_u8(filter_val_u8[k]); + filter_val[k][0] = vreinterpretq_s16_u16(vmovl_u8(low)); + filter_val[k][1] = vreinterpretq_s16_u16(vmovl_u8(high)); + filter_val[k][0] = vaddq_s16(filter_val[k][0], filter_offset_vec); + filter_val[k][1] = vaddq_s16(filter_val[k][1], filter_offset_vec); + } + for (int p = 0; p < 2; p++) { + for (int k = 0; k < kPeel; k++) { + acc[k] = vmlal_s16(acc[k], vget_low_s16(filter_val[k][p]), + vget_low_s16(input_val[p])); + } + for (int k = 0; k < kPeel; k++) { + acc[k] = vmlal_s16(acc[k], vget_high_s16(filter_val[k][p]), + vget_high_s16(input_val[p])); + } + } + } + for (; in <= input_size - 8; in += 8) { + const uint8x8_t input_val_u8 = vld1_u8(input_data + in); + uint8x8_t filter_val_u8[kPeel]; + for (int k = 0; k < kPeel; k++) { + const uint8* filter_ptr = filter_data + in + (out + k) * input_size; + filter_val_u8[k] = vld1_u8(filter_ptr); + } + int16x8_t input_val; + input_val = vreinterpretq_s16_u16(vmovl_u8(input_val_u8)); + input_val = vaddq_s16(input_val, input_offset_vec); + int16x8_t filter_val[kPeel]; + for (int k = 0; k < kPeel; k++) { + filter_val[k] = vreinterpretq_s16_u16(vmovl_u8(filter_val_u8[k])); + filter_val[k] = vaddq_s16(filter_val[k], filter_offset_vec); + } + for (int k = 0; k < kPeel; k++) { + acc[k] = vmlal_s16(acc[k], vget_low_s16(filter_val[k]), + vget_low_s16(input_val)); + } + for (int k = 0; k < kPeel; k++) { + acc[k] = vmlal_s16(acc[k], vget_high_s16(filter_val[k]), + vget_high_s16(input_val)); + } + } + if (in < input_size) { + int32 buf[4 * kPeel]; + for (int k = 0; k < 4; k++) { + vst1q_s32(buf + 4 * k, acc[k]); + } + for (; in < input_size; in++) { + int lane = (in + 8 - input_size) % 4; + const int32 input_val = input_data[in] + input_offset; + for (int k = 0; k < kPeel; k++) { + int32 filter_val = + filter_data[in + (out + k) * input_size] + filter_offset; + buf[lane + 4 * k] += filter_val * input_val; + } + } + for (int k = 0; k < 4; k++) { + acc[k] = vld1q_s32(buf + 4 * k); + } + } + + // Horizontally reduce accumulators + int32x2_t pairwise_reduced_acc[kPeel]; + for (int k = 0; k < kPeel; k++) { + pairwise_reduced_acc[k] = + vpadd_s32(vget_low_s32(acc[k]), vget_high_s32(acc[k])); + } + static_assert(kPeel == 4, "the code below currently assumes kPeel = 4"); + const int32x2_t reduced_lo = + vpadd_s32(pairwise_reduced_acc[0], pairwise_reduced_acc[1]); + const int32x2_t reduced_hi = + vpadd_s32(pairwise_reduced_acc[2], pairwise_reduced_acc[3]); + int32x4_t reduced = vcombine_s32(reduced_lo, reduced_hi); + // Add bias values. + int32x4_t bias_vec = vld1q_s32(bias_ptr); + bias_ptr += 4; + reduced = vaddq_s32(reduced, bias_vec); + // Multiply by the fixed-point multiplier. + reduced = vqrdmulhq_n_s32(reduced, output_multiplier); + // Rounding-shift-right. + using gemmlowp::RoundingDivideByPOT; + reduced = RoundingDivideByPOT(reduced, output_shift); + // Add the output offset. + const int32x4_t output_offset_vec = vdupq_n_s32(output_offset); + reduced = vaddq_s32(reduced, output_offset_vec); + // Narrow values down to 16 bit signed. + const int16x4_t res16 = vqmovn_s32(reduced); + // Narrow values down to 8 bit unsigned, saturating. + uint8x8_t res8 = vqmovun_s16(vcombine_s16(res16, res16)); + if (Ac != FusedActivationFunctionType::kNone) { + // Apply the clamping from the activation function + res8 = vmax_u8(res8, vdup_n_u8(output_activation_min)); + res8 = vmin_u8(res8, vdup_n_u8(output_activation_max)); + } + // Store results to destination. Assumes 32bit alignment. + vst1_lane_u32(reinterpret_cast(output_ptr), + vreinterpret_u32_u8(res8), 0); + output_ptr += kPeel; + } +} +#endif // USE_NEON + +template +struct GemmlowpOutputPipeline { + typedef gemmlowp::VectorMap + ColVectorMap; + typedef std::tuple< + gemmlowp::OutputStageBiasAddition, + gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint, + gemmlowp::OutputStageClamp, gemmlowp::OutputStageSaturatingCastToUint8> + Pipeline; + static Pipeline Make(const int32* bias_data, int output_rows, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max) { + ColVectorMap bias_vector(bias_data, output_rows); + gemmlowp::OutputStageBiasAddition bias_addition_stage; + bias_addition_stage.bias_vector = bias_vector; + gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint + quantize_down_stage; + quantize_down_stage.result_offset_after_shift = output_offset; + quantize_down_stage.result_fixedpoint_multiplier = output_multiplier; + quantize_down_stage.result_shift = output_shift; + gemmlowp::OutputStageClamp clamp_stage; + clamp_stage.min = output_activation_min; + clamp_stage.max = output_activation_max; + gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage; + return std::make_tuple(bias_addition_stage, quantize_down_stage, + clamp_stage, saturating_cast_stage); + } +}; + +template <> +struct GemmlowpOutputPipeline { + typedef gemmlowp::VectorMap + ColVectorMap; + typedef std::tuple< + gemmlowp::OutputStageBiasAddition, + gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint, + gemmlowp::OutputStageSaturatingCastToUint8> + Pipeline; + static Pipeline Make(const int32* bias_data, int output_rows, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max) { + DCHECK_EQ(output_activation_min, 0); + DCHECK_EQ(output_activation_max, 255); + ColVectorMap bias_vector(bias_data, output_rows); + gemmlowp::OutputStageBiasAddition bias_addition_stage; + bias_addition_stage.bias_vector = bias_vector; + gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint + quantize_down_stage; + quantize_down_stage.result_offset_after_shift = output_offset; + quantize_down_stage.result_fixedpoint_multiplier = output_multiplier; + quantize_down_stage.result_shift = output_shift; + gemmlowp::OutputStageSaturatingCastToUint8 saturating_cast_stage; + return std::make_tuple(bias_addition_stage, quantize_down_stage, + saturating_cast_stage); + } +}; + +template +void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + gemmlowp::ScopedProfilingLabel label("FullyConnected/8bit"); + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + // TODO: This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3); +#ifdef USE_NEON + const int output_size = MatchingArraySize(filter_dims, 1, output_dims, 0); + if (batches == 1 && !(output_size % 4)) { + return FullyConnectedAsGEMV( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, output_data, + output_dims); + } +#endif // USE_NEON + const int filter_rows = filter_dims.sizes[1]; + const int filter_cols = filter_dims.sizes[0]; + DCHECK_EQ(filter_dims.sizes[2], 1); + DCHECK_EQ(filter_dims.sizes[3], 1); + const int output_rows = output_dims.sizes[0]; + DCHECK_EQ(output_rows, filter_rows); + DCHECK_EQ(bias_dims.sizes[0], output_rows); + DCHECK_EQ(bias_dims.sizes[1], 1); + DCHECK_EQ(bias_dims.sizes[2], 1); + DCHECK_EQ(bias_dims.sizes[3], 1); + + gemmlowp::MatrixMap filter_matrix( + filter_data, output_rows, filter_cols, filter_cols); + gemmlowp::MatrixMap input_matrix( + input_data, filter_cols, batches, filter_cols); + gemmlowp::MatrixMap output_matrix( + output_data, output_rows, batches, output_rows); + const auto& output_pipeline = GemmlowpOutputPipeline::Make( + bias_data, output_rows, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max); + gemmlowp::GemmWithOutputPipeline( + gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, + input_offset, output_pipeline); +} + +template +inline void ExtractPatchIntoBufferColumn( + const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth, + int stride_width, int stride_height, int pad_width, int pad_height, + int in_width, int in_height, int in_depth, int single_buffer_length, + int buffer_id, const T* in_data, T* conv_buffer_data, uint8 byte_zero) { + gemmlowp::ScopedProfilingLabel label("ExtractPatchIntoBufferColumn"); + // This chunk of code reshapes all the inputs corresponding to + // output (b, h, w) to a column vector in conv_buffer(:, buffer_id). + const int kwidth_times_indepth = kwidth * in_depth; + const int inwidth_times_indepth = in_width * in_depth; + const int ih_ungated_start = h * stride_height - pad_height; + const int ih_ungated_end = (ih_ungated_start + kheight); + const int ih_end = std::min(ih_ungated_end, in_height); + const int iw_ungated_start = w * stride_width - pad_width; + const int iw_ungated_end = (iw_ungated_start + kwidth); + const int iw_end = std::min(iw_ungated_end, in_width); + // If the patch is off the edge of the input image, skip writing those rows + // and columns from the patch into the output array. + const int h_offset = std::max(0, -ih_ungated_start); + const int w_offset = std::max(0, -iw_ungated_start); + const int ih_start = std::max(0, ih_ungated_start); + const int iw_start = std::max(0, iw_ungated_start); + const int single_row_num = + std::min(kwidth - w_offset, in_width - iw_start) * in_depth; + const int output_row_offset = (buffer_id * single_buffer_length); + int out_offset = + output_row_offset + (h_offset * kwidth + w_offset) * in_depth; + int in_offset = Offset(input_dims, 0, iw_start, ih_start, b); + + // Express all of the calculations as padding around the input patch. + const int top_padding = h_offset; + const int bottom_padding = (ih_ungated_end - ih_end); + const int left_padding = w_offset; + const int right_padding = (iw_ungated_end - iw_end); + assert(single_row_num == + ((kwidth - (left_padding + right_padding)) * in_depth)); + + // Write out zeroes to the elements representing the top rows of the input + // patch that are off the edge of the input image. + if (top_padding > 0) { + const int top_row_elements = (top_padding * kwidth * in_depth); + memset(conv_buffer_data + output_row_offset, byte_zero, + (top_row_elements * sizeof(T))); + } + + // If the patch is on the interior of the input image horizontally, just copy + // over the rows sequentially, otherwise add zero padding at the start or end. + if ((left_padding == 0) && (right_padding == 0)) { + for (int ih = ih_start; ih < ih_end; ++ih) { + memcpy(conv_buffer_data + out_offset, in_data + in_offset, + single_row_num * sizeof(T)); + out_offset += kwidth_times_indepth; + in_offset += inwidth_times_indepth; + } + } else { + for (int ih = ih_start; ih < ih_end; ++ih) { + if (left_padding > 0) { + const int left_start = (out_offset - (left_padding * in_depth)); + memset(conv_buffer_data + left_start, byte_zero, + (left_padding * in_depth * sizeof(T))); + } + memcpy(conv_buffer_data + out_offset, in_data + in_offset, + single_row_num * sizeof(T)); + if (right_padding > 0) { + const int right_start = (out_offset + single_row_num); + memset(conv_buffer_data + right_start, byte_zero, + (right_padding * in_depth * sizeof(T))); + } + out_offset += kwidth_times_indepth; + in_offset += inwidth_times_indepth; + } + } + + // If the bottom of the patch falls off the input image, pad the values + // representing those input rows with zeroes. + if (bottom_padding > 0) { + const int bottom_row_elements = (bottom_padding * kwidth * in_depth); + const int bottom_start = + output_row_offset + + ((top_padding + (ih_end - ih_start)) * kwidth * in_depth); + memset(conv_buffer_data + bottom_start, byte_zero, + (bottom_row_elements * sizeof(T))); + } +} + +template +void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width, + int stride_height, int pad_width, int pad_height, int kheight, + int kwidth, uint8 byte_zero, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Im2col"); + DCHECK(IsPackedWithoutStrides(input_dims)); + DCHECK(IsPackedWithoutStrides(output_dims)); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_depth = ArraySize(input_dims, 0); + const int input_width = ArraySize(input_dims, 1); + const int input_height = ArraySize(input_dims, 2); + const int output_depth = ArraySize(output_dims, 0); + const int output_width = ArraySize(output_dims, 1); + const int output_height = ArraySize(output_dims, 2); + + int buffer_id = 0; + // Loop over the output nodes. + for (int b = 0; b < batches; ++b) { + for (int h = 0; h < output_height; ++h) { + for (int w = 0; w < output_width; ++w) { + ExtractPatchIntoBufferColumn( + input_dims, w, h, b, kheight, kwidth, stride_width, stride_height, + pad_width, pad_height, input_width, input_height, input_depth, + output_depth, buffer_id, input_data, output_data, byte_zero); + ++buffer_id; + } + } + } +} + +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride_width, + int stride_height, int pad_width, int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + (void)im2col_data; + (void)im2col_dims; + gemmlowp::ScopedProfilingLabel label("Conv"); + + const float* gemm_input_data = nullptr; + const Dims<4>* gemm_input_dims = nullptr; + const int filter_width = ArraySize(filter_dims, 1); + const int filter_height = ArraySize(filter_dims, 2); + const bool need_im2col = stride_width != 1 || stride_height != 1 || + filter_width != 1 || filter_height != 1; + if (need_im2col) { + DCHECK(im2col_data); + Im2col(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_height, filter_width, 0, im2col_data, + im2col_dims); + gemm_input_data = im2col_data; + gemm_input_dims = &im2col_dims; + } else { + DCHECK(!im2col_data); + gemm_input_data = input_data; + gemm_input_dims = &input_dims; + } + + const auto im2col_matrix_map = + MapAsMatrixWithFirstDimAsRows(gemm_input_data, *gemm_input_dims); + const auto filter_matrix_map = + MapAsMatrixWithLastDimAsCols(filter_data, filter_dims); + auto output_matrix_map = + MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + + Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map); + + AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data, + output_dims); +} + +template +void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, int stride_width, + int stride_height, int pad_width, int pad_height, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) { + gemmlowp::ScopedProfilingLabel label("Conv/8bit"); + + DCHECK(IsPackedWithoutStrides(input_dims)); + DCHECK(IsPackedWithoutStrides(filter_dims)); + DCHECK(IsPackedWithoutStrides(output_dims)); + + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + + const uint8* gemm_input_data = nullptr; + const Dims<4>* gemm_input_dims = nullptr; + const int filter_width = ArraySize(filter_dims, 1); + const int filter_height = ArraySize(filter_dims, 2); + const bool need_im2col = stride_width != 1 || stride_height != 1 || + filter_width != 1 || filter_height != 1; + if (need_im2col) { + DCHECK(im2col_data); + const int input_zero_point = -input_offset; + DCHECK_GE(input_zero_point, 0); + DCHECK_LE(input_zero_point, 255); + Im2col(input_data, input_dims, stride_width, stride_height, pad_width, + pad_height, filter_height, filter_width, input_zero_point, + im2col_data, im2col_dims); + gemm_input_data = im2col_data; + gemm_input_dims = &im2col_dims; + } else { + DCHECK(!im2col_data); + gemm_input_data = input_data; + gemm_input_dims = &input_dims; + } + + const int gemm_input_rows = gemm_input_dims->sizes[0]; + const int gemm_input_cols = gemm_input_dims->sizes[1] * + gemm_input_dims->sizes[2] * + gemm_input_dims->sizes[3]; + const int filter_rows = filter_dims.sizes[3]; + const int filter_cols = + filter_dims.sizes[0] * filter_dims.sizes[1] * filter_dims.sizes[2]; + const int output_rows = output_dims.sizes[0]; + const int output_cols = + output_dims.sizes[1] * output_dims.sizes[2] * output_dims.sizes[3]; + DCHECK_EQ(output_rows, filter_rows); + DCHECK_EQ(output_cols, gemm_input_cols); + DCHECK_EQ(filter_cols, gemm_input_rows); + DCHECK_EQ(bias_dims.sizes[0], output_rows); + DCHECK_EQ(bias_dims.sizes[1], 1); + DCHECK_EQ(bias_dims.sizes[2], 1); + DCHECK_EQ(bias_dims.sizes[3], 1); + gemmlowp::MatrixMap filter_matrix( + filter_data, filter_rows, filter_cols); + gemmlowp::MatrixMap input_matrix( + gemm_input_data, gemm_input_rows, gemm_input_cols); + gemmlowp::MatrixMap output_matrix( + output_data, output_rows, output_cols); + const auto& output_pipeline = GemmlowpOutputPipeline::Make( + bias_data, output_rows, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max); + gemmlowp::GemmWithOutputPipeline( + gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, + input_offset, output_pipeline); +} + +template +inline void DepthToSpace(const T* input_data, const Dims<4>& input_dims, + int block_size, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("DepthToSpace"); + + const int input_depth = ArraySize(input_dims, 0); + const int input_width = ArraySize(input_dims, 1); + const int input_height = ArraySize(input_dims, 2); + + const int output_depth = ArraySize(output_dims, 0); + const int batch_size = ArraySize(output_dims, 3); + + // Number of continuous values that we can copy in one interation. + const int stride = block_size * output_depth; + + for (int batch = 0; batch < batch_size; ++batch) { + for (int in_h = 0; in_h < input_height; ++in_h) { + const T* input_ptr = input_data + Offset(input_dims, 0, 0, in_h, batch); + for (int offset_h = 0; offset_h < block_size; ++offset_h) { + const T* src = input_ptr; + for (int in_w = 0; in_w < input_width; ++in_w) { + memcpy(output_data, src, stride * sizeof(T)); + output_data += stride; + src += input_depth; + } + input_ptr += stride; + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void Im2col(const T* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int kheight, int kwidth, + uint8 byte_zero, T* output_data, const Dims<4>& output_dims) { + Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight, + kwidth, byte_zero, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void ConvAsGemm(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("ConvAsGemm"); + + const auto input_matrix_map = + MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + const auto filter_matrix_map = + MapAsMatrixWithLastDimAsCols(filter_data, filter_dims); + auto output_matrix_map = + MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + + Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map); + + AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data, + output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void ConvAsGemm(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + gemmlowp::ScopedProfilingLabel label("ConvAsGemm/8bit"); + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + const int input_rows = input_dims.sizes[0]; + const int input_cols = + input_dims.sizes[1] * input_dims.sizes[2] * input_dims.sizes[3]; + const int filter_rows = filter_dims.sizes[3]; + const int filter_cols = + filter_dims.sizes[0] * filter_dims.sizes[1] * filter_dims.sizes[2]; + const int output_rows = output_dims.sizes[0]; + const int output_cols = + output_dims.sizes[1] * output_dims.sizes[2] * output_dims.sizes[3]; + DCHECK_EQ(output_rows, filter_rows); + DCHECK_EQ(output_cols, input_cols); + DCHECK_EQ(filter_cols, input_rows); + DCHECK_EQ(bias_dims.sizes[0], output_rows); + DCHECK_EQ(bias_dims.sizes[1], 1); + DCHECK_EQ(bias_dims.sizes[2], 1); + DCHECK_EQ(bias_dims.sizes[3], 1); + gemmlowp::MatrixMap filter_matrix( + filter_data, output_rows, filter_cols, filter_cols); + gemmlowp::MatrixMap input_matrix( + input_data, filter_cols, output_cols, filter_cols); + gemmlowp::MatrixMap output_matrix( + output_data, output_rows, output_cols, output_rows); + const auto& output_pipeline = GemmlowpOutputPipeline::Make( + bias_data, output_rows, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max); + gemmlowp::GemmWithOutputPipeline( + gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, + input_offset, output_pipeline); +} + +template +inline void SpaceToDepth(const T* input_data, const Dims<4>& input_dims, + int block_size, T* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("SpaceToDepth"); + + const int output_depth = ArraySize(output_dims, 0); + const int output_width = ArraySize(output_dims, 1); + const int output_height = ArraySize(output_dims, 2); + + const int input_depth = ArraySize(input_dims, 0); + const int batch_size = ArraySize(input_dims, 3); + + // Number of continuous values that we can copy in one interation. + const int stride = block_size * input_depth; + + for (int batch = 0; batch < batch_size; ++batch) { + for (int out_h = 0; out_h < output_height; ++out_h) { + T* output_ptr = output_data + Offset(output_dims, 0, 0, out_h, batch); + for (int offset_h = 0; offset_h < block_size; ++offset_h) { + T* dst = output_ptr; + for (int out_w = 0; out_w < output_width; ++out_w) { + memcpy(dst, input_data, stride * sizeof(T)); + input_data += stride; + dst += output_depth; + } + output_ptr += stride; + } + } + } +} + +template +void NonGlobalBatchNormalization( + const float* input_data, const Dims<4>& input_dims, const float* mean_data, + const Dims<4>& mean_dims, const float* multiplier_data, + const Dims<4>& multiplier_dims, const float* offset_data, + const Dims<4>& offset_dims, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("NonGlobalBatchNormalization"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = + MatchingArraySize(input_dims, 2, mean_dims, 2, multiplier_dims, 2, + offset_dims, 2, output_dims, 2); + const int width = + MatchingArraySize(input_dims, 1, mean_dims, 1, multiplier_dims, 1, + offset_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input_dims, 0, mean_dims, 0, multiplier_dims, 0, + offset_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + (input_data[Offset(input_dims, c, x, y, b)] - + mean_data[Offset(mean_dims, c, x, y, 0)]) * + multiplier_data[Offset(multiplier_dims, c, x, y, 0)] + + offset_data[Offset(offset_dims, c, x, y, 0)]); + } + } + } + } +} + +template +void GlobalBatchNormalization(const float* input_data, + const Dims<4>& input_dims, const float* mean_data, + const Dims<4>& mean_dims, + const float* multiplier_data, + const Dims<4>& multiplier_dims, + const float* offset_data, + const Dims<4>& offset_dims, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("GlobalBatchNormalization"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = + MatchingArraySize(input_dims, 0, mean_dims, 0, multiplier_dims, 0, + offset_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + (input_data[Offset(input_dims, c, x, y, b)] - + mean_data[Offset(mean_dims, c, 0, 0, 0)]) * + multiplier_data[Offset(multiplier_dims, c, 0, 0, 0)] + + offset_data[Offset(offset_dims, c, 0, 0, 0)]); + } + } + } + } +} + +inline void Relu(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Relu (not fused)"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + const float lower = 0; + float clamped = val < lower ? lower : val; + output_data[Offset(output_dims, c, x, y, b)] = clamped; + } + } + } + } +} + +inline void Relu1(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Relu1 (not fused)"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + const float upper = 1; + const float lower = -1; + float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[Offset(output_dims, c, x, y, b)] = clamped; + } + } + } + } +} + +inline void Relu6(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Relu6 (not fused)"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + const float upper = 6; + const float lower = 0; + float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[Offset(output_dims, c, x, y, b)] = clamped; + } + } + } + } +} + +template +void L2Normalization(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("L2Normalization"); + static_assert(Ac == FusedActivationFunctionType::kNone, ""); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + float squared_l2_norm = 0; + for (int c = 0; c < depth; ++c) { + float val = input_data[Offset(input_dims, c, x, y, b)]; + squared_l2_norm += val * val; + } + float inverse_l2_norm = 1.0f / std::sqrt(squared_l2_norm); + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + input_data[Offset(input_dims, c, x, y, b)] * inverse_l2_norm; + } + } + } + } +} + +inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt, + int* output_shift) { + *output_shift = 11; + while (input >= (1 << 29)) { + input /= 4; + ++*output_shift; + } + DCHECK_GT(input, 0); + const unsigned max_left_shift_bits = __builtin_clz(input) - 1; + const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2; + const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1; + *output_shift -= left_shift_bit_pairs; + input <<= 2 * left_shift_bit_pairs; + DCHECK_GE(input, (1 << 27)); + DCHECK_LT(input, (1 << 29)); + using gemmlowp::FixedPoint; + using gemmlowp::Rescale; + using gemmlowp::SaturatingRoundingMultiplyByPOT; + // Using 3 integer bits gives us enough room for the internal arithmetic in + // this Newton-Raphson iteration. + using F3 = FixedPoint; + using F0 = FixedPoint; + const F3 fixedpoint_input = F3::FromRaw(input >> 1); + const F3 fixedpoint_half_input = + SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input); + const F3 fixedpoint_half_three = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5); + // Newton-Raphson iteration + // Naive unoptimized starting guess: x = 1 + F3 x = F3::One(); + // Naive unoptimized number of iterations: 5 + for (int i = 0; i < 5; i++) { + const F3 x3 = Rescale<3>(x * x * x); + x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3); + } + const F0 fixedpoint_half_sqrt_2 = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.); + x = x * fixedpoint_half_sqrt_2; + *output_inv_sqrt = x.raw(); + if (*output_shift < 0) { + *output_inv_sqrt <<= -*output_shift; + *output_shift = 0; + } +} + +inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, + int32 input_zero_point, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("L2Normalization/8bit"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + DCHECK(IsPackedWithoutStrides(input_dims)); + DCHECK(IsPackedWithoutStrides(output_dims)); + DCHECK_EQ(batches, 1); + DCHECK_EQ(height, 1); + DCHECK_EQ(width, 1); + int32 square_l2_norm = 0; + for (int i = 0; i < depth; i++) { + int32 diff = input_data[i] - input_zero_point; + square_l2_norm += diff * diff; + } + int32 inv_l2norm_multiplier; + int inv_l2norm_shift; + GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier, + &inv_l2norm_shift); + + for (int i = 0; i < depth; i++) { + int32 diff = input_data[i] - input_zero_point; + int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOne( + 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); + int32 unclamped_output_val = 128 + rescaled_diff; + int32 output_val = std::min(255, std::max(0, unclamped_output_val)); + output_data[i] = static_cast(output_val); + } +} + +template +void Add(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Add"); + /* const int batches = */ MatchingArraySize(input1_dims, 3, input2_dims, 3, + output_dims, 3); + /* const int height = */ MatchingArraySize(input1_dims, 2, input2_dims, 2, + output_dims, 2); + /* const int width = */ MatchingArraySize(input1_dims, 1, input2_dims, 1, + output_dims, 1); + /* const int depth = */ MatchingArraySize(input1_dims, 0, input2_dims, 0, + output_dims, 0); + DCHECK(IsPackedWithoutStrides(input1_dims)); + DCHECK(IsPackedWithoutStrides(input2_dims)); + DCHECK(IsPackedWithoutStrides(output_dims)); + + int i = 0; + const int size = input1_dims.sizes[3] * input1_dims.strides[3]; +#ifdef USE_NEON + const auto zero = vdupq_n_f32(0); + const auto six = vdupq_n_f32(6); + const auto neg_one = vdupq_n_f32(-1); + const auto one = vdupq_n_f32(1); + for (; i <= size - 16; i += 16) { + auto a10 = vld1q_f32(input1_data + i); + auto a11 = vld1q_f32(input1_data + i + 4); + auto a12 = vld1q_f32(input1_data + i + 8); + auto a13 = vld1q_f32(input1_data + i + 12); + auto a20 = vld1q_f32(input2_data + i); + auto a21 = vld1q_f32(input2_data + i + 4); + auto a22 = vld1q_f32(input2_data + i + 8); + auto a23 = vld1q_f32(input2_data + i + 12); + auto x0 = vaddq_f32(a10, a20); + auto x1 = vaddq_f32(a11, a21); + auto x2 = vaddq_f32(a12, a22); + auto x3 = vaddq_f32(a13, a23); + if (Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6) { + x0 = vmaxq_f32(zero, x0); + x1 = vmaxq_f32(zero, x1); + x2 = vmaxq_f32(zero, x2); + x3 = vmaxq_f32(zero, x3); + if (Ac == FusedActivationFunctionType::kRelu6) { + x0 = vminq_f32(six, x0); + x1 = vminq_f32(six, x1); + x2 = vminq_f32(six, x2); + x3 = vminq_f32(six, x3); + } + } else if (Ac == FusedActivationFunctionType::kRelu1) { + x0 = vmaxq_f32(neg_one, x0); + x1 = vmaxq_f32(neg_one, x1); + x2 = vmaxq_f32(neg_one, x2); + x3 = vmaxq_f32(neg_one, x3); + x0 = vminq_f32(one, x0); + x1 = vminq_f32(one, x1); + x2 = vminq_f32(one, x2); + x3 = vminq_f32(one, x3); + } + vst1q_f32(output_data + i, x0); + vst1q_f32(output_data + i + 4, x1); + vst1q_f32(output_data + i + 8, x2); + vst1q_f32(output_data + i + 12, x3); + } + for (; i <= size - 4; i += 4) { + auto a1 = vld1q_f32(input1_data + i); + auto a2 = vld1q_f32(input2_data + i); + auto x = vaddq_f32(a1, a2); + if (Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6) { + x = vmaxq_f32(zero, x); + if (Ac == FusedActivationFunctionType::kRelu6) { + x = vminq_f32(six, x); + } + } else if (Ac == FusedActivationFunctionType::kRelu1) { + x = vmaxq_f32(neg_one, x); + x = vminq_f32(one, x); + } + vst1q_f32(output_data + i, x); + } +#endif // NEON + + for (; i < size; i++) { + auto x = input1_data[i] + input2_data[i]; + output_data[i] = ActivationFunction(x); + } +} + +template +inline void Add(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, int input2_shift, + int32 output_offset, int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + DCHECK_EQ(output_activation_min, 0); + DCHECK_EQ(output_activation_max, 255); + } + gemmlowp::ScopedProfilingLabel label("Add/8bit"); + /* const int batches = */ MatchingArraySize(input1_dims, 3, input2_dims, 3, + output_dims, 3); + /* const int height = */ MatchingArraySize(input1_dims, 2, input2_dims, 2, + output_dims, 2); + /* const int width = */ MatchingArraySize(input1_dims, 1, input2_dims, 1, + output_dims, 1); + /* const int depth = */ MatchingArraySize(input1_dims, 0, input2_dims, 0, + output_dims, 0); + DCHECK(IsPackedWithoutStrides(input1_dims)); + DCHECK(IsPackedWithoutStrides(input2_dims)); + DCHECK(IsPackedWithoutStrides(output_dims)); + + int i = 0; + const int size = input1_dims.sizes[3] * input1_dims.strides[3]; + DCHECK_GT(input1_offset, -256); + DCHECK_GT(input2_offset, -256); + DCHECK_LT(input1_offset, 256); + DCHECK_LT(input2_offset, 256); +#ifdef USE_NEON + for (; i <= size - 8; i += 8) { + const auto input1_val_original = vld1_u8(input1_data + i); + const auto input2_val_original = vld1_u8(input2_data + i); + const auto input1_val_s16 = + vreinterpretq_s16_u16(vmovl_u8(input1_val_original)); + const auto input2_val_s16 = + vreinterpretq_s16_u16(vmovl_u8(input2_val_original)); + const auto input1_val = + vaddq_s16(input1_val_s16, vdupq_n_s16(input1_offset)); + const auto input2_val = + vaddq_s16(input2_val_s16, vdupq_n_s16(input2_offset)); + const auto input1_val_high = vget_high_s16(input1_val); + const auto input1_val_low = vget_low_s16(input1_val); + const auto input2_val_high = vget_high_s16(input2_val); + const auto input2_val_low = vget_low_s16(input2_val); + auto x11 = vmovl_s16(input1_val_low); + auto x12 = vmovl_s16(input1_val_high); + auto x21 = vmovl_s16(input2_val_low); + auto x22 = vmovl_s16(input2_val_high); + const auto left_shift_dup = vdupq_n_s32(left_shift); + x11 = vshlq_s32(x11, left_shift_dup); + x12 = vshlq_s32(x12, left_shift_dup); + x21 = vshlq_s32(x21, left_shift_dup); + x22 = vshlq_s32(x22, left_shift_dup); + x11 = vqrdmulhq_n_s32(x11, input1_multiplier); + x12 = vqrdmulhq_n_s32(x12, input1_multiplier); + x21 = vqrdmulhq_n_s32(x21, input2_multiplier); + x22 = vqrdmulhq_n_s32(x22, input2_multiplier); + const auto input1_shift_dup = vdupq_n_s32(-input1_shift); + const auto input2_shift_dup = vdupq_n_s32(-input2_shift); + x11 = vshlq_s32(x11, input1_shift_dup); + x12 = vshlq_s32(x12, input1_shift_dup); + x21 = vshlq_s32(x21, input2_shift_dup); + x22 = vshlq_s32(x22, input2_shift_dup); + auto s1 = vaddq_s32(x11, x21); + auto s2 = vaddq_s32(x12, x22); + s1 = vqrdmulhq_n_s32(s1, output_multiplier); + s2 = vqrdmulhq_n_s32(s2, output_multiplier); + using gemmlowp::RoundingDivideByPOT; + s1 = RoundingDivideByPOT(s1, output_shift); + s2 = RoundingDivideByPOT(s2, output_shift); + const auto s1_narrowed = vmovn_s32(s1); + const auto s2_narrowed = vmovn_s32(s2); + const auto s = vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), + vdupq_n_s16(output_offset)); + vst1_u8(output_data + i, vqmovun_s16(s)); + } +#endif // NEON + + for (; i < size; i++) { + const int32 input1_val = input1_offset + input1_data[i]; + const int32 input2_val = input2_offset + input2_data[i]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sum = scaled_input1_val + scaled_input2_val; + const int32 raw_output = MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sum, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = std::min( + output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = static_cast(clamped_output); + } +} + + +// TODO: We can implement BroadcastAdd on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +// TODO: BroadcastAdd is intentionally duplicated from +// reference_ops.h. Once an optimized version is implemented and NdArrayDesc +// is no longer referenced in this file, move NdArrayDesc from types.h to +// reference_ops.h. +template +void BroadcastAdd(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastAdd"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] + + input2_data[SubscriptToIndex(desc2, c, x, y, b)]); + } + } + } + } +} + +template +inline void BroadcastAdd(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + DCHECK_EQ(output_activation_min, 0); + DCHECK_EQ(output_activation_max, 255); + } + gemmlowp::ScopedProfilingLabel label("BroadcastAdd/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sum = scaled_input1_val + scaled_input2_val; + const int32 raw_output = + MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sum, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, raw_output)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +template +void Mul(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Mul"); + /* const int batches = */ MatchingArraySize(input1_dims, 3, input2_dims, 3, + output_dims, 3); + /* const int height = */ MatchingArraySize(input1_dims, 2, input2_dims, 2, + output_dims, 2); + /* const int width = */ MatchingArraySize(input1_dims, 1, input2_dims, 1, + output_dims, 1); + /* const int depth = */ MatchingArraySize(input1_dims, 0, input2_dims, 0, + output_dims, 0); + DCHECK(IsPackedWithoutStrides(input1_dims)); + DCHECK(IsPackedWithoutStrides(input2_dims)); + DCHECK(IsPackedWithoutStrides(output_dims)); + + int i = 0; + const int size = input1_dims.sizes[3] * input1_dims.strides[3]; +#ifdef USE_NEON + const auto zero = vdupq_n_f32(0); + const auto six = vdupq_n_f32(6); + const auto neg_one = vdupq_n_f32(-1); + const auto one = vdupq_n_f32(1); + for (; i <= size - 16; i += 16) { + auto a10 = vld1q_f32(input1_data + i); + auto a11 = vld1q_f32(input1_data + i + 4); + auto a12 = vld1q_f32(input1_data + i + 8); + auto a13 = vld1q_f32(input1_data + i + 12); + auto a20 = vld1q_f32(input2_data + i); + auto a21 = vld1q_f32(input2_data + i + 4); + auto a22 = vld1q_f32(input2_data + i + 8); + auto a23 = vld1q_f32(input2_data + i + 12); + auto x0 = vmulq_f32(a10, a20); + auto x1 = vmulq_f32(a11, a21); + auto x2 = vmulq_f32(a12, a22); + auto x3 = vmulq_f32(a13, a23); + if (Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6) { + x0 = vmaxq_f32(zero, x0); + x1 = vmaxq_f32(zero, x1); + x2 = vmaxq_f32(zero, x2); + x3 = vmaxq_f32(zero, x3); + if (Ac == FusedActivationFunctionType::kRelu6) { + x0 = vminq_f32(six, x0); + x1 = vminq_f32(six, x1); + x2 = vminq_f32(six, x2); + x3 = vminq_f32(six, x3); + } + } else if (Ac == FusedActivationFunctionType::kRelu1) { + x0 = vmaxq_f32(neg_one, x0); + x1 = vmaxq_f32(neg_one, x1); + x2 = vmaxq_f32(neg_one, x2); + x3 = vmaxq_f32(neg_one, x3); + x0 = vminq_f32(one, x0); + x1 = vminq_f32(one, x1); + x2 = vminq_f32(one, x2); + x3 = vminq_f32(one, x3); + } + vst1q_f32(output_data + i, x0); + vst1q_f32(output_data + i + 4, x1); + vst1q_f32(output_data + i + 8, x2); + vst1q_f32(output_data + i + 12, x3); + } + for (; i <= size - 4; i += 4) { + auto a1 = vld1q_f32(input1_data + i); + auto a2 = vld1q_f32(input2_data + i); + auto x = vmulq_f32(a1, a2); + if (Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6) { + x = vmaxq_f32(zero, x); + if (Ac == FusedActivationFunctionType::kRelu6) { + x = vminq_f32(six, x); + } + } else if (Ac == FusedActivationFunctionType::kRelu1) { + x = vmaxq_f32(neg_one, x); + x = vminq_f32(one, x); + } + vst1q_f32(output_data + i, x); + } +#endif // NEON + + for (; i < size; i++) { + auto x = input1_data[i] * input2_data[i]; + output_data[i] = ActivationFunction(x); + } +} + +// TODO: We can implement BroadcastMul on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +// TODO: BroadcastMul is intentionally duplicated from +// reference_ops.h. Once an optimized version is implemented and NdArrayDesc +// is no longer referenced in this file, move NdArrayDesc from types.h to +// reference_ops.h. +template +void BroadcastMul(const float* input1_data, const Dims<4>& input1_dims, + const float* input2_data, const Dims<4>& input2_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastMul"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] * + input2_data[SubscriptToIndex(desc2, c, x, y, b)]); + } + } + } + } +} + +template +inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + DCHECK_EQ(output_activation_min, 0); + DCHECK_EQ(output_activation_max, 255); + } + gemmlowp::ScopedProfilingLabel label("BroadcastMul/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 unclamped_result = + output_offset + + MultiplyByQuantizedMultiplierSmallerThanOne( + input1_val * input2_val, output_multiplier, output_shift); + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, unclamped_result)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +template +void Concatenation(int concat_dim, const Scalar* const* input_data, + const Dims<4>* const* input_dims, int inputs_count, + Scalar* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Concatenation"); + DCHECK_GT(inputs_count, 1); + int concat_size = 0; + for (int i = 0; i < inputs_count; i++) { + for (int j = 0; j < 4; j++) { + if (j != concat_dim) { + MatchingArraySize(*input_dims[i], j, output_dims, j); + } + } + concat_size += ArraySize(*input_dims[i], concat_dim); + } + DCHECK_EQ(concat_size, ArraySize(output_dims, concat_dim)); + DCHECK(IsPackedWithoutStrides(output_dims)); + // for now we dont have a model with a Concatenation + // with fused activation function. + DCHECK(Ac == FusedActivationFunctionType::kNone); + int outer_size = 1; + for (int i = concat_dim + 1; i < 4; i++) { + outer_size *= output_dims.sizes[i]; + } + Scalar* output_ptr = output_data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < inputs_count; ++i) { + const int copy_size = + input_dims[i]->sizes[concat_dim] * input_dims[i]->strides[concat_dim]; + memcpy(output_ptr, input_data[i] + k * copy_size, + copy_size * sizeof(Scalar)); + output_ptr += copy_size; + } + } +} + +template +void DepthConcatenation(const Scalar* const* input_data, + const Dims<4>* const* input_dims, int inputs_count, + Scalar* output_data, const Dims<4>& output_dims) { + Concatenation(0, input_data, input_dims, inputs_count, + output_data, output_dims); +} + +inline void LstmCell(const float* input_data, const Dims<4>& input_dims, + const float* prev_activ_data, + const Dims<4>& prev_activ_dims, const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, const float* prev_state_data, + const Dims<4>& prev_state_dims, float* output_state_data, + const Dims<4>& output_state_dims, float* output_activ_data, + const Dims<4>& output_activ_dims, float* concat_temp_data, + const Dims<4>& concat_temp_dims, float* activ_temp_data, + const Dims<4>& activ_temp_dims) { + gemmlowp::ScopedProfilingLabel label("LstmCell"); + MatchingArraySize( // batches + input_dims, 3, prev_activ_dims, 3, prev_state_dims, 3, output_state_dims, + 3, output_activ_dims, 3); + MatchingArraySize( // height + input_dims, 2, prev_activ_dims, 2, prev_state_dims, 2, output_state_dims, + 2, output_activ_dims, 2); + MatchingArraySize( // width + input_dims, 1, prev_activ_dims, 1, prev_state_dims, 1, output_state_dims, + 1, output_activ_dims, 1); + CHECK_EQ(ArraySize(weights_dims, 2), 1); + CHECK_EQ(ArraySize(weights_dims, 3), 1); + const int input_depth = ArraySize(input_dims, 0); + const int prev_activ_depth = ArraySize(prev_activ_dims, 0); + const int total_input_depth = prev_activ_depth + input_depth; + CHECK_EQ(ArraySize(weights_dims, 0), total_input_depth); + CHECK_EQ(MatchingArraySize(bias_dims, 1, bias_dims, 2, bias_dims, 3), 1); + const int intern_activ_depth = MatchingArraySize( + weights_dims, 1, + bias_dims, 0); + CHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = MatchingArraySize( + prev_state_dims, 0, + prev_activ_dims, 0, + output_state_dims, 0, + output_activ_dims, 0); + CHECK_EQ(output_depth, intern_activ_depth / 4); + + // Concatenate prev_activ and input data together + std::vector concat_input_arrays_data; + std::vector const*> concat_input_arrays_dims; + concat_input_arrays_data.push_back(input_data); + concat_input_arrays_data.push_back(prev_activ_data); + concat_input_arrays_dims.push_back(&input_dims); + concat_input_arrays_dims.push_back(&prev_activ_dims); + Concatenation( + 0, &(concat_input_arrays_data[0]), &(concat_input_arrays_dims[0]), + concat_input_arrays_data.size(), concat_temp_data, concat_temp_dims); + + // Fully connected + FullyConnected( + concat_temp_data, concat_temp_dims, weights_data, weights_dims, bias_data, + bias_dims, activ_temp_data, activ_temp_dims); + + // Map raw arrays to Eigen arrays so we can use Eigen's optimized array + // operations. + ArrayMap activ_temp_map = + MapAsArrayWithFirstDimAsRows(activ_temp_data, activ_temp_dims); + auto input_gate_sm = activ_temp_map.block(0 * output_depth, 0, output_depth, + activ_temp_map.cols()); + auto new_input_sm = activ_temp_map.block(1 * output_depth, 0, output_depth, + activ_temp_map.cols()); + auto forget_gate_sm = activ_temp_map.block(2 * output_depth, 0, output_depth, + activ_temp_map.cols()); + auto output_gate_sm = activ_temp_map.block(3 * output_depth, 0, output_depth, + activ_temp_map.cols()); + ArrayMap prev_state_map = + MapAsArrayWithFirstDimAsRows(prev_state_data, prev_state_dims); + ArrayMap output_state_map = + MapAsArrayWithFirstDimAsRows(output_state_data, output_state_dims); + ArrayMap output_activ_map = + MapAsArrayWithFirstDimAsRows(output_activ_data, output_activ_dims); + + // Combined memory state and final output calculation + gemmlowp::ScopedProfilingLabel label2("MemoryStateAndFinalOutput"); + output_state_map = + input_gate_sm.unaryExpr(Eigen::internal::scalar_sigmoid_op()) * + new_input_sm.tanh() + + forget_gate_sm.unaryExpr(Eigen::internal::scalar_sigmoid_op()) * + prev_state_map; + output_activ_map = + output_gate_sm.unaryExpr(Eigen::internal::scalar_sigmoid_op()) * + output_state_map.tanh(); +} + +template +void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, + int outputs_count, Scalar* const* output_data, + const Dims<4>* const* output_dims) { + gemmlowp::ScopedProfilingLabel label("TensorFlowSplit"); + DCHECK_GE(outputs_count, 1); + for (int i = 0; i < outputs_count; i++) { + /* batches = */ MatchingArraySize(*output_dims[i], 3, input_dims, 3); + /* height = */ MatchingArraySize(*output_dims[i], 2, input_dims, 2); + /* width = */ MatchingArraySize(*output_dims[i], 1, input_dims, 1); + } + const int batches = MatchingArraySize(*output_dims[0], 3, input_dims, 3); + const int height = MatchingArraySize(*output_dims[0], 2, input_dims, 2); + const int width = MatchingArraySize(*output_dims[0], 1, input_dims, 1); + DCHECK(IsPackedWithoutStrides(input_dims)); + // for now we dont have a model with a TensorFlowSplit + // with fused activation function. + DCHECK(Ac == FusedActivationFunctionType::kNone); + const int whb = width * height * batches; + const Scalar* input_ptr = input_data; + for (int k = 0; k < whb; k++) { + for (int i = 0; i < outputs_count; ++i) { + memcpy(output_data[i] + k * output_dims[i]->sizes[0], input_ptr, + output_dims[i]->sizes[0] * sizeof(Scalar)); + input_ptr += output_dims[i]->sizes[0]; + } + } +} + +inline int NodeOffset(int b, int h, int w, int height, int width) { + return (b * height + h) * width + w; +} + +template +void AveragePool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, + int pad_width, int pad_height, int kwidth, int kheight, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("AveragePool"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + // TODO: get rid of the dynamic memory allocation here! + Eigen::VectorXf out_count(out_mat.cols()); + out_count.setZero(); + // Prefill the output to 0. + out_mat.setZero(); + for (int b = 0; b < batches; ++b) { + for (int h = 0; h < input_height; ++h) { + for (int w = 0; w < input_width; ++w) { + // (h_start, h_end) * (w_start, w_end) is the range that the input + // vector projects to. + int hpad = h + pad_height; + int wpad = w + pad_width; + int h_start = (hpad < kheight) ? 0 : (hpad - kheight) / stride_height + 1; + int h_end = std::min(hpad / stride_height + 1, output_height); + int w_start = (wpad < kwidth) ? 0 : (wpad - kwidth) / stride_width + 1; + int w_end = std::min(wpad / stride_width + 1, output_width); + // compute elementwise sum + for (int ph = h_start; ph < h_end; ++ph) { + for (int pw = w_start; pw < w_end; ++pw) { + int out_offset = NodeOffset(b, ph, pw, output_height, output_width); + out_mat.col(out_offset) += + in_mat.col(NodeOffset(b, h, w, input_height, input_width)); + out_count(out_offset)++; + } + } + } + } + } + // Divide the output by the actual number of elements being averaged over + DCHECK_GT(out_count.minCoeff(), 0); + out_mat.array().rowwise() /= out_count.transpose().array(); + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < output_height; ++y) { + for (int x = 0; x < output_width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + output_data[Offset(output_dims, c, x, y, b)]); + } + } + } + } +} + +template +void AveragePool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, + int pad_width, int pad_height, int filter_width, + int filter_height, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("AveragePool/8bit"); + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + DCHECK_EQ(output_activation_min, 0); + DCHECK_EQ(output_activation_max, 255); + } + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + const int filter_count = + (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); + // TODO: Add a dynamic buffer allocation path instead of hardcoded size. + static constexpr int kAccBufferMaxSize = 2048; + DCHECK_LE(depth, kAccBufferMaxSize); + uint16 acc[kAccBufferMaxSize]; + memset(acc, 0, depth * sizeof(acc[0])); + const uint8* input_ptr = + input_data + input_dims.strides[1] * in_x_origin + + input_dims.strides[2] * in_y_origin + input_dims.strides[3] * batch; + for (int fy = filter_y_start; fy < filter_y_end; fy++) { + const uint8* input_row_ptr = input_ptr + fy * input_dims.strides[2] + + filter_x_start * input_dims.strides[1]; + for (int fx = filter_x_start; fx < filter_x_end; fx++) { + int channel = 0; +#ifdef USE_NEON + for (; channel <= depth - 16; channel += 16) { + uint16x8_t acc_reg[2]; + for (int i = 0; i < 2; i++) { + acc_reg[i] = vld1q_u16(acc + channel + 8 * i); + } + uint8x16_t input_reg = vld1q_u8(input_row_ptr); + input_row_ptr += 16; + acc_reg[0] = vaddw_u8(acc_reg[0], vget_low_u8(input_reg)); + acc_reg[1] = vaddw_u8(acc_reg[1], vget_high_u8(input_reg)); + for (int i = 0; i < 2; i++) { + vst1q_u16(acc + channel + 8 * i, acc_reg[i]); + } + } + for (; channel <= depth - 8; channel += 8) { + uint16x8_t acc_reg = vld1q_u16(acc + channel); + uint8x8_t input_reg = vld1_u8(input_row_ptr); + input_row_ptr += 8; + acc_reg = vaddw_u8(acc_reg, input_reg); + vst1q_u16(acc + channel, acc_reg); + } +#endif + for (; channel < depth; ++channel) { + acc[channel] += *input_row_ptr++; + } + } + } + uint8* output_ptr = + output_data + Offset(output_dims, 0, out_x, out_y, batch); + int channel = 0; +#ifdef USE_NEON +#define AVGPOOL_DIVIDING_BY(FILTER_COUNT) \ + if (filter_count == FILTER_COUNT) { \ + for (; channel <= depth - 8; channel += 8) { \ + uint16 buf[8]; \ + for (int i = 0; i < 8; i++) { \ + buf[i] = (acc[channel + i] + FILTER_COUNT / 2) / FILTER_COUNT; \ + } \ + uint8x8_t buf8 = vqmovn_u16(vld1q_u16(buf)); \ + buf8 = vmin_u8(buf8, vdup_n_u8(output_activation_max)); \ + buf8 = vmax_u8(buf8, vdup_n_u8(output_activation_min)); \ + vst1_u8(output_ptr + channel, buf8); \ + } \ + } + AVGPOOL_DIVIDING_BY(9) + AVGPOOL_DIVIDING_BY(15) +#undef AVGPOOL_DIVIDING_BY + for (; channel <= depth - 8; channel += 8) { + uint16 buf[8]; + for (int i = 0; i < 8; i++) { + buf[i] = (acc[channel + i] + filter_count / 2) / filter_count; + } + uint8x8_t buf8 = vqmovn_u16(vld1q_u16(buf)); + buf8 = vmin_u8(buf8, vdup_n_u8(output_activation_max)); + buf8 = vmax_u8(buf8, vdup_n_u8(output_activation_min)); + vst1_u8(output_ptr + channel, buf8); + } +#endif + for (; channel < depth; ++channel) { + uint16 a = (acc[channel] + filter_count / 2) / filter_count; + a = std::max(a, output_activation_min); + a = std::min(a, output_activation_max); + output_ptr[channel] = static_cast(a); + } + } + } + } +} + +template +void MaxPool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, + int pad_width, int pad_height, int kwidth, int kheight, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("MaxPool"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + // Prefill the output to minimum representable float value + out_mat.setConstant(std::numeric_limits::lowest()); + for (int b = 0; b < batches; ++b) { + for (int h = 0; h < input_height; ++h) { + for (int w = 0; w < input_width; ++w) { + // (h_start, h_end) * (w_start, w_end) is the range that the input + // vector projects to. + int hpad = h + pad_height; + int wpad = w + pad_width; + int h_start = (hpad < kheight) ? 0 : (hpad - kheight) / stride_height + 1; + int h_end = std::min(hpad / stride_height + 1, output_height); + int w_start = (wpad < kwidth) ? 0 : (wpad - kwidth) / stride_width + 1; + int w_end = std::min(wpad / stride_width + 1, output_width); + // compute elementwise sum + for (int ph = h_start; ph < h_end; ++ph) { + for (int pw = w_start; pw < w_end; ++pw) { + int out_offset = NodeOffset(b, ph, pw, output_height, output_width); + out_mat.col(out_offset) = + out_mat.col(out_offset) + .cwiseMax(in_mat.col( + NodeOffset(b, h, w, input_height, input_width))); + } + } + } + } + } + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < output_height; ++y) { + for (int x = 0; x < output_width; ++x) { + for (int c = 0; c < depth; ++c) { + output_data[Offset(output_dims, c, x, y, b)] = ActivationFunction( + output_data[Offset(output_dims, c, x, y, b)]); + } + } + } + } +} + +template +void MaxPool(const uint8* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, + int pad_width, int pad_height, int filter_width, int filter_height, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("MaxPool/8bit"); + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + DCHECK_EQ(output_activation_min, 0); + DCHECK_EQ(output_activation_max, 255); + } + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(filter_height, input_height - in_y_origin); + // TODO: Add a dynamic buffer allocation path instead of hardcoded size. + static constexpr int kAccBufferMaxSize = 2048; + DCHECK_LE(depth, kAccBufferMaxSize); + uint8 acc[kAccBufferMaxSize]; + memset(acc, 0, depth * sizeof(acc[0])); + const uint8* input_ptr = + input_data + input_dims.strides[1] * in_x_origin + + input_dims.strides[2] * in_y_origin + input_dims.strides[3] * batch; + for (int fy = filter_y_start; fy < filter_y_end; fy++) { + const uint8* input_row_ptr = input_ptr + fy * input_dims.strides[2] + + filter_x_start * input_dims.strides[1]; + for (int fx = filter_x_start; fx < filter_x_end; fx++) { + int channel = 0; +#ifdef USE_NEON + for (; channel <= depth - 16; channel += 16) { + uint8x16_t acc_reg = vld1q_u8(acc + channel); + uint8x16_t input_reg = vld1q_u8(input_row_ptr); + input_row_ptr += 16; + acc_reg = vmaxq_u8(acc_reg, input_reg); + vst1q_u8(acc + channel, acc_reg); + } + + for (; channel <= depth - 8; channel += 8) { + uint8x8_t acc_reg = vld1_u8(acc + channel); + uint8x8_t input_reg = vld1_u8(input_row_ptr); + input_row_ptr += 8; + acc_reg = vmax_u8(acc_reg, input_reg); + vst1_u8(acc + channel, acc_reg); + } +#endif + for (; channel < depth; ++channel) { + acc[channel] = std::max(acc[channel], *input_row_ptr++); + } + } + } + uint8* output_ptr = + output_data + Offset(output_dims, 0, out_x, out_y, batch); + int channel = 0; +#ifdef USE_NEON + for (; channel <= depth - 16; channel += 16) { + uint8x16_t a = vld1q_u8(acc + channel); + a = vminq_u8(a, vdupq_n_u8(output_activation_max)); + a = vmaxq_u8(a, vdupq_n_u8(output_activation_min)); + vst1q_u8(output_ptr + channel, a); + } + for (; channel <= depth - 8; channel += 8) { + uint8x8_t a = vld1_u8(acc + channel); + a = vmin_u8(a, vdup_n_u8(output_activation_max)); + a = vmax_u8(a, vdup_n_u8(output_activation_min)); + vst1_u8(output_ptr + channel, a); + } +#endif + for (; channel < depth; ++channel) { + uint8 a = acc[channel]; + a = std::max(a, output_activation_min); + a = std::min(a, output_activation_max); + output_ptr[channel] = static_cast(a); + } + } + } + } +} + +template +void L2Pool(const float* input_data, const Dims<4>& input_dims, + int stride_width, int stride_height, + int pad_width, int pad_height, int filter_width, int filter_height, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("L2Pool"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + // Actually carry out L2 Pool. Code is written in forward mode: we go through + // the input values once, and write to all the pooled regions that it maps to. + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + Eigen::VectorXf in_square(in_mat.rows()); + Eigen::VectorXf out_count(out_mat.cols()); + out_count.setZero(); + // Prefill the output to 0. + out_mat.setZero(); + for (int b = 0; b < batches; ++b) { + for (int h = 0; h < input_height; ++h) { + for (int w = 0; w < input_width; ++w) { + // (h_start, h_end) * (w_start, w_end) is the range that the input + // vector projects to. + const int hpad = h + pad_height; + const int wpad = w + pad_width; + const int h_start = + (hpad < filter_height) ? 0 : (hpad - filter_height) / stride_height + 1; + const int h_end = std::min(hpad / stride_height + 1, output_height); + const int w_start = + (wpad < filter_width) ? 0 : (wpad - filter_width) / stride_width + 1; + const int w_end = std::min(wpad / stride_width + 1, output_width); + // pre-compute square + const int in_offset = w + input_width * (h + input_height * b); + in_square = + in_mat.col(in_offset).array() * in_mat.col(in_offset).array(); + // compute elementwise sum of squares + for (int ph = h_start; ph < h_end; ++ph) { + for (int pw = w_start; pw < w_end; ++pw) { + const int out_offset = pw + output_width * (ph + output_height * b); + out_mat.col(out_offset) += in_square; + out_count(out_offset)++; + } + } + } + } + } + + out_count = out_count.array().inverse(); + out_mat = + (out_mat.array().rowwise() * out_count.transpose().array()).cwiseSqrt(); +} + +inline void LocalResponseNormalization(const float* input_data, + const Dims<4>& input_dims, int range, + float bias, float alpha, float beta, + float* output_data, + const Dims<4>& output_dims) { + /* const int batches = */ MatchingArraySize(input_dims, 3, output_dims, 3); + /* const int height = */ MatchingArraySize(input_dims, 2, output_dims, 2); + /* const int width = */ MatchingArraySize(input_dims, 1, output_dims, 1); + /* const int depth = */ MatchingArraySize(input_dims, 0, output_dims, 0); + + const auto data_in = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto data_out = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + + // Carry out local response normalization, vector by vector. + // Since the data are stored column major, making row-wise operation + // probably not memory efficient anyway, we do an explicit for loop over + // the columns. + const int double_range = range * 2; + Eigen::VectorXf padded_square(data_in.rows() + double_range); + padded_square.setZero(); + for (int r = 0; r < data_in.cols(); ++r) { + // Do local response normalization for data_in(:, r) + // first, compute the square and store them in buffer for repeated use + padded_square.block(range, 0, data_in.rows(), 1) = + data_in.col(r).cwiseProduct(data_in.col(r)) * alpha; + // Then, compute the scale and writes them to data_out + float accumulated_scale = 0; + for (int i = 0; i < double_range; ++i) { + accumulated_scale += padded_square(i); + } + for (int i = 0; i < data_in.rows(); ++i) { + accumulated_scale += padded_square(i + double_range); + data_out(i, r) = bias + accumulated_scale; + accumulated_scale -= padded_square(i); + } + } + + // In a few cases, the pow computation could benefit from speedups. + if (beta == 1) { + data_out.array() = data_in.array() * data_out.array().inverse(); + } else if (beta == 0.5) { + data_out.array() = data_in.array() * data_out.array().sqrt().inverse(); + } else { + data_out.array() = data_in.array() * data_out.array().pow(-beta); + } +} + +inline void Softmax(const float* input_data, const Dims<4>& input_dims, + float beta, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Softmax"); + /* const int batches = */ MatchingArraySize(input_dims, 3, output_dims, 3); + /* const int height = */ MatchingArraySize(input_dims, 2, output_dims, 2); + /* const int width = */ MatchingArraySize(input_dims, 1, output_dims, 1); + /* const int depth = */ MatchingArraySize(input_dims, 0, output_dims, 0); + + const auto in_mat = MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + auto out_mat = MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + // Compute the exponential first, removing the max coefficient for numerical + // stability. + out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * beta; + // We are separating out the exp function so that exp can be vectorized. + out_mat = out_mat.array().exp(); + // Normalize to get the activations. + Eigen::Array scale = + out_mat.array().colwise().sum().inverse(); + out_mat.array().rowwise() *= scale; +} + +inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, + int32 input_beta_multiplier, int32 input_beta_left_shift, + int diff_min, uint8* output_data, + const Dims<4>& output_dims) { + // The representation chosen for the input to the exp() function is Q5.26. + // We need to leave extra space since values that we skip might be as large as + // -32 before multiplying by input_beta_multiplier, and therefore as large as + // -16 afterwards. Note that exp(-8) is definitely not insignificant to + // accumulation, but exp(-16) definitely is. + static const int kScaledDiffIntegerBits = 5; + static const int kAccumulationIntegerBits = 12; + using FixedPointScaledDiff = + gemmlowp::FixedPoint; + using FixedPointAccum = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + + gemmlowp::ScopedProfilingLabel label("Softmax"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + for (int b = 0; b < batches; ++b) { + for (int x = 0; x < width; ++x) { + for (int y = 0; y < height; ++y) { + uint8 max_in_row = 0; + for (int c = 0; c < depth; ++c) { + max_in_row = + std::max(max_in_row, input_data[Offset(input_dims, c, x, y, b)]); + } + + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[Offset(input_dims, c, x, y, b)]) - + max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = + sum_of_exps + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_f8)); + } + } + + int32 fixed_sum_of_exps = sum_of_exps.raw(); + // TODO: Use a NEON intrinsic like vclzq_u32 instead. + int headroom_plus_one = + __builtin_clz(static_cast(fixed_sum_of_exps)); + // This is the number of bits to the left of the binary point above 1.0. + // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and + // no later adjustment will be needed. + int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; + int32 shifted_sum_minus_one = static_cast( + (static_cast(fixed_sum_of_exps) << headroom_plus_one) - + (static_cast(1) << 31)); + + FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1( + FixedPoint0::FromRaw(shifted_sum_minus_one)); + + for (int c = 0; c < depth; ++c) { + int32 input_diff = + static_cast(input_data[Offset(input_dims, c, x, y, b)]) - + max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + + FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); + int32 unsat_output = gemmlowp::RoundingDivideByPOT( + (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8); + + output_data[Offset(output_dims, c, x, y, b)] = + std::max(std::min(unsat_output, 255), 0); + + } else { + output_data[Offset(output_dims, c, x, y, b)] = 0; + } + } + } + } + } +} + +inline void Logistic(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Logistic"); + auto input_map = MapAsVector(input_data, input_dims); + auto output_map = MapAsVector(output_data, output_dims); + output_map.array() = + input_map.array().unaryExpr(Eigen::internal::scalar_sigmoid_op()); +} + +inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Logistic"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + const uint8 input_val_u8 = input_data[Offset(input_dims, c, x, y, b)]; + const int32 input_val_centered = + static_cast(input_val_u8) - input_zero_point; + uint8 output_val; + if (input_val_centered < -input_range_radius) { + output_val = 0; + } else if (input_val_centered > input_range_radius) { + output_val = 255; + } else { + const int32 input_val_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_val_centered, input_multiplier, input_left_shift); + using FixedPoint4 = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + const FixedPoint4 input_val_f4 = + FixedPoint4::FromRaw(input_val_rescaled); + const FixedPoint0 output_val_f0 = gemmlowp::logistic(input_val_f4); + using gemmlowp::RoundingDivideByPOT; + int32 output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 23); + if (output_val_s32 == 256) { + output_val_s32 = 255; + } + DCHECK_GE(output_val_s32, 0); + DCHECK_LE(output_val_s32, 255); + output_val = static_cast(output_val_s32); + } + output_data[Offset(output_dims, c, x, y, b)] = output_val; + } + } + } + } +} + +inline void Tanh(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Tanh"); + auto input_map = MapAsVector(input_data, input_dims); + auto output_map = MapAsVector(output_data, output_dims); + output_map.array() = input_map.array().tanh(); +} + +inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims, + int32 zero_point, double scale, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Dequantize"); + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + int32 val = input_data[Offset(input_dims, c, x, y, b)]; + float result = static_cast(scale * (val - zero_point)); + output_data[Offset(output_dims, c, x, y, b)] = result; + } + } + } + } +} + +inline void FakeQuant(const float* input_data, const Dims<4>& input_dims, + float rmin, float rmax, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("FakeQuant"); + + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + DCHECK_LE(rmin, 0.); + DCHECK_GE(rmax, 0.); + + // Determine quantization parameters: zero_point, scale. + using Integer = uint8; + const Integer qmin = std::numeric_limits::min(); + const Integer qmax = std::numeric_limits::max(); + const float qmin_float = qmin; + const float qmax_float = qmax; + int32 zero_point = 0; + float scale = 0.f; + // If rmin==rmax, both must be zero per the above assertion, + // so we are done. + if (rmin != rmax) { + // First determine the scale. + scale = (rmax - rmin) / (qmax_float - qmin_float); + + // Zero-point computation. + // First the initial floating-point computation. The zero-point can be + // determined from solving an affine equation for any known pair + // (real value, corresponding quantized value). + // We know two such pairs: (rmin, qmin) and (rmax, qmax). + // The arithmetic error on the zero point computed from either pair + // will be roughly machine_epsilon * (sum of absolute values of terms) + // so we want to use the variant that adds the smaller terms. + const float zero_point_from_min = qmin_float - rmin / scale; + const float zero_point_from_max = qmax_float - rmax / scale; + const float zero_point_from_min_error = + std::abs(qmin_float) + std::abs(rmin / scale); + const float zero_point_from_max_error = + std::abs(qmax_float) + std::abs(rmax / scale); + + const float zero_point_float = + zero_point_from_min_error < zero_point_from_max_error + ? zero_point_from_min + : zero_point_from_max; + + // Now we need to nudge the zero point to be an integer + // (our zero points are integer, and this is motivated by the requirement + // to be able to represent the real value "0" exactly as a quantized value, + // which is required in multiple places, for example in Im2col with SAME + // padding). + if (zero_point_float < qmin_float) { + zero_point = qmin; + } else if (zero_point_float > qmax_float) { + zero_point = qmax; + } else { + zero_point = static_cast(std::round(zero_point_float)); + } + // The zero point should always be in the range of quantized value, + // [qmin, qmax]. + DCHECK_GE(zero_point, qmin); + DCHECK_LE(zero_point, qmax); + } + + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int height = MatchingArraySize(input_dims, 2, output_dims, 2); + const int width = MatchingArraySize(input_dims, 1, output_dims, 1); + const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + const float src_val = input_data[Offset(input_dims, c, x, y, b)]; + const float unclamped_quantized_val = + std::round(zero_point + src_val / scale); + const float quantized_val = std::min( + qmax_float, std::max(qmin_float, unclamped_quantized_val)); + const float dst_val = scale * (quantized_val - zero_point); + output_data[Offset(output_dims, c, x, y, b)] = dst_val; + } + } + } + } +} + +template +inline void Cast(const SrcT* input_data, const Dims<4>& input_dims, + DstT* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Cast"); + auto input_map = MapAsVector(input_data, input_dims); + auto output_map = MapAsVector(output_data, output_dims); + output_map.array() = input_map.array().template cast(); +} + +inline void Floor(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Floor"); + auto input_map = MapAsVector(input_data, input_dims); + auto output_map = MapAsVector(output_data, output_dims); + output_map.array() = Eigen::floor(input_map.array()); +} + +template +inline void Gather(const T* input_data, const Dims<4>& input_dims, + const int32* coords_data, const Dims<4>& coords_dims, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Gather"); + DCHECK_EQ(RequiredBufferSizeForDims(output_dims), + RequiredBufferSizeForDims(coords_dims)); + for (int i = 0; i < RequiredBufferSizeForDims(coords_dims); i++) { + DCHECK_GE(coords_data[i], 0); + DCHECK_LT(coords_data[i], RequiredBufferSizeForDims(input_dims)); + output_data[i] = input_data[coords_data[i]]; + } +} + +inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims, + const int32* output_size_data, + const Dims<4>& output_size_dims, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("ResizeBilinear"); + int32 batches = MatchingArraySize(input_dims, 3, output_dims, 3); + int32 input_height = ArraySize(input_dims, 2); + int32 input_width = ArraySize(input_dims, 1); + int32 depth = MatchingArraySize(input_dims, 0, output_dims, 0); + + DCHECK_EQ(ArraySize(output_size_dims, 3), 1); + DCHECK_EQ(ArraySize(output_size_dims, 2), 1); + DCHECK_EQ(ArraySize(output_size_dims, 1), 1); + DCHECK_EQ(ArraySize(output_size_dims, 0), 2); + int32 output_height = output_size_data[Offset(output_size_dims, 0, 0, 0, 0)]; + int32 output_width = output_size_data[Offset(output_size_dims, 1, 0, 0, 0)]; + float height_scale = static_cast(input_height) / output_height; + float width_scale = static_cast(input_width) / output_width; + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < output_height; ++y) { + float input_y = y * height_scale; + int32 y0 = static_cast(input_y); + int32 y1 = std::min(y0 + 1, input_height - 1); + for (int x = 0; x < output_width; ++x) { + float input_x = x * width_scale; + int32 x0 = static_cast(input_x); + int32 x1 = std::min(x0 + 1, input_width - 1); + for (int c = 0; c < depth; ++c) { + float interpolation = input_data[Offset(input_dims, c, x0, y0, b)] * + (1 - (input_y - y0)) * + (1 - (input_x - x0)) + + input_data[Offset(input_dims, c, x0, y1, b)] * + (input_y - y0) * (1 - (input_x - x0)) + + input_data[Offset(input_dims, c, x1, y0, b)] * + (1 - (input_y - y0)) * (input_x - x0) + + input_data[Offset(input_dims, c, x1, y1, b)] * + (input_y - y0) * (input_x - x0); + output_data[Offset(output_dims, c, x, y, b)] = interpolation; + } + } + } + } +} + +} // namespace optimized_ops +} // namespace nn +} // namespace android + +#if defined OPTIMIZED_OPS_H__IGNORE_DEPRECATED_DECLARATIONS +#undef OPTIMIZED_OPS_H__IGNORE_DEPRECATED_DECLARATIONS +#pragma GCC diagnostic pop +#endif + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_ diff --git a/src/runtime/ref/nn/common/operations/internal/optimized/tensor_utils_impl.h b/src/runtime/ref/nn/common/operations/internal/optimized/tensor_utils_impl.h new file mode 100644 index 0000000..249a672 --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/optimized/tensor_utils_impl.h @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_TENSOR_UTILS_IMPL_H_ +#define FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_TENSOR_UTILS_IMPL_H_ + +#include "ActivationFunctor.h" + +#ifndef USE_NEON +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define USE_NEON +#endif // defined(__ARM_NEON__) || defined(__ARM_NEON) +#endif // USE_NEON + +namespace android { +namespace nn { +namespace tensor_utils { + +// Multiply a matrix by a batch vector, and store results in a batch-size +// vector. +void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix, + int m_rows, int m_cols, + const float* vector, + int n_batch, float* result, + int result_stride); +void NeonMatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result, + int result_stride); + +// Cwise product of two vectors. +void PortableVectorVectorCwiseProduct(const float* vector1, + const float* vector2, int v_size, + float* result); +void NeonVectorVectorCwiseProduct(const float* vector1, const float* vector2, + int v_size, float* result); + +// Cwise product and accumulate of two vectors. Since it's a MAC operation, the +// assumption here is that result array is initialized to valid values. +void PortableVectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, + int v_size, float* result); +void NeonVectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, int v_size, + float* result); + +// Dot product of two vectors. +float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size); + +// Dot product of two batch vectors. +void PortableBatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride); + +// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC +// operation, the assumption here is that result array is initialized to valid +// values. +void PortableVectorBatchVectorCwiseProductAccumulate(const float* vector, + int v_size, + const float* batch_vector, + int n_batch, + float* result); +void NeonVectorBatchVectorCwiseProductAccumulate(const float* vector, + int v_size, + const float* batch_vector, + int n_batch, float* result); + +// Compute "1.0f - elements of vector" (used in CIFG). +void PortableSub1Vector(const float* vector, int v_size, float* result); +void NeonSub1Vector(const float* vector, int v_size, float* result); + +// Clip elements of a vector using a abs_limit value. +void PortableClipVector(const float* vector, int v_size, float abs_limit, + float* result); +void NeonClipVector(const float* vector, int v_size, float abs_limit, + float* result); + +// Batch vector initialization with another vector. +void PortableVectorBatchVectorAssign(const float* vector, int v_size, + int n_batch, float* batch_vector); + +// Apply sigmoid to elements of a vector. +void PortableApplySigmoidToVector(const float* vector, int v_size, + float* result); + +// Apply activation function to elements of a vector. +void PortableApplyActivationToVector(const float* vector, int v_size, + ActivationFn activation, + float* result); + +// Copy vector to another vector. +void PortableCopyVector(const float* vector, int v_size, float* result); + +// Fill vector with 0.f. +void PortableZeroVector(float* vector, int v_size); + +// Limit a float input f between +abs_limit and -abs_limit. +float PortableClip(float f, float abs_limit); + +// Shift left a vector in place with v_size size. +void PortableVectorShiftLeft(float* vector, int v_size, float shift_value); + +// Reduce-sum on a float input vector: +// input_vector: float pointer to input vector. +// input_stride: input vector stride. +// output_vector: float pointer to vector. +// output_size: output vector size. +// reduction_size: number of consecutive elements from input vector which are +// added to get one element of output. +void PortableReductionSumVector(const float* input_vector, int input_stride, + float* output_vector, int output_size, + int reduction_size); +} // namespace tensor_utils +} // namespace nn +} // namespace android + +#endif // FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_TENSOR_UTILS_IMPL_H_ diff --git a/src/runtime/ref/nn/common/operations/internal/tensor_utils.cc b/src/runtime/ref/nn/common/operations/internal/tensor_utils.cc new file mode 100644 index 0000000..78275bb --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/tensor_utils.cc @@ -0,0 +1,29 @@ +/* + * Copyright 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tensor_utils.h" + +#ifndef USE_NEON +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define USE_NEON +#endif // defined(__ARM_NEON__) || defined(__ARM_NEON) +#endif // USE_NEON + +#ifdef USE_NEON +#include "optimized/neon_tensor_utils.h" +#else +#include "reference/portable_tensor_utils.h" +#endif // USE_NEON diff --git a/src/runtime/ref/nn/common/operations/internal/tensor_utils.h b/src/runtime/ref/nn/common/operations/internal/tensor_utils.h new file mode 100644 index 0000000..56167d2 --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/tensor_utils.h @@ -0,0 +1,122 @@ +/* + * Copyright 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_TENSOR_UTILS_H_ +#define FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_TENSOR_UTILS_H_ + +#include "ActivationFunctor.h" + +namespace android { +namespace nn { +namespace tensor_utils { + +// Limit a float input f betweeen +abs_limit and -abs_limit. +float Clip(float f, float abs_limit); + +// Multiply a matrix by a batch vector, and store results in a batch-size +// vector using a stride value provided in result_stride. 'result_stride' shows +// how the number of elements between consecutive result values. For example +// result_stride = 1, will cause the output to look like this: +// [O_1, 0_2, ... O_rows] in memory, but result_stride = 3, will cause it to be +// arranged like this in memory: [O_1, x, x, 0_2, x, x, ..., O_rows] +void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result, + int result_stride); + +// Cwise product of two vectors. +void VectorVectorCwiseProduct(const float* vector1, const float* vector2, + int v_size, float* result); + +// Cwise product and accumulate of two vectors. Since it's a MAC opertation, the +// assumption here is that result array is initialized to valid values. +void VectorVectorCwiseProductAccumulate(const float* vector1, + const float* vector2, int v_size, + float* result); + +// Dot product of two vectors. +float VectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size); + +// Dot product of two batch vectors of size n_batch * v_size: +// vector1 = [x_1_1, x_1_2, ..., x_1_vsize, +// x_2_1, x_2_2, ..., x_2_vsize, +// ... +// x_nbatch_1,..., x_nbatch_vsize] +// vector2 = [y_1_1, y_1_2, ..., y_1_vsize, +// y_2_1, y_2_2, ..., y_2_vsize, +// ... +// y_nbatch_1,..., y_nbatch_vsize] +// Then result will be a vector of n_batch size which will be saved with a +// stride of result_stride in memory starting from 'result': +// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize, +// x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize, +// ... +// x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize] +void BatchVectorBatchVectorDotProduct(const float* vector1, + const float* vector2, int v_size, + int n_batch, float* result, + int result_stride); + +// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC +// operation, the assumption here is that result array is initialized to valid +// values. +void VectorBatchVectorCwiseProductAccumulate(const float* vector, int v_size, + const float* batch_vector, + int n_batch, float* result); + +// Batch vector initialization with another vector. +void VectorBatchVectorAssign(const float* vector, int v_size, int n_batch, + float* batch_vector); + +// Apply sigmoid to elements of a vector. +void ApplySigmoidToVector(const float* vector, int v_size, float* result); + +// Apply activation function to elements of a vector. +void ApplyActivationToVector(const float* vector, int v_size, + ActivationFn activation, float* result); + +// Copy vector to another vector. +void CopyVector(const float* vector, int v_size, float* result); + +// Compute "1.0f - elements of vector" (used in CIFG). +void Sub1Vector(const float* vector, int v_size, float* result); + +// Fill vector with 0.f. +void ZeroVector(float* vector, int v_size); + +// Clip elements of a vector using a abs_limit value. +void ClipVector(const float* vector, int v_size, float abs_limit, + float* result); + +// Shift left a vector in place with v_size size. +void VectorShiftLeft(float* vector, int v_size, float shift_value); + +// Reduce-sum on a float input vector: +// input_vector: float pointer to input vector. +// input_stride: input vector stride. +// output_vector: float pointer to vector. +// output_size: output vector size. +// reduction_size: number of consecutive elements from input vector which are +// added to get one element of output. +void ReductionSumVector(const float* input_vector, int input_stride, + float* output_vector, int output_size, + int reduction_size); +} // namespace tensor_utils +} // namespace nn +} // namespace android + +#endif // FRAMEWORKS_ML_NN_COMMON_OPERATIONS_INTERNAL_TENSOR_UTILS_H_ diff --git a/src/runtime/ref/nn/common/operations/internal/tensor_utils_test.cc b/src/runtime/ref/nn/common/operations/internal/tensor_utils_test.cc new file mode 100644 index 0000000..ad5204d --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/tensor_utils_test.cc @@ -0,0 +1,197 @@ +/* + * Copyright 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "gmock/gmock-matchers.h" +#include "gtest/gtest.h" +#include "tensor_utils.h" + +namespace android { +namespace nn { +namespace tensor_utils { + +namespace { + +using ::testing::FloatNear; +using ::testing::Matcher; + +std::vector> ArrayFloatNear(const std::vector& values, + float max_abs_error=1.e-6) { + std::vector> matchers; + matchers.reserve(values.size()); + for (const float& v : values) { + matchers.emplace_back(FloatNear(v, max_abs_error)); + } + return matchers; +} + +} // anonymous namespace + +TEST(uKernels, ClipTest) { + constexpr int kVectorSize = 10; + constexpr float kAbsLimit = 2.0; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0, + -2.5, 3.0, -3.5, 4.0, -4.5}; + std::vector output(kVectorSize); + ClipVector(input, kVectorSize, kAbsLimit, output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear( + {0.0, -0.5, 1.0, -1.5, 2.0, -2.0, 2.0, -2.0, 2.0, -2.0}))); +} + +TEST(uKernels, MatrixBatchVectorMultiplyAccumulateTest) { + constexpr int kRow = 3; + constexpr int kCol = 4; + constexpr int kBatch = 2; + static float matrix[kRow * kCol] = {1.0, 2.0, 3.0, 4.0, // + -1.0, -2.0, -3.0, -4.0, // + 1.0, -2.0, 3.0, -4.0}; + static float vector[kCol * kBatch] = {1.0, -1.0, 1.0, -1.0, // + 2.0, -2.0, 2.0, -2.0}; + std::vector output(kRow * kBatch); + std::fill(output.begin(), output.end(), 3.0); + MatrixBatchVectorMultiplyAccumulate(matrix, kRow, kCol, vector, kBatch, + output.data(), /*result_stride=*/1); + EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear({1., 5., 13., // + -1., 7., 23.}))); +} + +TEST(uKernels, VectorVectorCwiseProductTest) { + constexpr int kVectorSize = 10; + static float input1[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0, + -2.5, 3.0, -3.5, 4.0, -4.5}; + static float input2[kVectorSize] = {0.1, -0.1, 0.1, -0.1, 0.1, + -0.1, 0.1, -0.1, 0.1, -0.1}; + std::vector output(kVectorSize); + VectorVectorCwiseProduct(input1, input2, kVectorSize, output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear( + {0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45}))); +} + +TEST(uKernels, VectorVectorCwiseProductAccumulateTest) { + constexpr int kVectorSize = 10; + static float input1[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0, + -2.5, 3.0, -3.5, 4.0, -4.5}; + static float input2[kVectorSize] = {0.1, -0.1, 0.1, -0.1, 0.1, + -0.1, 0.1, -0.1, 0.1, -0.1}; + std::vector output(kVectorSize); + std::fill(output.begin(), output.end(), 1.0); + VectorVectorCwiseProductAccumulate(input1, input2, kVectorSize, + output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear( + {1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45}))); +} + +TEST(uKernels, VectorBatchVectorAssignTest) { + constexpr int kVectorSize = 5; + constexpr int kBatchSize = 3; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector output(kVectorSize * kBatchSize); + VectorBatchVectorAssign(input, kVectorSize, kBatchSize, output.data()); + EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear( + {0.0, -0.5, 1.0, -1.5, 2.0, 0.0, -0.5, 1.0, -1.5, 2.0, + 0.0, -0.5, 1.0, -1.5, 2.0}))); +} + +TEST(uKernels, ApplySigmoidToVectorTest) { + constexpr int kVectorSize = 5; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector output(kVectorSize); + ApplySigmoidToVector(input, kVectorSize, output.data()); + EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear( + {0.5, 0.377541, 0.731059, 0.182426, 0.880797}))); +} + +TEST(uKernels, ApplyActivationToVectorTest) { + constexpr int kVectorSize = 5; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector output(kVectorSize); + ApplyActivationToVector(input, kVectorSize, kActivationRelu, output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear({0.0, 0.0, 1.0, 0.0, 2.0}))); + + ApplyActivationToVector(input, kVectorSize, kActivationTanh, output.data()); + EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear( + {0.0, -0.462117, 0.761594, -0.905148, 0.964028}))); +} + +TEST(uKernels, CopyVectorTest) { + constexpr int kVectorSize = 5; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector output(kVectorSize); + CopyVector(input, kVectorSize, output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear({0.0, -0.5, 1.0, -1.5, 2.0}))); +} + +TEST(uKernels, Sub1VectorTest) { + constexpr int kVectorSize = 5; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector output(kVectorSize); + Sub1Vector(input, kVectorSize, output.data()); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear({1.0, 1.5, 0.0, 2.5, -1.0}))); +} + +TEST(uKernels, ZeroVectorTest) { + constexpr int kVectorSize = 5; + std::vector output(kVectorSize); + ZeroVector(output.data(), kVectorSize); + EXPECT_THAT(output, + ElementsAreArray(ArrayFloatNear({0.0, 0.0, 0.0, 0.0, 0.0}))); +} + +TEST(uKernels, BatchVectorBatchVectorDotProductTest) { + constexpr int kVectorSize = 5; + constexpr int kBatch = 2; + static float input1[kVectorSize * kBatch] = {0.0, -0.5, 1.0, -1.5, 2.0, + -2.5, 3.0, -3.5, 4.0, -4.5}; + static float input2[kVectorSize * kBatch] = {0.1, -0.1, 0.1, -0.1, 0.1, + -0.1, 0.1, -0.1, 0.1, -0.1}; + std::vector output(kBatch); + BatchVectorBatchVectorDotProduct(input1, input2, kVectorSize, kBatch, + output.data(), /*result_stride=*/1); + EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear({0.5, 1.75}))); +} + +TEST(uKernels, VectorShiftLeftTest) { + constexpr int kVectorSize = 5; + static float input[kVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0}; + std::vector result(kVectorSize); + VectorShiftLeft(input, kVectorSize, 3.0); + result.assign(input, input + kVectorSize); + EXPECT_THAT(result, + ElementsAreArray(ArrayFloatNear({-0.5, 1.0, -1.5, 2.0, 3.0}))); +} + +TEST(uKernels, ReductionSumVectorTest) { + constexpr int kInputVectorSize = 10; + constexpr int kOutputVectorSize = 5; + constexpr int kReductionSize = 2; + static float input[kInputVectorSize] = {0.0, -0.5, 1.0, -1.5, 2.0, + 0.0, -0.5, 1.0, 1.0, 2.0}; + std::vector result(kOutputVectorSize); + ReductionSumVector(input, + /*input_stride=*/1, result.data(), kOutputVectorSize, + kReductionSize); + EXPECT_THAT(result, + ElementsAreArray(ArrayFloatNear({-0.5, -0.5, 2.0, 0.5, 3.0}))); +} + +} // namespace tensor_utils +} // namespace nn +} // namespace android diff --git a/src/runtime/ref/nn/common/operations/internal/types.h b/src/runtime/ref/nn/common/operations/internal/types.h new file mode 100644 index 0000000..2000f73 --- /dev/null +++ b/src/runtime/ref/nn/common/operations/internal/types.h @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_TYPES_H_ +#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_TYPES_H_ + +#include "compatibility.h" + +namespace android { +namespace nn { + +enum class FusedActivationFunctionType { kNone, kRelu6, kRelu1, kRelu }; + +template +struct Dims { + int sizes[N]; + int strides[N]; +}; + +struct Shape; + +inline Dims<4> convertShapeToDims(const Shape& shape) { + Dims<4> dims; + for (int i=0; i<4; i++) { + dims.sizes[i] = 1; + } + + if (shape.dimensions.size() == 1) { + dims.sizes[0] = (int)getSizeOfDimension(shape, 0); + } else { + for (int i=0; i<4; i++) { + int src = (int)shape.dimensions.size()-i-1; + if (src >= 0) { + dims.sizes[i] = (int)getSizeOfDimension(shape, src); + } + } + } + + dims.strides[0] = 1; + for (int i = 1; i<4; i++) { + dims.strides[i] = dims.strides[i-1] * dims.sizes[i-1]; + } + return dims; +} + +inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) { + DCHECK(i0 >= 0 && i0 < dims.sizes[0]); + DCHECK(i1 >= 0 && i1 < dims.sizes[1]); + DCHECK(i2 >= 0 && i2 < dims.sizes[2]); + DCHECK(i3 >= 0 && i3 < dims.sizes[3]); + return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] + + i3 * dims.strides[3]; +} + +// Get array size, DCHECKing that the dim index is in range. +template +int ArraySize(const Dims& array, int index) { + DCHECK(index >= 0 && index < N); + return array.sizes[index]; +} + +// Get common array size, DCHECKing that they all agree. +template +int MatchingArraySize(const ArrayType1& array1, int index1, + const ArrayType2& array2, int index2) { + DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2)); + return ArraySize(array1, index1); +} + +template +int MatchingArraySize(const ArrayType1& array1, int index1, + const ArrayType2& array2, int index2, Args... args) { + DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2)); + return MatchingArraySize(array1, index1, args...); +} + +inline int RequiredBufferSizeForDims(const Dims<4>& dims) { + int max_offset = 0; + for (int i = 0; i < 4; i++) { + max_offset += (dims.sizes[i] - 1) * dims.strides[i]; + } + return max_offset + 1; +} + +template +bool IsPackedWithoutStrides(const Dims& dims) { + int expected_stride = 1; + for (int d = 0; d < N; d++) { + if (dims.strides[d] != expected_stride) return false; + expected_stride *= dims.sizes[d]; + } + return true; +} + +} // namespace nn +} // namespace android + +#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_TYPES_H_ diff --git a/src/runtime/ref/nn/depend/CMakeLists.txt b/src/runtime/ref/nn/depend/CMakeLists.txt index 9ac4286..04debbc 100644 --- a/src/runtime/ref/nn/depend/CMakeLists.txt +++ b/src/runtime/ref/nn/depend/CMakeLists.txt @@ -1,5 +1,5 @@ ADD_SUBDIRECTORY(android-base) -#ADD_SUBDIRECTORY(external) +ADD_SUBDIRECTORY(external) ADD_SUBDIRECTORY(hal) #ADD_SUBDIRECTORY(libc) ADD_SUBDIRECTORY(libcutils) diff --git a/src/runtime/ref/nn/depend/external/CMakeLists.txt b/src/runtime/ref/nn/depend/external/CMakeLists.txt new file mode 100644 index 0000000..3b679b0 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/CMakeLists.txt @@ -0,0 +1,13 @@ +ADD_SUBDIRECTORY(eigen) +ADD_SUBDIRECTORY(gemmlowp) + +SET(INC_DIRS + ${INC_DIRS} + PARENT_SCOPE +) + +SET(SRCS + ${SRCS} + PARENT_SCOPE +) + diff --git a/src/runtime/ref/nn/depend/external/eigen/CMakeLists.txt b/src/runtime/ref/nn/depend/external/eigen/CMakeLists.txt new file mode 100644 index 0000000..b76ac5a --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/CMakeLists.txt @@ -0,0 +1,10 @@ + +SET(CUR_INCS + ${CMAKE_CURRENT_SOURCE_DIR} +) + +SET(INC_DIRS + ${INC_DIRS} + ${CUR_INCS} + PARENT_SCOPE +) diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/CMakeLists.txt b/src/runtime/ref/nn/depend/external/eigen/Eigen/CMakeLists.txt new file mode 100644 index 0000000..9eb502b --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/CMakeLists.txt @@ -0,0 +1,19 @@ +include(RegexUtils) +test_escape_string_as_regex() + +file(GLOB Eigen_directory_files "*") + +escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + +foreach(f ${Eigen_directory_files}) + if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/src") + list(APPEND Eigen_directory_files_to_install ${f}) + endif() +endforeach(f ${Eigen_directory_files}) + +install(FILES + ${Eigen_directory_files_to_install} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel + ) + +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/Cholesky b/src/runtime/ref/nn/depend/external/eigen/Eigen/Cholesky new file mode 100644 index 0000000..369d1f5 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/Cholesky @@ -0,0 +1,41 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CHOLESKY_MODULE_H +#define EIGEN_CHOLESKY_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Cholesky_Module Cholesky module + * + * + * + * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices. + * Those decompositions are also accessible via the following methods: + * - MatrixBase::llt() + * - MatrixBase::ldlt() + * - SelfAdjointView::llt() + * - SelfAdjointView::ldlt() + * + * \code + * #include + * \endcode + */ + +#include "src/Cholesky/LLT.h" +#include "src/Cholesky/LDLT.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke.h" +#include "src/Cholesky/LLT_LAPACKE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CHOLESKY_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/CholmodSupport b/src/runtime/ref/nn/depend/external/eigen/Eigen/CholmodSupport new file mode 100644 index 0000000..bed8924 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/CholmodSupport @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H +#define EIGEN_CHOLMODSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { + #include +} + +/** \ingroup Support_modules + * \defgroup CholmodSupport_Module CholmodSupport module + * + * This module provides an interface to the Cholmod library which is part of the suitesparse package. + * It provides the two following main factorization classes: + * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization. + * - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial). + * + * For the sake of completeness, this module also propose the two following classes: + * - class CholmodSimplicialLLT + * - class CholmodSimplicialLDLT + * Note that these classes does not bring any particular advantage compared to the built-in + * SimplicialLLT and SimplicialLDLT factorization classes. + * + * \code + * #include + * \endcode + * + * In order to use this module, the cholmod headers must be accessible from the include paths, and your binary must be linked to the cholmod library and its dependencies. + * The dependencies depend on how cholmod has been compiled. + * For a cmake based project, you can use our FindCholmod.cmake module to help you in this task. + * + */ + +#include "src/CholmodSupport/CholmodSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CHOLMODSUPPORT_MODULE_H + diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/Core b/src/runtime/ref/nn/depend/external/eigen/Eigen/Core new file mode 100644 index 0000000..0f7fa63 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/Core @@ -0,0 +1,516 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2007-2011 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CORE_H +#define EIGEN_CORE_H + +// first thing Eigen does: stop the compiler from committing suicide +#include "src/Core/util/DisableStupidWarnings.h" + +// Handle NVCC/CUDA/SYCL +#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) + // Do not try asserts on CUDA and SYCL! + #ifndef EIGEN_NO_DEBUG + #define EIGEN_NO_DEBUG + #endif + + #ifdef EIGEN_INTERNAL_DEBUGGING + #undef EIGEN_INTERNAL_DEBUGGING + #endif + + #ifdef EIGEN_EXCEPTIONS + #undef EIGEN_EXCEPTIONS + #endif + + // All functions callable from CUDA code must be qualified with __device__ + #ifdef __CUDACC__ + // Do not try to vectorize on CUDA and SYCL! + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif + + #define EIGEN_DEVICE_FUNC __host__ __device__ + // We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro + // works properly on the device side + #include + #else + #define EIGEN_DEVICE_FUNC + #endif + +#else + #define EIGEN_DEVICE_FUNC + +#endif + +// When compiling CUDA device code with NVCC, pull in math functions from the +// global namespace. In host mode, and when device doee with clang, use the +// std versions. +#if defined(__CUDA_ARCH__) && defined(__NVCC__) + #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; +#else + #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; +#endif + +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) + #define EIGEN_EXCEPTIONS +#endif + +#ifdef EIGEN_EXCEPTIONS + #include +#endif + +// then include this file where all our macros are defined. It's really important to do it first because +// it's where we do all the alignment settings (platform detection and honoring the user's will if he +// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization. +#include "src/Core/util/Macros.h" + +// Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) +// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. +#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) + #pragma GCC optimize ("-fno-ipa-cp-clone") +#endif + +#include + +// this include file manages BLAS and MKL related macros +// and inclusion of their respective header files +#include "src/Core/util/MKL_support.h" + +// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into +// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks +#if EIGEN_MAX_ALIGN_BYTES==0 + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif +#endif + +#if EIGEN_COMP_MSVC + #include // for _aligned_malloc -- need it regardless of whether vectorization is enabled + #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later + // Remember that usage of defined() in a #define is undefined by the standard. + // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. + #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 + #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER + #endif + #endif +#else + // Remember that usage of defined() in a #define is undefined by the standard + #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) + #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC + #endif +#endif + +#ifndef EIGEN_DONT_VECTORIZE + + #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) + + // Defines symbols for compile-time detection of which instructions are + // used. + // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_SSE + #define EIGEN_VECTORIZE_SSE2 + + // Detect sse3/ssse3/sse4: + // gcc and icc defines __SSE3__, ... + // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you + // want to force the use of those instructions with msvc. + #ifdef __SSE3__ + #define EIGEN_VECTORIZE_SSE3 + #endif + #ifdef __SSSE3__ + #define EIGEN_VECTORIZE_SSSE3 + #endif + #ifdef __SSE4_1__ + #define EIGEN_VECTORIZE_SSE4_1 + #endif + #ifdef __SSE4_2__ + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX__ + #define EIGEN_VECTORIZE_AVX + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX2__ + #define EIGEN_VECTORIZE_AVX2 + #endif + #ifdef __FMA__ + #define EIGEN_VECTORIZE_FMA + #endif + #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512) + #define EIGEN_VECTORIZE_AVX512 + #define EIGEN_VECTORIZE_AVX2 + #define EIGEN_VECTORIZE_AVX + #define EIGEN_VECTORIZE_FMA + #ifdef __AVX512DQ__ + #define EIGEN_VECTORIZE_AVX512DQ + #endif + #endif + + // include files + + // This extern "C" works around a MINGW-w64 compilation issue + // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354 + // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). + // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations + // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know; + // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. + // notice that since these are C headers, the extern "C" is theoretically needed anyways. + extern "C" { + // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. + // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus: + #if EIGEN_COMP_ICC >= 1110 + #include + #else + #include + #include + #include + #ifdef EIGEN_VECTORIZE_SSE3 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSSE3 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSE4_1 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSE4_2 + #include + #endif + #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512) + #include + #endif + #endif + } // end extern "C" + #elif defined __VSX__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_VSX + #include + // We need to #undef all these ugly tokens defined in + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel + #elif defined __ALTIVEC__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_ALTIVEC + #include + // We need to #undef all these ugly tokens defined in + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel + #elif (defined __ARM_NEON) || (defined __ARM_NEON__) + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_NEON + #include + #elif (defined __s390x__ && defined __VEC__) + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_ZVECTOR + #include + #endif +#endif + +#if defined(__F16C__) && !defined(EIGEN_COMP_CLANG) + // We can use the optimized fp16 to float and float to fp16 conversion routines + #define EIGEN_HAS_FP16_C +#endif + +#if defined __CUDACC__ + #define EIGEN_VECTORIZE_CUDA + #include + #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 + #define EIGEN_HAS_CUDA_FP16 + #endif +#endif + +#if defined EIGEN_HAS_CUDA_FP16 + #include + #include +#endif + +#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE) + #define EIGEN_HAS_OPENMP +#endif + +#ifdef EIGEN_HAS_OPENMP +#include +#endif + +// MSVC for windows mobile does not have the errno.h file +#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM +#define EIGEN_HAS_ERRNO +#endif + +#ifdef EIGEN_HAS_ERRNO +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // for CHAR_BIT +// for min/max: +#include + +// for std::is_nothrow_move_assignable +#ifdef EIGEN_INCLUDE_TYPE_TRAITS +#include +#endif + +// for outputting debug info +#ifdef EIGEN_DEBUG_ASSIGN +#include +#endif + +// required for __cpuid, needs to be included after cmath +#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE + #include +#endif + +/** \brief Namespace containing all symbols from the %Eigen library. */ +namespace Eigen { + +inline static const char *SimdInstructionSetsInUse(void) { +#if defined(EIGEN_VECTORIZE_AVX512) + return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_AVX) + return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_2) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_1) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; +#elif defined(EIGEN_VECTORIZE_SSSE3) + return "SSE, SSE2, SSE3, SSSE3"; +#elif defined(EIGEN_VECTORIZE_SSE3) + return "SSE, SSE2, SSE3"; +#elif defined(EIGEN_VECTORIZE_SSE2) + return "SSE, SSE2"; +#elif defined(EIGEN_VECTORIZE_ALTIVEC) + return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_VSX) + return "VSX"; +#elif defined(EIGEN_VECTORIZE_NEON) + return "ARM NEON"; +#elif defined(EIGEN_VECTORIZE_ZVECTOR) + return "S390X ZVECTOR"; +#else + return "None"; +#endif +} + +} // end namespace Eigen + +#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT +// This will generate an error message: +#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information +#endif + +namespace Eigen { + +// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to +// ensure QNX/QCC support +using std::size_t; +// gcc 4.6.0 wants std:: for ptrdiff_t +using std::ptrdiff_t; + +} + +/** \defgroup Core_Module Core module + * This is the main module of Eigen providing dense matrix and vector support + * (both fixed and dynamic size) with all the features corresponding to a BLAS library + * and much more... + * + * \code + * #include + * \endcode + */ + +#include "src/Core/util/Constants.h" +#include "src/Core/util/Meta.h" +#include "src/Core/util/ForwardDeclarations.h" +#include "src/Core/util/StaticAssert.h" +#include "src/Core/util/XprHelper.h" +#include "src/Core/util/Memory.h" + +#include "src/Core/NumTraits.h" +#include "src/Core/MathFunctions.h" +#include "src/Core/GenericPacketMath.h" +#include "src/Core/MathFunctionsImpl.h" + +#if defined EIGEN_VECTORIZE_AVX512 + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/AVX/PacketMath.h" + #include "src/Core/arch/AVX512/PacketMath.h" + #include "src/Core/arch/AVX512/MathFunctions.h" +#elif defined EIGEN_VECTORIZE_AVX + // Use AVX for floats and doubles, SSE for integers + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/SSE/Complex.h" + #include "src/Core/arch/SSE/MathFunctions.h" + #include "src/Core/arch/AVX/PacketMath.h" + #include "src/Core/arch/AVX/MathFunctions.h" + #include "src/Core/arch/AVX/Complex.h" + #include "src/Core/arch/AVX/TypeCasting.h" +#elif defined EIGEN_VECTORIZE_SSE + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/SSE/MathFunctions.h" + #include "src/Core/arch/SSE/Complex.h" + #include "src/Core/arch/SSE/TypeCasting.h" +#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) + #include "src/Core/arch/AltiVec/PacketMath.h" + #include "src/Core/arch/AltiVec/MathFunctions.h" + #include "src/Core/arch/AltiVec/Complex.h" +#elif defined EIGEN_VECTORIZE_NEON + #include "src/Core/arch/NEON/PacketMath.h" + #include "src/Core/arch/NEON/MathFunctions.h" + #include "src/Core/arch/NEON/Complex.h" +#elif defined EIGEN_VECTORIZE_ZVECTOR + #include "src/Core/arch/ZVector/PacketMath.h" + #include "src/Core/arch/ZVector/MathFunctions.h" + #include "src/Core/arch/ZVector/Complex.h" +#endif + +// Half float support +#include "src/Core/arch/CUDA/Half.h" +#include "src/Core/arch/CUDA/PacketMathHalf.h" +#include "src/Core/arch/CUDA/TypeCasting.h" + +#if defined EIGEN_VECTORIZE_CUDA + #include "src/Core/arch/CUDA/PacketMath.h" + #include "src/Core/arch/CUDA/MathFunctions.h" +#endif + +#include "src/Core/arch/Default/Settings.h" + +#include "src/Core/functors/TernaryFunctors.h" +#include "src/Core/functors/BinaryFunctors.h" +#include "src/Core/functors/UnaryFunctors.h" +#include "src/Core/functors/NullaryFunctors.h" +#include "src/Core/functors/StlFunctors.h" +#include "src/Core/functors/AssignmentFunctors.h" + +// Specialized functors to enable the processing of complex numbers +// on CUDA devices +#include "src/Core/arch/CUDA/Complex.h" + +#include "src/Core/IO.h" +#include "src/Core/DenseCoeffsBase.h" +#include "src/Core/DenseBase.h" +#include "src/Core/MatrixBase.h" +#include "src/Core/EigenBase.h" + +#include "src/Core/Product.h" +#include "src/Core/CoreEvaluators.h" +#include "src/Core/AssignEvaluator.h" + +#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874 + // at least confirmed with Doxygen 1.5.5 and 1.5.6 + #include "src/Core/Assign.h" +#endif + +#include "src/Core/ArrayBase.h" +#include "src/Core/util/BlasUtil.h" +#include "src/Core/DenseStorage.h" +#include "src/Core/NestByValue.h" + +// #include "src/Core/ForceAlignedAccess.h" + +#include "src/Core/ReturnByValue.h" +#include "src/Core/NoAlias.h" +#include "src/Core/PlainObjectBase.h" +#include "src/Core/Matrix.h" +#include "src/Core/Array.h" +#include "src/Core/CwiseTernaryOp.h" +#include "src/Core/CwiseBinaryOp.h" +#include "src/Core/CwiseUnaryOp.h" +#include "src/Core/CwiseNullaryOp.h" +#include "src/Core/CwiseUnaryView.h" +#include "src/Core/SelfCwiseBinaryOp.h" +#include "src/Core/Dot.h" +#include "src/Core/StableNorm.h" +#include "src/Core/Stride.h" +#include "src/Core/MapBase.h" +#include "src/Core/Map.h" +#include "src/Core/Ref.h" +#include "src/Core/Block.h" +#include "src/Core/VectorBlock.h" +#include "src/Core/Transpose.h" +#include "src/Core/DiagonalMatrix.h" +#include "src/Core/Diagonal.h" +#include "src/Core/DiagonalProduct.h" +#include "src/Core/Redux.h" +#include "src/Core/Visitor.h" +#include "src/Core/Fuzzy.h" +#include "src/Core/Swap.h" +#include "src/Core/CommaInitializer.h" +#include "src/Core/GeneralProduct.h" +#include "src/Core/Solve.h" +#include "src/Core/Inverse.h" +#include "src/Core/SolverBase.h" +#include "src/Core/PermutationMatrix.h" +#include "src/Core/Transpositions.h" +#include "src/Core/TriangularMatrix.h" +#include "src/Core/SelfAdjointView.h" +#include "src/Core/products/GeneralBlockPanelKernel.h" +#include "src/Core/products/Parallelizer.h" +#include "src/Core/ProductEvaluators.h" +#include "src/Core/products/GeneralMatrixVector.h" +#include "src/Core/products/GeneralMatrixMatrix.h" +#include "src/Core/SolveTriangular.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular.h" +#include "src/Core/products/SelfadjointMatrixVector.h" +#include "src/Core/products/SelfadjointMatrixMatrix.h" +#include "src/Core/products/SelfadjointProduct.h" +#include "src/Core/products/SelfadjointRank2Update.h" +#include "src/Core/products/TriangularMatrixVector.h" +#include "src/Core/products/TriangularMatrixMatrix.h" +#include "src/Core/products/TriangularSolverMatrix.h" +#include "src/Core/products/TriangularSolverVector.h" +#include "src/Core/BandMatrix.h" +#include "src/Core/CoreIterators.h" +#include "src/Core/ConditionEstimator.h" + +#include "src/Core/BooleanRedux.h" +#include "src/Core/Select.h" +#include "src/Core/VectorwiseOp.h" +#include "src/Core/Random.h" +#include "src/Core/Replicate.h" +#include "src/Core/Reverse.h" +#include "src/Core/ArrayWrapper.h" + +#ifdef EIGEN_USE_BLAS +#include "src/Core/products/GeneralMatrixMatrix_BLAS.h" +#include "src/Core/products/GeneralMatrixVector_BLAS.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h" +#include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h" +#include "src/Core/products/SelfadjointMatrixVector_BLAS.h" +#include "src/Core/products/TriangularMatrixMatrix_BLAS.h" +#include "src/Core/products/TriangularMatrixVector_BLAS.h" +#include "src/Core/products/TriangularSolverMatrix_BLAS.h" +#endif // EIGEN_USE_BLAS + +#ifdef EIGEN_USE_MKL_VML +#include "src/Core/Assign_MKL.h" +#endif + +#include "src/Core/GlobalFunctions.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CORE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/Dense b/src/runtime/ref/nn/depend/external/eigen/Eigen/Dense new file mode 100644 index 0000000..5768910 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/Dense @@ -0,0 +1,7 @@ +#include "Core" +#include "LU" +#include "Cholesky" +#include "QR" +#include "SVD" +#include "Geometry" +#include "Eigenvalues" diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/Eigen b/src/runtime/ref/nn/depend/external/eigen/Eigen/Eigen new file mode 100644 index 0000000..654c8dc --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/Eigen @@ -0,0 +1,2 @@ +#include "Dense" +#include "Sparse" diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/Eigenvalues b/src/runtime/ref/nn/depend/external/eigen/Eigen/Eigenvalues new file mode 100644 index 0000000..009e529 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/Eigenvalues @@ -0,0 +1,57 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EIGENVALUES_MODULE_H +#define EIGEN_EIGENVALUES_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "Cholesky" +#include "Jacobi" +#include "Householder" +#include "LU" +#include "Geometry" + +/** \defgroup Eigenvalues_Module Eigenvalues module + * + * + * + * This module mainly provides various eigenvalue solvers. + * This module also provides some MatrixBase methods, including: + * - MatrixBase::eigenvalues(), + * - MatrixBase::operatorNorm() + * + * \code + * #include + * \endcode + */ + +#include "src/misc/RealSvd2x2.h" +#include "src/Eigenvalues/Tridiagonalization.h" +#include "src/Eigenvalues/RealSchur.h" +#include "src/Eigenvalues/EigenSolver.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver.h" +#include "src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h" +#include "src/Eigenvalues/HessenbergDecomposition.h" +#include "src/Eigenvalues/ComplexSchur.h" +#include "src/Eigenvalues/ComplexEigenSolver.h" +#include "src/Eigenvalues/RealQZ.h" +#include "src/Eigenvalues/GeneralizedEigenSolver.h" +#include "src/Eigenvalues/MatrixBaseEigenvalues.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke.h" +#include "src/Eigenvalues/RealSchur_LAPACKE.h" +#include "src/Eigenvalues/ComplexSchur_LAPACKE.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_EIGENVALUES_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/Geometry b/src/runtime/ref/nn/depend/external/eigen/Eigen/Geometry new file mode 100644 index 0000000..716d529 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/Geometry @@ -0,0 +1,62 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GEOMETRY_MODULE_H +#define EIGEN_GEOMETRY_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "SVD" +#include "LU" +#include + +/** \defgroup Geometry_Module Geometry module + * + * This module provides support for: + * - fixed-size homogeneous transformations + * - translation, scaling, 2D and 3D rotations + * - \link Quaternion quaternions \endlink + * - cross products (\ref MatrixBase::cross, \ref MatrixBase::cross3) + * - orthognal vector generation (\ref MatrixBase::unitOrthogonal) + * - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink + * - \link AlignedBox axis aligned bounding boxes \endlink + * - \link umeyama least-square transformation fitting \endlink + * + * \code + * #include + * \endcode + */ + +#include "src/Geometry/OrthoMethods.h" +#include "src/Geometry/EulerAngles.h" + +#include "src/Geometry/Homogeneous.h" +#include "src/Geometry/RotationBase.h" +#include "src/Geometry/Rotation2D.h" +#include "src/Geometry/Quaternion.h" +#include "src/Geometry/AngleAxis.h" +#include "src/Geometry/Transform.h" +#include "src/Geometry/Translation.h" +#include "src/Geometry/Scaling.h" +#include "src/Geometry/Hyperplane.h" +#include "src/Geometry/ParametrizedLine.h" +#include "src/Geometry/AlignedBox.h" +#include "src/Geometry/Umeyama.h" + +// Use the SSE optimized version whenever possible. At the moment the +// SSE version doesn't compile when AVX is enabled +#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX +#include "src/Geometry/arch/Geometry_SSE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_GEOMETRY_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/Householder b/src/runtime/ref/nn/depend/external/eigen/Eigen/Householder new file mode 100644 index 0000000..89cd81b --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/Householder @@ -0,0 +1,30 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_HOUSEHOLDER_MODULE_H +#define EIGEN_HOUSEHOLDER_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Householder_Module Householder module + * This module provides Householder transformations. + * + * \code + * #include + * \endcode + */ + +#include "src/Householder/Householder.h" +#include "src/Householder/HouseholderSequence.h" +#include "src/Householder/BlockHouseholder.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_HOUSEHOLDER_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/IterativeLinearSolvers b/src/runtime/ref/nn/depend/external/eigen/Eigen/IterativeLinearSolvers new file mode 100644 index 0000000..957d575 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/IterativeLinearSolvers @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H +#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module + * + * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse. + * Those solvers are accessible via the following classes: + * - ConjugateGradient for selfadjoint (hermitian) matrices, + * - LeastSquaresConjugateGradient for rectangular least-square problems, + * - BiCGSTAB for general square matrices. + * + * These iterative solvers are associated with some preconditioners: + * - IdentityPreconditioner - not really useful + * - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices. + * - IncompleteLUT - incomplete LU factorization with dual thresholding + * + * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport. + * + \code + #include + \endcode + */ + +#include "src/IterativeLinearSolvers/SolveWithGuess.h" +#include "src/IterativeLinearSolvers/IterativeSolverBase.h" +#include "src/IterativeLinearSolvers/BasicPreconditioners.h" +#include "src/IterativeLinearSolvers/ConjugateGradient.h" +#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" +#include "src/IterativeLinearSolvers/BiCGSTAB.h" +#include "src/IterativeLinearSolvers/IncompleteLUT.h" +#include "src/IterativeLinearSolvers/IncompleteCholesky.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/Jacobi b/src/runtime/ref/nn/depend/external/eigen/Eigen/Jacobi new file mode 100644 index 0000000..17c1d78 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/Jacobi @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_JACOBI_MODULE_H +#define EIGEN_JACOBI_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Jacobi_Module Jacobi module + * This module provides Jacobi and Givens rotations. + * + * \code + * #include + * \endcode + * + * In addition to listed classes, it defines the two following MatrixBase methods to apply a Jacobi or Givens rotation: + * - MatrixBase::applyOnTheLeft() + * - MatrixBase::applyOnTheRight(). + */ + +#include "src/Jacobi/Jacobi.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_JACOBI_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/LU b/src/runtime/ref/nn/depend/external/eigen/Eigen/LU new file mode 100644 index 0000000..6f6c556 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/LU @@ -0,0 +1,46 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LU_MODULE_H +#define EIGEN_LU_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup LU_Module LU module + * This module includes %LU decomposition and related notions such as matrix inversion and determinant. + * This module defines the following MatrixBase methods: + * - MatrixBase::inverse() + * - MatrixBase::determinant() + * + * \code + * #include + * \endcode + */ + +#include "src/misc/Kernel.h" +#include "src/misc/Image.h" +#include "src/LU/FullPivLU.h" +#include "src/LU/PartialPivLU.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke.h" +#include "src/LU/PartialPivLU_LAPACKE.h" +#endif +#include "src/LU/Determinant.h" +#include "src/LU/InverseImpl.h" + +// Use the SSE optimized version whenever possible. At the moment the +// SSE version doesn't compile when AVX is enabled +#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX + #include "src/LU/arch/Inverse_SSE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_LU_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/MetisSupport b/src/runtime/ref/nn/depend/external/eigen/Eigen/MetisSupport new file mode 100644 index 0000000..85c41bf --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/MetisSupport @@ -0,0 +1,35 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_METISSUPPORT_MODULE_H +#define EIGEN_METISSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +} + + +/** \ingroup Support_modules + * \defgroup MetisSupport_Module MetisSupport module + * + * \code + * #include + * \endcode + * This module defines an interface to the METIS reordering package (http://glaros.dtc.umn.edu/gkhome/views/metis). + * It can be used just as any other built-in method as explained in \link OrderingMethods_Module here. \endlink + */ + + +#include "src/MetisSupport/MetisSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_METISSUPPORT_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/OrderingMethods b/src/runtime/ref/nn/depend/external/eigen/Eigen/OrderingMethods new file mode 100644 index 0000000..d8ea361 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/OrderingMethods @@ -0,0 +1,73 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ORDERINGMETHODS_MODULE_H +#define EIGEN_ORDERINGMETHODS_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup OrderingMethods_Module OrderingMethods module + * + * This module is currently for internal use only + * + * It defines various built-in and external ordering methods for sparse matrices. + * They are typically used to reduce the number of elements during + * the sparse matrix decomposition (LLT, LU, QR). + * Precisely, in a preprocessing step, a permutation matrix P is computed using + * those ordering methods and applied to the columns of the matrix. + * Using for instance the sparse Cholesky decomposition, it is expected that + * the nonzeros elements in LLT(A*P) will be much smaller than that in LLT(A). + * + * + * Usage : + * \code + * #include + * \endcode + * + * A simple usage is as a template parameter in the sparse decomposition classes : + * + * \code + * SparseLU > solver; + * \endcode + * + * \code + * SparseQR > solver; + * \endcode + * + * It is possible as well to call directly a particular ordering method for your own purpose, + * \code + * AMDOrdering ordering; + * PermutationMatrix perm; + * SparseMatrix A; + * //Fill the matrix ... + * + * ordering(A, perm); // Call AMD + * \endcode + * + * \note Some of these methods (like AMD or METIS), need the sparsity pattern + * of the input matrix to be symmetric. When the matrix is structurally unsymmetric, + * Eigen computes internally the pattern of \f$A^T*A\f$ before calling the method. + * If your matrix is already symmetric (at leat in structure), you can avoid that + * by calling the method with a SelfAdjointView type. + * + * \code + * // Call the ordering on the pattern of the lower triangular matrix A + * ordering(A.selfadjointView(), perm); + * \endcode + */ + +#ifndef EIGEN_MPL2_ONLY +#include "src/OrderingMethods/Amd.h" +#endif + +#include "src/OrderingMethods/Ordering.h" +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/PaStiXSupport b/src/runtime/ref/nn/depend/external/eigen/Eigen/PaStiXSupport new file mode 100644 index 0000000..de3a63b --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/PaStiXSupport @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PASTIXSUPPORT_MODULE_H +#define EIGEN_PASTIXSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +#include +} + +#ifdef complex +#undef complex +#endif + +/** \ingroup Support_modules + * \defgroup PaStiXSupport_Module PaStiXSupport module + * + * This module provides an interface to the PaSTiX library. + * PaSTiX is a general \b supernodal, \b parallel and \b opensource sparse solver. + * It provides the two following main factorization classes: + * - class PastixLLT : a supernodal, parallel LLt Cholesky factorization. + * - class PastixLDLT: a supernodal, parallel LDLt Cholesky factorization. + * - class PastixLU : a supernodal, parallel LU factorization (optimized for a symmetric pattern). + * + * \code + * #include + * \endcode + * + * In order to use this module, the PaSTiX headers must be accessible from the include paths, and your binary must be linked to the PaSTiX library and its dependencies. + * The dependencies depend on how PaSTiX has been compiled. + * For a cmake based project, you can use our FindPaSTiX.cmake module to help you in this task. + * + */ + +#include "src/PaStiXSupport/PaStiXSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_PASTIXSUPPORT_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/PardisoSupport b/src/runtime/ref/nn/depend/external/eigen/Eigen/PardisoSupport new file mode 100755 index 0000000..340edf5 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/PardisoSupport @@ -0,0 +1,35 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PARDISOSUPPORT_MODULE_H +#define EIGEN_PARDISOSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include + +/** \ingroup Support_modules + * \defgroup PardisoSupport_Module PardisoSupport module + * + * This module brings support for the Intel(R) MKL PARDISO direct sparse solvers. + * + * \code + * #include + * \endcode + * + * In order to use this module, the MKL headers must be accessible from the include paths, and your binary must be linked to the MKL library and its dependencies. + * See this \ref TopicUsingIntelMKL "page" for more information on MKL-Eigen integration. + * + */ + +#include "src/PardisoSupport/PardisoSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_PARDISOSUPPORT_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/QR b/src/runtime/ref/nn/depend/external/eigen/Eigen/QR new file mode 100644 index 0000000..80838e3 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/QR @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_QR_MODULE_H +#define EIGEN_QR_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "Cholesky" +#include "Jacobi" +#include "Householder" + +/** \defgroup QR_Module QR module + * + * + * + * This module provides various QR decompositions + * This module also provides some MatrixBase methods, including: + * - MatrixBase::householderQr() + * - MatrixBase::colPivHouseholderQr() + * - MatrixBase::fullPivHouseholderQr() + * + * \code + * #include + * \endcode + */ + +#include "src/QR/HouseholderQR.h" +#include "src/QR/FullPivHouseholderQR.h" +#include "src/QR/ColPivHouseholderQR.h" +#include "src/QR/CompleteOrthogonalDecomposition.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke.h" +#include "src/QR/HouseholderQR_LAPACKE.h" +#include "src/QR/ColPivHouseholderQR_LAPACKE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_QR_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/QtAlignedMalloc b/src/runtime/ref/nn/depend/external/eigen/Eigen/QtAlignedMalloc new file mode 100644 index 0000000..c6571f1 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/QtAlignedMalloc @@ -0,0 +1,40 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_QTMALLOC_MODULE_H +#define EIGEN_QTMALLOC_MODULE_H + +#include "Core" + +#if (!EIGEN_MALLOC_ALREADY_ALIGNED) + +#include "src/Core/util/DisableStupidWarnings.h" + +void *qMalloc(std::size_t size) +{ + return Eigen::internal::aligned_malloc(size); +} + +void qFree(void *ptr) +{ + Eigen::internal::aligned_free(ptr); +} + +void *qRealloc(void *ptr, std::size_t size) +{ + void* newPtr = Eigen::internal::aligned_malloc(size); + memcpy(newPtr, ptr, size); + Eigen::internal::aligned_free(ptr); + return newPtr; +} + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif + +#endif // EIGEN_QTMALLOC_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/SPQRSupport b/src/runtime/ref/nn/depend/external/eigen/Eigen/SPQRSupport new file mode 100644 index 0000000..f70390c --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/SPQRSupport @@ -0,0 +1,34 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPQRSUPPORT_MODULE_H +#define EIGEN_SPQRSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "SuiteSparseQR.hpp" + +/** \ingroup Support_modules + * \defgroup SPQRSupport_Module SuiteSparseQR module + * + * This module provides an interface to the SPQR library, which is part of the suitesparse package. + * + * \code + * #include + * \endcode + * + * In order to use this module, the SPQR headers must be accessible from the include paths, and your binary must be linked to the SPQR library and its dependencies (Cholmod, AMD, COLAMD,...). + * For a cmake based project, you can use our FindSPQR.cmake and FindCholmod.Cmake modules + * + */ + +#include "src/CholmodSupport/CholmodSupport.h" +#include "src/SPQRSupport/SuiteSparseQRSupport.h" + +#endif diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/SVD b/src/runtime/ref/nn/depend/external/eigen/Eigen/SVD new file mode 100644 index 0000000..86143c2 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/SVD @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SVD_MODULE_H +#define EIGEN_SVD_MODULE_H + +#include "QR" +#include "Householder" +#include "Jacobi" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup SVD_Module SVD module + * + * + * + * This module provides SVD decomposition for matrices (both real and complex). + * Two decomposition algorithms are provided: + * - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones. + * - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems. + * These decompositions are accessible via the respective classes and following MatrixBase methods: + * - MatrixBase::jacobiSvd() + * - MatrixBase::bdcSvd() + * + * \code + * #include + * \endcode + */ + +#include "src/misc/RealSvd2x2.h" +#include "src/SVD/UpperBidiagonalization.h" +#include "src/SVD/SVDBase.h" +#include "src/SVD/JacobiSVD.h" +#include "src/SVD/BDCSVD.h" +#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) +#include "src/misc/lapacke.h" +#include "src/SVD/JacobiSVD_LAPACKE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SVD_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/Sparse b/src/runtime/ref/nn/depend/external/eigen/Eigen/Sparse new file mode 100644 index 0000000..136e681 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/Sparse @@ -0,0 +1,36 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSE_MODULE_H +#define EIGEN_SPARSE_MODULE_H + +/** \defgroup Sparse_Module Sparse meta-module + * + * Meta-module including all related modules: + * - \ref SparseCore_Module + * - \ref OrderingMethods_Module + * - \ref SparseCholesky_Module + * - \ref SparseLU_Module + * - \ref SparseQR_Module + * - \ref IterativeLinearSolvers_Module + * + \code + #include + \endcode + */ + +#include "SparseCore" +#include "OrderingMethods" +#ifndef EIGEN_MPL2_ONLY +#include "SparseCholesky" +#endif +#include "SparseLU" +#include "SparseQR" +#include "IterativeLinearSolvers" + +#endif // EIGEN_SPARSE_MODULE_H + diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseCholesky b/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseCholesky new file mode 100644 index 0000000..b6a320c --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseCholesky @@ -0,0 +1,45 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2013 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSECHOLESKY_MODULE_H +#define EIGEN_SPARSECHOLESKY_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup SparseCholesky_Module SparseCholesky module + * + * This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices. + * Those decompositions are accessible via the following classes: + * - SimplicialLLt, + * - SimplicialLDLt + * + * Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module. + * + * \code + * #include + * \endcode + */ + +#ifdef EIGEN_MPL2_ONLY +#error The SparseCholesky module has nothing to offer in MPL2 only mode +#endif + +#include "src/SparseCholesky/SimplicialCholesky.h" + +#ifndef EIGEN_MPL2_ONLY +#include "src/SparseCholesky/SimplicialCholesky_impl.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPARSECHOLESKY_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseCore b/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseCore new file mode 100644 index 0000000..76966c4 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseCore @@ -0,0 +1,69 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSECORE_MODULE_H +#define EIGEN_SPARSECORE_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include +#include +#include +#include +#include + +/** + * \defgroup SparseCore_Module SparseCore module + * + * This module provides a sparse matrix representation, and basic associated matrix manipulations + * and operations. + * + * See the \ref TutorialSparse "Sparse tutorial" + * + * \code + * #include + * \endcode + * + * This module depends on: Core. + */ + +#include "src/SparseCore/SparseUtil.h" +#include "src/SparseCore/SparseMatrixBase.h" +#include "src/SparseCore/SparseAssign.h" +#include "src/SparseCore/CompressedStorage.h" +#include "src/SparseCore/AmbiVector.h" +#include "src/SparseCore/SparseCompressedBase.h" +#include "src/SparseCore/SparseMatrix.h" +#include "src/SparseCore/SparseMap.h" +#include "src/SparseCore/MappedSparseMatrix.h" +#include "src/SparseCore/SparseVector.h" +#include "src/SparseCore/SparseRef.h" +#include "src/SparseCore/SparseCwiseUnaryOp.h" +#include "src/SparseCore/SparseCwiseBinaryOp.h" +#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseBlock.h" +#include "src/SparseCore/SparseDot.h" +#include "src/SparseCore/SparseRedux.h" +#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparseDiagonalProduct.h" +#include "src/SparseCore/ConservativeSparseSparseProduct.h" +#include "src/SparseCore/SparseSparseProductWithPruning.h" +#include "src/SparseCore/SparseProduct.h" +#include "src/SparseCore/SparseDenseProduct.h" +#include "src/SparseCore/SparseSelfAdjointView.h" +#include "src/SparseCore/SparseTriangularView.h" +#include "src/SparseCore/TriangularSolver.h" +#include "src/SparseCore/SparsePermutation.h" +#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseSolverBase.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPARSECORE_MODULE_H + diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseLU b/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseLU new file mode 100644 index 0000000..38b38b5 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseLU @@ -0,0 +1,46 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// Copyright (C) 2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSELU_MODULE_H +#define EIGEN_SPARSELU_MODULE_H + +#include "SparseCore" + +/** + * \defgroup SparseLU_Module SparseLU module + * This module defines a supernodal factorization of general sparse matrices. + * The code is fully optimized for supernode-panel updates with specialized kernels. + * Please, see the documentation of the SparseLU class for more details. + */ + +// Ordering interface +#include "OrderingMethods" + +#include "src/SparseLU/SparseLU_gemm_kernel.h" + +#include "src/SparseLU/SparseLU_Structs.h" +#include "src/SparseLU/SparseLU_SupernodalMatrix.h" +#include "src/SparseLU/SparseLUImpl.h" +#include "src/SparseCore/SparseColEtree.h" +#include "src/SparseLU/SparseLU_Memory.h" +#include "src/SparseLU/SparseLU_heap_relax_snode.h" +#include "src/SparseLU/SparseLU_relax_snode.h" +#include "src/SparseLU/SparseLU_pivotL.h" +#include "src/SparseLU/SparseLU_panel_dfs.h" +#include "src/SparseLU/SparseLU_kernel_bmod.h" +#include "src/SparseLU/SparseLU_panel_bmod.h" +#include "src/SparseLU/SparseLU_column_dfs.h" +#include "src/SparseLU/SparseLU_column_bmod.h" +#include "src/SparseLU/SparseLU_copy_to_ucol.h" +#include "src/SparseLU/SparseLU_pruneL.h" +#include "src/SparseLU/SparseLU_Utils.h" +#include "src/SparseLU/SparseLU.h" + +#endif // EIGEN_SPARSELU_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseQR b/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseQR new file mode 100644 index 0000000..a6f3b7f --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/SparseQR @@ -0,0 +1,37 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSEQR_MODULE_H +#define EIGEN_SPARSEQR_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup SparseQR_Module SparseQR module + * \brief Provides QR decomposition for sparse matrices + * + * This module provides a simplicial version of the left-looking Sparse QR decomposition. + * The columns of the input matrix should be reordered to limit the fill-in during the + * decomposition. Built-in methods (COLAMD, AMD) or external methods (METIS) can be used to this end. + * See the \link OrderingMethods_Module OrderingMethods\endlink module for the list + * of built-in and external ordering methods. + * + * \code + * #include + * \endcode + * + * + */ + +#include "OrderingMethods" +#include "src/SparseCore/SparseColEtree.h" +#include "src/SparseQR/SparseQR.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/StdDeque b/src/runtime/ref/nn/depend/external/eigen/Eigen/StdDeque new file mode 100644 index 0000000..bc68397 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/StdDeque @@ -0,0 +1,27 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STDDEQUE_MODULE_H +#define EIGEN_STDDEQUE_MODULE_H + +#include "Core" +#include + +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ + +#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) + +#else + +#include "src/StlSupport/StdDeque.h" + +#endif + +#endif // EIGEN_STDDEQUE_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/StdList b/src/runtime/ref/nn/depend/external/eigen/Eigen/StdList new file mode 100644 index 0000000..4c6262c --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/StdList @@ -0,0 +1,26 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STDLIST_MODULE_H +#define EIGEN_STDLIST_MODULE_H + +#include "Core" +#include + +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ + +#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) + +#else + +#include "src/StlSupport/StdList.h" + +#endif + +#endif // EIGEN_STDLIST_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/StdVector b/src/runtime/ref/nn/depend/external/eigen/Eigen/StdVector new file mode 100644 index 0000000..0c4697a --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/StdVector @@ -0,0 +1,27 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STDVECTOR_MODULE_H +#define EIGEN_STDVECTOR_MODULE_H + +#include "Core" +#include + +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ + +#define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) + +#else + +#include "src/StlSupport/StdVector.h" + +#endif + +#endif // EIGEN_STDVECTOR_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/SuperLUSupport b/src/runtime/ref/nn/depend/external/eigen/Eigen/SuperLUSupport new file mode 100644 index 0000000..59312a8 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/SuperLUSupport @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H +#define EIGEN_SUPERLUSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#ifdef EMPTY +#define EIGEN_EMPTY_WAS_ALREADY_DEFINED +#endif + +typedef int int_t; +#include +#include +#include + +// slu_util.h defines a preprocessor token named EMPTY which is really polluting, +// so we remove it in favor of a SUPERLU_EMPTY token. +// If EMPTY was already defined then we don't undef it. + +#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED) +# undef EIGEN_EMPTY_WAS_ALREADY_DEFINED +#elif defined(EMPTY) +# undef EMPTY +#endif + +#define SUPERLU_EMPTY (-1) + +namespace Eigen { struct SluMatrix; } + +/** \ingroup Support_modules + * \defgroup SuperLUSupport_Module SuperLUSupport module + * + * This module provides an interface to the SuperLU library. + * It provides the following factorization class: + * - class SuperLU: a supernodal sequential LU factorization. + * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). + * + * \warning This wrapper requires at least versions 4.0 of SuperLU. The 3.x versions are not supported. + * + * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. + * + * \code + * #include + * \endcode + * + * In order to use this module, the superlu headers must be accessible from the include paths, and your binary must be linked to the superlu library and its dependencies. + * The dependencies depend on how superlu has been compiled. + * For a cmake based project, you can use our FindSuperLU.cmake module to help you in this task. + * + */ + +#include "src/SuperLUSupport/SuperLUSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SUPERLUSUPPORT_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/UmfPackSupport b/src/runtime/ref/nn/depend/external/eigen/Eigen/UmfPackSupport new file mode 100644 index 0000000..00eec80 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/UmfPackSupport @@ -0,0 +1,40 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H +#define EIGEN_UMFPACKSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +} + +/** \ingroup Support_modules + * \defgroup UmfPackSupport_Module UmfPackSupport module + * + * This module provides an interface to the UmfPack library which is part of the suitesparse package. + * It provides the following factorization class: + * - class UmfPackLU: a multifrontal sequential LU factorization. + * + * \code + * #include + * \endcode + * + * In order to use this module, the umfpack headers must be accessible from the include paths, and your binary must be linked to the umfpack library and its dependencies. + * The dependencies depend on how umfpack has been compiled. + * For a cmake based project, you can use our FindUmfPack.cmake module to help you in this task. + * + */ + +#include "src/UmfPackSupport/UmfPackSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_UMFPACKSUPPORT_MODULE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Cholesky/LDLT.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Cholesky/LDLT.h new file mode 100644 index 0000000..fcee7b2 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Cholesky/LDLT.h @@ -0,0 +1,669 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// Copyright (C) 2009 Keir Mierle +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2011 Timothy E. Holy +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LDLT_H +#define EIGEN_LDLT_H + +namespace Eigen { + +namespace internal { + template struct LDLT_Traits; + + // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef + enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite }; +} + +/** \ingroup Cholesky_Module + * + * \class LDLT + * + * \brief Robust Cholesky decomposition of a matrix with pivoting + * + * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition + * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. + * The other triangular part won't be read. + * + * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite + * matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L + * is lower triangular with a unit diagonal and D is a diagonal matrix. + * + * The decomposition uses pivoting to ensure stability, so that L will have + * zeros in the bottom right rank(A) - n submatrix. Avoiding the square root + * on D also stabilizes the computation. + * + * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky + * decomposition to determine whether a system of equations has a solution. + * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT + */ +template class LDLT +{ + public: + typedef _MatrixType MatrixType; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + UpLo = _UpLo + }; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + typedef typename MatrixType::StorageIndex StorageIndex; + typedef Matrix TmpMatrixType; + + typedef Transpositions TranspositionType; + typedef PermutationMatrix PermutationType; + + typedef internal::LDLT_Traits Traits; + + /** \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via LDLT::compute(const MatrixType&). + */ + LDLT() + : m_matrix(), + m_transpositions(), + m_sign(internal::ZeroSign), + m_isInitialized(false) + {} + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa LDLT() + */ + explicit LDLT(Index size) + : m_matrix(size, size), + m_transpositions(size), + m_temporary(size), + m_sign(internal::ZeroSign), + m_isInitialized(false) + {} + + /** \brief Constructor with decomposition + * + * This calculates the decomposition for the input \a matrix. + * + * \sa LDLT(Index size) + */ + template + explicit LDLT(const EigenBase& matrix) + : m_matrix(matrix.rows(), matrix.cols()), + m_transpositions(matrix.rows()), + m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), + m_isInitialized(false) + { + compute(matrix.derived()); + } + + /** \brief Constructs a LDLT factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. + * + * \sa LDLT(const EigenBase&) + */ + template + explicit LDLT(EigenBase& matrix) + : m_matrix(matrix.derived()), + m_transpositions(matrix.rows()), + m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), + m_isInitialized(false) + { + compute(matrix.derived()); + } + + /** Clear any existing decomposition + * \sa rankUpdate(w,sigma) + */ + void setZero() + { + m_isInitialized = false; + } + + /** \returns a view of the upper triangular matrix U */ + inline typename Traits::MatrixU matrixU() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return Traits::getU(m_matrix); + } + + /** \returns a view of the lower triangular matrix L */ + inline typename Traits::MatrixL matrixL() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return Traits::getL(m_matrix); + } + + /** \returns the permutation matrix P as a transposition sequence. + */ + inline const TranspositionType& transpositionsP() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_transpositions; + } + + /** \returns the coefficients of the diagonal matrix D */ + inline Diagonal vectorD() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_matrix.diagonal(); + } + + /** \returns true if the matrix is positive (semidefinite) */ + inline bool isPositive() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign; + } + + /** \returns true if the matrix is negative (semidefinite) */ + inline bool isNegative(void) const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign; + } + + /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * This function also supports in-place solves using the syntax x = decompositionObject.solve(x) . + * + * \note_about_checking_solutions + * + * More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$ + * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$, + * \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then + * \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the + * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function + * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular. + * + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt() + */ + template + inline const Solve + solve(const MatrixBase& b) const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + eigen_assert(m_matrix.rows()==b.rows() + && "LDLT::solve(): invalid number of rows of the right hand side matrix b"); + return Solve(*this, b.derived()); + } + + template + bool solveInPlace(MatrixBase &bAndX) const; + + template + LDLT& compute(const EigenBase& matrix); + + /** \returns an estimate of the reciprocal condition number of the matrix of + * which \c *this is the LDLT decomposition. + */ + RealScalar rcond() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return internal::rcond_estimate_helper(m_l1_norm, *this); + } + + template + LDLT& rankUpdate(const MatrixBase& w, const RealScalar& alpha=1); + + /** \returns the internal LDLT decomposition matrix + * + * TODO: document the storage layout + */ + inline const MatrixType& matrixLDLT() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_matrix; + } + + MatrixType reconstructedMatrix() const; + + /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint. + * + * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: + * \code x = decomposition.adjoint().solve(b) \endcode + */ + const LDLT& adjoint() const { return *this; }; + + inline Index rows() const { return m_matrix.rows(); } + inline Index cols() const { return m_matrix.cols(); } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the matrix.appears to be negative. + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_info; + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif + + protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + + /** \internal + * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U. + * The strict upper part is used during the decomposition, the strict lower + * part correspond to the coefficients of L (its diagonal is equal to 1 and + * is not stored), and the diagonal entries correspond to D. + */ + MatrixType m_matrix; + RealScalar m_l1_norm; + TranspositionType m_transpositions; + TmpMatrixType m_temporary; + internal::SignMatrix m_sign; + bool m_isInitialized; + ComputationInfo m_info; +}; + +namespace internal { + +template struct ldlt_inplace; + +template<> struct ldlt_inplace +{ + template + static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) + { + using std::abs; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename TranspositionType::StorageIndex IndexType; + eigen_assert(mat.rows()==mat.cols()); + const Index size = mat.rows(); + bool found_zero_pivot = false; + bool ret = true; + + if (size <= 1) + { + transpositions.setIdentity(); + if (numext::real(mat.coeff(0,0)) > static_cast(0) ) sign = PositiveSemiDef; + else if (numext::real(mat.coeff(0,0)) < static_cast(0)) sign = NegativeSemiDef; + else sign = ZeroSign; + return true; + } + + for (Index k = 0; k < size; ++k) + { + // Find largest diagonal element + Index index_of_biggest_in_corner; + mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); + index_of_biggest_in_corner += k; + + transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner); + if(k != index_of_biggest_in_corner) + { + // apply the transposition while taking care to consider only + // the lower triangular part + Index s = size-index_of_biggest_in_corner-1; // trailing size after the biggest element + mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k)); + mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s)); + std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner)); + for(Index i=k+1;i::IsComplex) + mat.coeffRef(index_of_biggest_in_corner,k) = numext::conj(mat.coeff(index_of_biggest_in_corner,k)); + } + + // partition the matrix: + // A00 | - | - + // lu = A10 | A11 | - + // A20 | A21 | A22 + Index rs = size - k - 1; + Block A21(mat,k+1,k,rs,1); + Block A10(mat,k,0,1,k); + Block A20(mat,k+1,0,rs,k); + + if(k>0) + { + temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint(); + mat.coeffRef(k,k) -= (A10 * temp.head(k)).value(); + if(rs>0) + A21.noalias() -= A20 * temp.head(k); + } + + // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot + // was smaller than the cutoff value. However, since LDLT is not rank-revealing + // we should only make sure that we do not introduce INF or NaN values. + // Remark that LAPACK also uses 0 as the cutoff value. + RealScalar realAkk = numext::real(mat.coeffRef(k,k)); + bool pivot_is_valid = (abs(realAkk) > RealScalar(0)); + + if(k==0 && !pivot_is_valid) + { + // The entire diagonal is zero, there is nothing more to do + // except filling the transpositions, and checking whether the matrix is zero. + sign = ZeroSign; + for(Index j = 0; j0) && pivot_is_valid) + A21 /= realAkk; + + if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed + else if(!pivot_is_valid) found_zero_pivot = true; + + if (sign == PositiveSemiDef) { + if (realAkk < static_cast(0)) sign = Indefinite; + } else if (sign == NegativeSemiDef) { + if (realAkk > static_cast(0)) sign = Indefinite; + } else if (sign == ZeroSign) { + if (realAkk > static_cast(0)) sign = PositiveSemiDef; + else if (realAkk < static_cast(0)) sign = NegativeSemiDef; + } + } + + return ret; + } + + // Reference for the algorithm: Davis and Hager, "Multiple Rank + // Modifications of a Sparse Cholesky Factorization" (Algorithm 1) + // Trivial rearrangements of their computations (Timothy E. Holy) + // allow their algorithm to work for rank-1 updates even if the + // original matrix is not of full rank. + // Here only rank-1 updates are implemented, to reduce the + // requirement for intermediate storage and improve accuracy + template + static bool updateInPlace(MatrixType& mat, MatrixBase& w, const typename MatrixType::RealScalar& sigma=1) + { + using numext::isfinite; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + const Index size = mat.rows(); + eigen_assert(mat.cols() == size && w.size()==size); + + RealScalar alpha = 1; + + // Apply the update + for (Index j = 0; j < size; j++) + { + // Check for termination due to an original decomposition of low-rank + if (!(isfinite)(alpha)) + break; + + // Update the diagonal terms + RealScalar dj = numext::real(mat.coeff(j,j)); + Scalar wj = w.coeff(j); + RealScalar swj2 = sigma*numext::abs2(wj); + RealScalar gamma = dj*alpha + swj2; + + mat.coeffRef(j,j) += swj2/alpha; + alpha += swj2/dj; + + + // Update the terms of L + Index rs = size-j-1; + w.tail(rs) -= wj * mat.col(j).tail(rs); + if(gamma != 0) + mat.col(j).tail(rs) += (sigma*numext::conj(wj)/gamma)*w.tail(rs); + } + return true; + } + + template + static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, const typename MatrixType::RealScalar& sigma=1) + { + // Apply the permutation to the input w + tmp = transpositions * w; + + return ldlt_inplace::updateInPlace(mat,tmp,sigma); + } +}; + +template<> struct ldlt_inplace +{ + template + static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) + { + Transpose matt(mat); + return ldlt_inplace::unblocked(matt, transpositions, temp, sign); + } + + template + static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, const typename MatrixType::RealScalar& sigma=1) + { + Transpose matt(mat); + return ldlt_inplace::update(matt, transpositions, tmp, w.conjugate(), sigma); + } +}; + +template struct LDLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } +}; + +template struct LDLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } +}; + +} // end namespace internal + +/** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix + */ +template +template +LDLT& LDLT::compute(const EigenBase& a) +{ + check_template_parameters(); + + eigen_assert(a.rows()==a.cols()); + const Index size = a.rows(); + + m_matrix = a.derived(); + + // Compute matrix L1 norm = max abs column sum. + m_l1_norm = RealScalar(0); + // TODO move this code to SelfAdjointView + for (Index col = 0; col < size; ++col) { + RealScalar abs_col_sum; + if (_UpLo == Lower) + abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>(); + else + abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); + if (abs_col_sum > m_l1_norm) + m_l1_norm = abs_col_sum; + } + + m_transpositions.resize(size); + m_isInitialized = false; + m_temporary.resize(size); + m_sign = internal::ZeroSign; + + m_info = internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue; + + m_isInitialized = true; + return *this; +} + +/** Update the LDLT decomposition: given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T. + * \param w a vector to be incorporated into the decomposition. + * \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1. + * \sa setZero() + */ +template +template +LDLT& LDLT::rankUpdate(const MatrixBase& w, const typename LDLT::RealScalar& sigma) +{ + typedef typename TranspositionType::StorageIndex IndexType; + const Index size = w.rows(); + if (m_isInitialized) + { + eigen_assert(m_matrix.rows()==size); + } + else + { + m_matrix.resize(size,size); + m_matrix.setZero(); + m_transpositions.resize(size); + for (Index i = 0; i < size; i++) + m_transpositions.coeffRef(i) = IndexType(i); + m_temporary.resize(size); + m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef; + m_isInitialized = true; + } + + internal::ldlt_inplace::update(m_matrix, m_transpositions, m_temporary, w, sigma); + + return *this; +} + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const +{ + eigen_assert(rhs.rows() == rows()); + // dst = P b + dst = m_transpositions * rhs; + + // dst = L^-1 (P b) + matrixL().solveInPlace(dst); + + // dst = D^-1 (L^-1 P b) + // more precisely, use pseudo-inverse of D (see bug 241) + using std::abs; + const typename Diagonal::RealReturnType vecD(vectorD()); + // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon + // as motivated by LAPACK's xGELSS: + // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); + // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest + // diagonal element is not well justified and leads to numerical issues in some cases. + // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. + RealScalar tolerance = RealScalar(1) / NumTraits::highest(); + + for (Index i = 0; i < vecD.size(); ++i) + { + if(abs(vecD(i)) > tolerance) + dst.row(i) /= vecD(i); + else + dst.row(i).setZero(); + } + + // dst = L^-T (D^-1 L^-1 P b) + matrixU().solveInPlace(dst); + + // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b + dst = m_transpositions.transpose() * dst; +} +#endif + +/** \internal use x = ldlt_object.solve(x); + * + * This is the \em in-place version of solve(). + * + * \param bAndX represents both the right-hand side matrix b and result x. + * + * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD. + * + * This version avoids a copy when the right hand side matrix b is not + * needed anymore. + * + * \sa LDLT::solve(), MatrixBase::ldlt() + */ +template +template +bool LDLT::solveInPlace(MatrixBase &bAndX) const +{ + eigen_assert(m_isInitialized && "LDLT is not initialized."); + eigen_assert(m_matrix.rows() == bAndX.rows()); + + bAndX = this->solve(bAndX); + + return true; +} + +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: P^T L D L^* P. + * This function is provided for debug purpose. */ +template +MatrixType LDLT::reconstructedMatrix() const +{ + eigen_assert(m_isInitialized && "LDLT is not initialized."); + const Index size = m_matrix.rows(); + MatrixType res(size,size); + + // P + res.setIdentity(); + res = transpositionsP() * res; + // L^* P + res = matrixU() * res; + // D(L^*P) + res = vectorD().real().asDiagonal() * res; + // L(DL^*P) + res = matrixL() * res; + // P^T (LDL^*P) + res = transpositionsP().transpose() * res; + + return res; +} + +/** \cholesky_module + * \returns the Cholesky decomposition with full pivoting without square root of \c *this + * \sa MatrixBase::ldlt() + */ +template +inline const LDLT::PlainObject, UpLo> +SelfAdjointView::ldlt() const +{ + return LDLT(m_matrix); +} + +/** \cholesky_module + * \returns the Cholesky decomposition with full pivoting without square root of \c *this + * \sa SelfAdjointView::ldlt() + */ +template +inline const LDLT::PlainObject> +MatrixBase::ldlt() const +{ + return LDLT(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_LDLT_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Cholesky/LLT.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Cholesky/LLT.h new file mode 100644 index 0000000..87ca8d4 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Cholesky/LLT.h @@ -0,0 +1,534 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LLT_H +#define EIGEN_LLT_H + +namespace Eigen { + +namespace internal{ +template struct LLT_Traits; +} + +/** \ingroup Cholesky_Module + * + * \class LLT + * + * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features + * + * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition + * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. + * The other triangular part won't be read. + * + * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite + * matrix A such that A = LL^* = U^*U, where L is lower triangular. + * + * While the Cholesky decomposition is particularly useful to solve selfadjoint problems like D^*D x = b, + * for that purpose, we recommend the Cholesky decomposition without square root which is more stable + * and even faster. Nevertheless, this standard Cholesky decomposition remains useful in many other + * situations like generalised eigen problems with hermitian matrices. + * + * Remember that Cholesky decompositions are not rank-revealing. This LLT decomposition is only stable on positive definite matrices, + * use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations + * has a solution. + * + * Example: \include LLT_example.cpp + * Output: \verbinclude LLT_example.out + * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * + * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT + */ + /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) + * Note that during the decomposition, only the upper triangular part of A is considered. Therefore, + * the strict lower part does not have to store correct values. + */ +template class LLT +{ + public: + typedef _MatrixType MatrixType; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + typedef typename MatrixType::StorageIndex StorageIndex; + + enum { + PacketSize = internal::packet_traits::size, + AlignmentMask = int(PacketSize)-1, + UpLo = _UpLo + }; + + typedef internal::LLT_Traits Traits; + + /** + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via LLT::compute(const MatrixType&). + */ + LLT() : m_matrix(), m_isInitialized(false) {} + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa LLT() + */ + explicit LLT(Index size) : m_matrix(size, size), + m_isInitialized(false) {} + + template + explicit LLT(const EigenBase& matrix) + : m_matrix(matrix.rows(), matrix.cols()), + m_isInitialized(false) + { + compute(matrix.derived()); + } + + /** \brief Constructs a LDLT factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when + * \c MatrixType is a Eigen::Ref. + * + * \sa LLT(const EigenBase&) + */ + template + explicit LLT(EigenBase& matrix) + : m_matrix(matrix.derived()), + m_isInitialized(false) + { + compute(matrix.derived()); + } + + /** \returns a view of the upper triangular matrix U */ + inline typename Traits::MatrixU matrixU() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return Traits::getU(m_matrix); + } + + /** \returns a view of the lower triangular matrix L */ + inline typename Traits::MatrixL matrixL() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return Traits::getL(m_matrix); + } + + /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * Since this LLT class assumes anyway that the matrix A is invertible, the solution + * theoretically exists and is unique regardless of b. + * + * Example: \include LLT_solve.cpp + * Output: \verbinclude LLT_solve.out + * + * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt() + */ + template + inline const Solve + solve(const MatrixBase& b) const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_matrix.rows()==b.rows() + && "LLT::solve(): invalid number of rows of the right hand side matrix b"); + return Solve(*this, b.derived()); + } + + template + void solveInPlace(MatrixBase &bAndX) const; + + template + LLT& compute(const EigenBase& matrix); + + /** \returns an estimate of the reciprocal condition number of the matrix of + * which \c *this is the Cholesky decomposition. + */ + RealScalar rcond() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_info == Success && "LLT failed because matrix appears to be negative"); + return internal::rcond_estimate_helper(m_l1_norm, *this); + } + + /** \returns the LLT decomposition matrix + * + * TODO: document the storage layout + */ + inline const MatrixType& matrixLLT() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return m_matrix; + } + + MatrixType reconstructedMatrix() const; + + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the matrix.appears to be negative. + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return m_info; + } + + /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint. + * + * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: + * \code x = decomposition.adjoint().solve(b) \endcode + */ + const LLT& adjoint() const { return *this; }; + + inline Index rows() const { return m_matrix.rows(); } + inline Index cols() const { return m_matrix.cols(); } + + template + LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1); + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif + + protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + + /** \internal + * Used to compute and store L + * The strict upper part is not used and even not initialized. + */ + MatrixType m_matrix; + RealScalar m_l1_norm; + bool m_isInitialized; + ComputationInfo m_info; +}; + +namespace internal { + +template struct llt_inplace; + +template +static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) +{ + using std::sqrt; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::ColXpr ColXpr; + typedef typename internal::remove_all::type ColXprCleaned; + typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; + typedef Matrix TempVectorType; + typedef typename TempVectorType::SegmentReturnType TempVecSegment; + + Index n = mat.cols(); + eigen_assert(mat.rows()==n && vec.size()==n); + + TempVectorType temp; + + if(sigma>0) + { + // This version is based on Givens rotations. + // It is faster than the other one below, but only works for updates, + // i.e., for sigma > 0 + temp = sqrt(sigma) * vec; + + for(Index i=0; i g; + g.makeGivens(mat(i,i), -temp(i), &mat(i,i)); + + Index rs = n-i-1; + if(rs>0) + { + ColXprSegment x(mat.col(i).tail(rs)); + TempVecSegment y(temp.tail(rs)); + apply_rotation_in_the_plane(x, y, g); + } + } + } + else + { + temp = vec; + RealScalar beta = 1; + for(Index j=0; j struct llt_inplace +{ + typedef typename NumTraits::Real RealScalar; + template + static Index unblocked(MatrixType& mat) + { + using std::sqrt; + + eigen_assert(mat.rows()==mat.cols()); + const Index size = mat.rows(); + for(Index k = 0; k < size; ++k) + { + Index rs = size-k-1; // remaining size + + Block A21(mat,k+1,k,rs,1); + Block A10(mat,k,0,1,k); + Block A20(mat,k+1,0,rs,k); + + RealScalar x = numext::real(mat.coeff(k,k)); + if (k>0) x -= A10.squaredNorm(); + if (x<=RealScalar(0)) + return k; + mat.coeffRef(k,k) = x = sqrt(x); + if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint(); + if (rs>0) A21 /= x; + } + return -1; + } + + template + static Index blocked(MatrixType& m) + { + eigen_assert(m.rows()==m.cols()); + Index size = m.rows(); + if(size<32) + return unblocked(m); + + Index blockSize = size/8; + blockSize = (blockSize/16)*16; + blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128)); + + for (Index k=0; k A11(m,k, k, bs,bs); + Block A21(m,k+bs,k, rs,bs); + Block A22(m,k+bs,k+bs,rs,rs); + + Index ret; + if((ret=unblocked(A11))>=0) return k+ret; + if(rs>0) A11.adjoint().template triangularView().template solveInPlace(A21); + if(rs>0) A22.template selfadjointView().rankUpdate(A21,typename NumTraits::Literal(-1)); // bottleneck + } + return -1; + } + + template + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + { + return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); + } +}; + +template struct llt_inplace +{ + typedef typename NumTraits::Real RealScalar; + + template + static EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat) + { + Transpose matt(mat); + return llt_inplace::unblocked(matt); + } + template + static EIGEN_STRONG_INLINE Index blocked(MatrixType& mat) + { + Transpose matt(mat); + return llt_inplace::blocked(matt); + } + template + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + { + Transpose matt(mat); + return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); + } +}; + +template struct LLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } + static bool inplace_decomposition(MatrixType& m) + { return llt_inplace::blocked(m)==-1; } +}; + +template struct LLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } + static bool inplace_decomposition(MatrixType& m) + { return llt_inplace::blocked(m)==-1; } +}; + +} // end namespace internal + +/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix + * + * \returns a reference to *this + * + * Example: \include TutorialLinAlgComputeTwice.cpp + * Output: \verbinclude TutorialLinAlgComputeTwice.out + */ +template +template +LLT& LLT::compute(const EigenBase& a) +{ + check_template_parameters(); + + eigen_assert(a.rows()==a.cols()); + const Index size = a.rows(); + m_matrix.resize(size, size); + m_matrix = a.derived(); + + // Compute matrix L1 norm = max abs column sum. + m_l1_norm = RealScalar(0); + // TODO move this code to SelfAdjointView + for (Index col = 0; col < size; ++col) { + RealScalar abs_col_sum; + if (_UpLo == Lower) + abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>(); + else + abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); + if (abs_col_sum > m_l1_norm) + m_l1_norm = abs_col_sum; + } + + m_isInitialized = true; + bool ok = Traits::inplace_decomposition(m_matrix); + m_info = ok ? Success : NumericalIssue; + + return *this; +} + +/** Performs a rank one update (or dowdate) of the current decomposition. + * If A = LL^* before the rank one update, + * then after it we have LL^* = A + sigma * v v^* where \a v must be a vector + * of same dimension. + */ +template +template +LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType); + eigen_assert(v.size()==m_matrix.cols()); + eigen_assert(m_isInitialized); + if(internal::llt_inplace::rankUpdate(m_matrix,v,sigma)>=0) + m_info = NumericalIssue; + else + m_info = Success; + + return *this; +} + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const +{ + dst = rhs; + solveInPlace(dst); +} +#endif + +/** \internal use x = llt_object.solve(x); + * + * This is the \em in-place version of solve(). + * + * \param bAndX represents both the right-hand side matrix b and result x. + * + * This version avoids a copy when the right hand side matrix b is not needed anymore. + * + * \sa LLT::solve(), MatrixBase::llt() + */ +template +template +void LLT::solveInPlace(MatrixBase &bAndX) const +{ + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_matrix.rows()==bAndX.rows()); + matrixL().solveInPlace(bAndX); + matrixU().solveInPlace(bAndX); +} + +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: L L^*. + * This function is provided for debug purpose. */ +template +MatrixType LLT::reconstructedMatrix() const +{ + eigen_assert(m_isInitialized && "LLT is not initialized."); + return matrixL() * matrixL().adjoint().toDenseMatrix(); +} + +/** \cholesky_module + * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() + */ +template +inline const LLT::PlainObject> +MatrixBase::llt() const +{ + return LLT(derived()); +} + +/** \cholesky_module + * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() + */ +template +inline const LLT::PlainObject, UpLo> +SelfAdjointView::llt() const +{ + return LLT(m_matrix); +} + +} // end namespace Eigen + +#endif // EIGEN_LLT_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h new file mode 100644 index 0000000..bc6489e --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h @@ -0,0 +1,99 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to LAPACKe + * LLt decomposition based on LAPACKE_?potrf function. + ******************************************************************************** +*/ + +#ifndef EIGEN_LLT_LAPACKE_H +#define EIGEN_LLT_LAPACKE_H + +namespace Eigen { + +namespace internal { + +template struct lapacke_llt; + +#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \ +template<> struct lapacke_llt \ +{ \ + template \ + static inline Index potrf(MatrixType& m, char uplo) \ + { \ + lapack_int matrix_order; \ + lapack_int size, lda, info, StorageOrder; \ + EIGTYPE* a; \ + eigen_assert(m.rows()==m.cols()); \ + /* Set up parameters for ?potrf */ \ + size = convert_index(m.rows()); \ + StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \ + matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ + a = &(m.coeffRef(0,0)); \ + lda = convert_index(m.outerStride()); \ +\ + info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \ + info = (info==0) ? -1 : info>0 ? info-1 : size; \ + return info; \ + } \ +}; \ +template<> struct llt_inplace \ +{ \ + template \ + static Index blocked(MatrixType& m) \ + { \ + return lapacke_llt::potrf(m, 'L'); \ + } \ + template \ + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \ +}; \ +template<> struct llt_inplace \ +{ \ + template \ + static Index blocked(MatrixType& m) \ + { \ + return lapacke_llt::potrf(m, 'U'); \ + } \ + template \ + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + { \ + Transpose matt(mat); \ + return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); \ + } \ +}; + +EIGEN_LAPACKE_LLT(double, double, d) +EIGEN_LAPACKE_LLT(float, float, s) +EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z) +EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_LLT_LAPACKE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/CholmodSupport/CholmodSupport.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/CholmodSupport/CholmodSupport.h new file mode 100644 index 0000000..5719720 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/CholmodSupport/CholmodSupport.h @@ -0,0 +1,639 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CHOLMODSUPPORT_H +#define EIGEN_CHOLMODSUPPORT_H + +namespace Eigen { + +namespace internal { + +template struct cholmod_configure_matrix; + +template<> struct cholmod_configure_matrix { + template + static void run(CholmodType& mat) { + mat.xtype = CHOLMOD_REAL; + mat.dtype = CHOLMOD_DOUBLE; + } +}; + +template<> struct cholmod_configure_matrix > { + template + static void run(CholmodType& mat) { + mat.xtype = CHOLMOD_COMPLEX; + mat.dtype = CHOLMOD_DOUBLE; + } +}; + +// Other scalar types are not yet suppotred by Cholmod +// template<> struct cholmod_configure_matrix { +// template +// static void run(CholmodType& mat) { +// mat.xtype = CHOLMOD_REAL; +// mat.dtype = CHOLMOD_SINGLE; +// } +// }; +// +// template<> struct cholmod_configure_matrix > { +// template +// static void run(CholmodType& mat) { +// mat.xtype = CHOLMOD_COMPLEX; +// mat.dtype = CHOLMOD_SINGLE; +// } +// }; + +} // namespace internal + +/** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object. + * Note that the data are shared. + */ +template +cholmod_sparse viewAsCholmod(Ref > mat) +{ + cholmod_sparse res; + res.nzmax = mat.nonZeros(); + res.nrow = mat.rows(); + res.ncol = mat.cols(); + res.p = mat.outerIndexPtr(); + res.i = mat.innerIndexPtr(); + res.x = mat.valuePtr(); + res.z = 0; + res.sorted = 1; + if(mat.isCompressed()) + { + res.packed = 1; + res.nz = 0; + } + else + { + res.packed = 0; + res.nz = mat.innerNonZeroPtr(); + } + + res.dtype = 0; + res.stype = -1; + + if (internal::is_same<_StorageIndex,int>::value) + { + res.itype = CHOLMOD_INT; + } + else if (internal::is_same<_StorageIndex,long>::value) + { + res.itype = CHOLMOD_LONG; + } + else + { + eigen_assert(false && "Index type not supported yet"); + } + + // setup res.xtype + internal::cholmod_configure_matrix<_Scalar>::run(res); + + res.stype = 0; + + return res; +} + +template +const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat) +{ + cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); + return res; +} + +template +const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat) +{ + cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); + return res; +} + +/** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix. + * The data are not copied but shared. */ +template +cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) +{ + cholmod_sparse res = viewAsCholmod(Ref >(mat.matrix().const_cast_derived())); + + if(UpLo==Upper) res.stype = 1; + if(UpLo==Lower) res.stype = -1; + + return res; +} + +/** Returns a view of the Eigen \b dense matrix \a mat as Cholmod dense matrix. + * The data are not copied but shared. */ +template +cholmod_dense viewAsCholmod(MatrixBase& mat) +{ + EIGEN_STATIC_ASSERT((internal::traits::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + typedef typename Derived::Scalar Scalar; + + cholmod_dense res; + res.nrow = mat.rows(); + res.ncol = mat.cols(); + res.nzmax = res.nrow * res.ncol; + res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride(); + res.x = (void*)(mat.derived().data()); + res.z = 0; + + internal::cholmod_configure_matrix::run(res); + + return res; +} + +/** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix. + * The data are not copied but shared. */ +template +MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) +{ + return MappedSparseMatrix + (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], + static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); +} + +enum CholmodMode { + CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt +}; + + +/** \ingroup CholmodSupport_Module + * \class CholmodBase + * \brief The base class for the direct Cholesky factorization of Cholmod + * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT + */ +template +class CholmodBase : public SparseSolverBase +{ + protected: + typedef SparseSolverBase Base; + using Base::derived; + using Base::m_isInitialized; + public: + typedef _MatrixType MatrixType; + enum { UpLo = _UpLo }; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef MatrixType CholMatrixType; + typedef typename MatrixType::StorageIndex StorageIndex; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + + public: + + CholmodBase() + : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false) + { + EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); + m_shiftOffset[0] = m_shiftOffset[1] = 0.0; + cholmod_start(&m_cholmod); + } + + explicit CholmodBase(const MatrixType& matrix) + : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false) + { + EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); + m_shiftOffset[0] = m_shiftOffset[1] = 0.0; + cholmod_start(&m_cholmod); + compute(matrix); + } + + ~CholmodBase() + { + if(m_cholmodFactor) + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + cholmod_finish(&m_cholmod); + } + + inline StorageIndex cols() const { return internal::convert_index(m_cholmodFactor->n); } + inline StorageIndex rows() const { return internal::convert_index(m_cholmodFactor->n); } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the matrix.appears to be negative. + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + + /** Computes the sparse Cholesky decomposition of \a matrix */ + Derived& compute(const MatrixType& matrix) + { + analyzePattern(matrix); + factorize(matrix); + return derived(); + } + + /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. + * + * This function is particularly useful when solving for several problems having the same structure. + * + * \sa factorize() + */ + void analyzePattern(const MatrixType& matrix) + { + if(m_cholmodFactor) + { + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + m_cholmodFactor = 0; + } + cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); + m_cholmodFactor = cholmod_analyze(&A, &m_cholmod); + + this->m_isInitialized = true; + this->m_info = Success; + m_analysisIsOk = true; + m_factorizationIsOk = false; + } + + /** Performs a numeric decomposition of \a matrix + * + * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed. + * + * \sa analyzePattern() + */ + void factorize(const MatrixType& matrix) + { + eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); + cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); + cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod); + + // If the factorization failed, minor is the column at which it did. On success minor == n. + this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); + m_factorizationIsOk = true; + } + + /** Returns a reference to the Cholmod's configuration structure to get a full control over the performed operations. + * See the Cholmod user guide for details. */ + cholmod_common& cholmod() { return m_cholmod; } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal */ + template + void _solve_impl(const MatrixBase &b, MatrixBase &dest) const + { + eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); + const Index size = m_cholmodFactor->n; + EIGEN_UNUSED_VARIABLE(size); + eigen_assert(size==b.rows()); + + // Cholmod needs column-major stoarge without inner-stride, which corresponds to the default behavior of Ref. + Ref > b_ref(b.derived()); + + cholmod_dense b_cd = viewAsCholmod(b_ref); + cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); + if(!x_cd) + { + this->m_info = NumericalIssue; + return; + } + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + dest = Matrix::Map(reinterpret_cast(x_cd->x),b.rows(),b.cols()); + cholmod_free_dense(&x_cd, &m_cholmod); + } + + /** \internal */ + template + void _solve_impl(const SparseMatrixBase &b, SparseMatrixBase &dest) const + { + eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); + const Index size = m_cholmodFactor->n; + EIGEN_UNUSED_VARIABLE(size); + eigen_assert(size==b.rows()); + + // note: cs stands for Cholmod Sparse + Ref > b_ref(b.const_cast_derived()); + cholmod_sparse b_cs = viewAsCholmod(b_ref); + cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod); + if(!x_cs) + { + this->m_info = NumericalIssue; + return; + } + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + dest.derived() = viewAsEigen(*x_cs); + cholmod_free_sparse(&x_cs, &m_cholmod); + } + #endif // EIGEN_PARSED_BY_DOXYGEN + + + /** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization. + * + * During the numerical factorization, an offset term is added to the diagonal coefficients:\n + * \c d_ii = \a offset + \c d_ii + * + * The default is \a offset=0. + * + * \returns a reference to \c *this. + */ + Derived& setShift(const RealScalar& offset) + { + m_shiftOffset[0] = double(offset); + return derived(); + } + + /** \returns the determinant of the underlying matrix from the current factorization */ + Scalar determinant() const + { + using std::exp; + return exp(logDeterminant()); + } + + /** \returns the log determinant of the underlying matrix from the current factorization */ + Scalar logDeterminant() const + { + using std::log; + using numext::real; + eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); + + RealScalar logDet = 0; + Scalar *x = static_cast(m_cholmodFactor->x); + if (m_cholmodFactor->is_super) + { + // Supernodal factorization stored as a packed list of dense column-major blocs, + // as described by the following structure: + + // super[k] == index of the first column of the j-th super node + StorageIndex *super = static_cast(m_cholmodFactor->super); + // pi[k] == offset to the description of row indices + StorageIndex *pi = static_cast(m_cholmodFactor->pi); + // px[k] == offset to the respective dense block + StorageIndex *px = static_cast(m_cholmodFactor->px); + + Index nb_super_nodes = m_cholmodFactor->nsuper; + for (Index k=0; k < nb_super_nodes; ++k) + { + StorageIndex ncols = super[k + 1] - super[k]; + StorageIndex nrows = pi[k + 1] - pi[k]; + + Map, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1)); + logDet += sk.real().log().sum(); + } + } + else + { + // Simplicial factorization stored as standard CSC matrix. + StorageIndex *p = static_cast(m_cholmodFactor->p); + Index size = m_cholmodFactor->n; + for (Index k=0; kis_ll) + logDet *= 2.0; + return logDet; + }; + + template + void dumpMemory(Stream& /*s*/) + {} + + protected: + mutable cholmod_common m_cholmod; + cholmod_factor* m_cholmodFactor; + double m_shiftOffset[2]; + mutable ComputationInfo m_info; + int m_factorizationIsOk; + int m_analysisIsOk; +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodSimplicialLLT + * \brief A simplicial direct Cholesky (LLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization + * using the Cholmod library. + * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. + * + * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT + */ +template +class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodSimplicialLLT() : Base() { init(); } + + CholmodSimplicialLLT(const MatrixType& matrix) : Base() + { + init(); + this->compute(matrix); + } + + ~CholmodSimplicialLLT() {} + protected: + void init() + { + m_cholmod.final_asis = 0; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + m_cholmod.final_ll = 1; + } +}; + + +/** \ingroup CholmodSupport_Module + * \class CholmodSimplicialLDLT + * \brief A simplicial direct Cholesky (LDLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization + * using the Cholmod library. + * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. + * + * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT + */ +template +class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodSimplicialLDLT() : Base() { init(); } + + CholmodSimplicialLDLT(const MatrixType& matrix) : Base() + { + init(); + this->compute(matrix); + } + + ~CholmodSimplicialLDLT() {} + protected: + void init() + { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + } +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodSupernodalLLT + * \brief A supernodal Cholesky (LLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization + * using the Cholmod library. + * This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. + * + * \sa \ref TutorialSparseSolverConcept + */ +template +class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodSupernodalLLT() : Base() { init(); } + + CholmodSupernodalLLT(const MatrixType& matrix) : Base() + { + init(); + this->compute(matrix); + } + + ~CholmodSupernodalLLT() {} + protected: + void init() + { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SUPERNODAL; + } +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodDecomposition + * \brief A general Cholesky factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization + * using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * This variant permits to change the underlying Cholesky method at runtime. + * On the other hand, it does not provide access to the result of the factorization. + * The default is to let Cholmod automatically choose between a simplicial and supernodal factorization. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. + * + * \sa \ref TutorialSparseSolverConcept + */ +template +class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodDecomposition> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodDecomposition() : Base() { init(); } + + CholmodDecomposition(const MatrixType& matrix) : Base() + { + init(); + this->compute(matrix); + } + + ~CholmodDecomposition() {} + + void setMode(CholmodMode mode) + { + switch(mode) + { + case CholmodAuto: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_AUTO; + break; + case CholmodSimplicialLLt: + m_cholmod.final_asis = 0; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + m_cholmod.final_ll = 1; + break; + case CholmodSupernodalLLt: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SUPERNODAL; + break; + case CholmodLDLt: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + break; + default: + break; + } + } + protected: + void init() + { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_AUTO; + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CHOLMODSUPPORT_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Array.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Array.h new file mode 100644 index 0000000..e10020d --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Array.h @@ -0,0 +1,331 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAY_H +#define EIGEN_ARRAY_H + +namespace Eigen { + +namespace internal { +template +struct traits > : traits > +{ + typedef ArrayXpr XprKind; + typedef ArrayBase > XprBase; +}; +} + +/** \class Array + * \ingroup Core_Module + * + * \brief General-purpose arrays with easy API for coefficient-wise operations + * + * The %Array class is very similar to the Matrix class. It provides + * general-purpose one- and two-dimensional arrays. The difference between the + * %Array and the %Matrix class is primarily in the API: the API for the + * %Array class provides easy access to coefficient-wise operations, while the + * API for the %Matrix class provides easy access to linear-algebra + * operations. + * + * See documentation of class Matrix for detailed information on the template parameters + * storage layout. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. + * + * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy + */ +template +class Array + : public PlainObjectBase > +{ + public: + + typedef PlainObjectBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Array) + + enum { Options = _Options }; + typedef typename Base::PlainObject PlainObject; + + protected: + template + friend struct internal::conservative_resize_like_impl; + + using Base::m_storage; + + public: + + using Base::base; + using Base::coeff; + using Base::coeffRef; + + /** + * The usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const EigenBase &other) + { + return Base::operator=(other); + } + + /** Set all the entries to \a value. + * \sa DenseBase::setConstant(), DenseBase::fill() + */ + /* This overload is needed because the usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const Scalar &value) + { + Base::setConstant(value); + return *this; + } + + /** Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const DenseBase& other) + { + return Base::_set(other); + } + + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const Array& other) + { + return Base::_set(other); + } + + /** Default constructor. + * + * For fixed-size matrices, does nothing. + * + * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix + * is called a null matrix. This constructor is the unique way to create null matrices: resizing + * a matrix to 0 is not supported. + * + * \sa resize(Index,Index) + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array() : Base() + { + Base::_check_template_params(); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + // FIXME is it still needed ?? + /** \internal */ + EIGEN_DEVICE_FUNC + Array(internal::constructor_without_unaligned_array_assert) + : Base(internal::constructor_without_unaligned_array_assert()) + { + Base::_check_template_params(); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } +#endif + +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) + : Base(std::move(other)) + { + Base::_check_template_params(); + if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) + Base::_set_noalias(other); + } + EIGEN_DEVICE_FUNC + Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) + { + other.swap(*this); + return *this; + } +#endif + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Array(const T& x) + { + Base::_check_template_params(); + Base::template _init1(x); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1) + { + Base::_check_template_params(); + this->template _init2(val0, val1); + } + #else + /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC explicit Array(const Scalar *data); + /** Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass the dimension here, so it makes more sense to use the default + * constructor Array() instead. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Array(Index dim); + /** constructs an initialized 1x1 Array with the given coefficient */ + Array(const Scalar& value); + /** constructs an uninitialized array with \a rows rows and \a cols columns. + * + * This is useful for dynamic-size arrays. For fixed-size arrays, + * it is redundant to pass these parameters, so one should use the default constructor + * Array() instead. */ + Array(Index rows, Index cols); + /** constructs an initialized 2D vector with given coefficients */ + Array(const Scalar& val0, const Scalar& val1); + #endif + + /** constructs an initialized 3D vector with given coefficients */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3) + m_storage.data()[0] = val0; + m_storage.data()[1] = val1; + m_storage.data()[2] = val2; + } + /** constructs an initialized 4D vector with given coefficients */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4) + m_storage.data()[0] = val0; + m_storage.data()[1] = val1; + m_storage.data()[2] = val2; + m_storage.data()[3] = val3; + } + + /** Copy constructor */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const Array& other) + : Base(other) + { } + + private: + struct PrivateType {}; + public: + + /** \sa MatrixBase::operator=(const EigenBase&) */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const EigenBase &other, + typename internal::enable_if::value, + PrivateType>::type = PrivateType()) + : Base(other.derived()) + { } + + EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } + + #ifdef EIGEN_ARRAY_PLUGIN + #include EIGEN_ARRAY_PLUGIN + #endif + + private: + + template + friend struct internal::matrix_swap_impl; +}; + +/** \defgroup arraytypedefs Global array typedefs + * \ingroup Core_Module + * + * Eigen defines several typedef shortcuts for most common 1D and 2D array types. + * + * The general patterns are the following: + * + * \c ArrayRowsColsType where \c Rows and \c Cols can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size, + * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd + * for complex double. + * + * For example, \c Array33d is a fixed-size 3x3 array type of doubles, and \c ArrayXXf is a dynamic-size matrix of floats. + * + * There are also \c ArraySizeType which are self-explanatory. For example, \c Array4cf is + * a fixed-size 1D array of 4 complex floats. + * + * \sa class Array + */ + +#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##SizeSuffix##SizeSuffix##TypeSuffix; \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##SizeSuffix##TypeSuffix; + +#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##Size##X##TypeSuffix; \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##X##Size##TypeSuffix; + +#define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \ +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \ +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \ +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4) + +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int, i) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float, f) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double, d) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex, cf) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex, cd) + +#undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES +#undef EIGEN_MAKE_ARRAY_TYPEDEFS + +#undef EIGEN_MAKE_ARRAY_TYPEDEFS_LARGE + +#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ +using Eigen::Matrix##SizeSuffix##TypeSuffix; \ +using Eigen::Vector##SizeSuffix##TypeSuffix; \ +using Eigen::RowVector##SizeSuffix##TypeSuffix; + +#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \ + +#define EIGEN_USING_ARRAY_TYPEDEFS \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd) + +} // end namespace Eigen + +#endif // EIGEN_ARRAY_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ArrayBase.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ArrayBase.h new file mode 100644 index 0000000..3dbc708 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ArrayBase.h @@ -0,0 +1,226 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAYBASE_H +#define EIGEN_ARRAYBASE_H + +namespace Eigen { + +template class MatrixWrapper; + +/** \class ArrayBase + * \ingroup Core_Module + * + * \brief Base class for all 1D and 2D array, and related expressions + * + * An array is similar to a dense vector or matrix. While matrices are mathematical + * objects with well defined linear algebra operators, an array is just a collection + * of scalar values arranged in a one or two dimensionnal fashion. As the main consequence, + * all operations applied to an array are performed coefficient wise. Furthermore, + * arrays support scalar math functions of the c++ standard library (e.g., std::sin(x)), and convenient + * constructors allowing to easily write generic code working for both scalar values + * and arrays. + * + * This class is the base that is inherited by all array expression types. + * + * \tparam Derived is the derived type, e.g., an array or an expression type. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN. + * + * \sa class MatrixBase, \ref TopicClassHierarchy + */ +template class ArrayBase + : public DenseBase +{ + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** The base class for a given storage type. */ + typedef ArrayBase StorageBaseType; + + typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + typedef DenseBase Base; + using Base::RowsAtCompileTime; + using Base::ColsAtCompileTime; + using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + + using Base::derived; + using Base::const_cast_derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + using Base::lazyAssign; + using Base::operator=; + using Base::operator+=; + using Base::operator-=; + using Base::operator*=; + using Base::operator/=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Base::PlainObject PlainObject; + + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase +#define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" +# include "../plugins/MatrixCwiseUnaryOps.h" +# include "../plugins/ArrayCwiseUnaryOps.h" +# include "../plugins/CommonCwiseBinaryOps.h" +# include "../plugins/MatrixCwiseBinaryOps.h" +# include "../plugins/ArrayCwiseBinaryOps.h" +# ifdef EIGEN_ARRAYBASE_PLUGIN +# include EIGEN_ARRAYBASE_PLUGIN +# endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const ArrayBase& other) + { + internal::call_assignment(derived(), other.derived()); + return derived(); + } + + /** Set all the entries to \a value. + * \sa DenseBase::setConstant(), DenseBase::fill() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const Scalar &value) + { Base::setConstant(value); return derived(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator+=(const Scalar& scalar); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator-=(const Scalar& scalar); + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator+=(const ArrayBase& other); + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator-=(const ArrayBase& other); + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator*=(const ArrayBase& other); + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator/=(const ArrayBase& other); + + public: + EIGEN_DEVICE_FUNC + ArrayBase& array() { return *this; } + EIGEN_DEVICE_FUNC + const ArrayBase& array() const { return *this; } + + /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array + * \sa MatrixBase::array() */ + EIGEN_DEVICE_FUNC + MatrixWrapper matrix() { return MatrixWrapper(derived()); } + EIGEN_DEVICE_FUNC + const MatrixWrapper matrix() const { return MatrixWrapper(derived()); } + +// template +// inline void evalTo(Dest& dst) const { dst = matrix(); } + + protected: + EIGEN_DEVICE_FUNC + ArrayBase() : Base() {} + + private: + explicit ArrayBase(Index); + ArrayBase(Index,Index); + template explicit ArrayBase(const ArrayBase&); + protected: + // mixing arrays and matrices is not legal + template Derived& operator+=(const MatrixBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} + // mixing arrays and matrices is not legal + template Derived& operator-=(const MatrixBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} +}; + +/** replaces \c *this by \c *this - \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +ArrayBase::operator-=(const ArrayBase &other) +{ + call_assignment(derived(), other.derived(), internal::sub_assign_op()); + return derived(); +} + +/** replaces \c *this by \c *this + \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +ArrayBase::operator+=(const ArrayBase& other) +{ + call_assignment(derived(), other.derived(), internal::add_assign_op()); + return derived(); +} + +/** replaces \c *this by \c *this * \a other coefficient wise. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +ArrayBase::operator*=(const ArrayBase& other) +{ + call_assignment(derived(), other.derived(), internal::mul_assign_op()); + return derived(); +} + +/** replaces \c *this by \c *this / \a other coefficient wise. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +ArrayBase::operator/=(const ArrayBase& other) +{ + call_assignment(derived(), other.derived(), internal::div_assign_op()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_ARRAYBASE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ArrayWrapper.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ArrayWrapper.h new file mode 100644 index 0000000..688aadd --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ArrayWrapper.h @@ -0,0 +1,209 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAYWRAPPER_H +#define EIGEN_ARRAYWRAPPER_H + +namespace Eigen { + +/** \class ArrayWrapper + * \ingroup Core_Module + * + * \brief Expression of a mathematical vector or matrix as an array object + * + * This class is the return type of MatrixBase::array(), and most of the time + * this is the only way it is use. + * + * \sa MatrixBase::array(), class MatrixWrapper + */ + +namespace internal { +template +struct traits > + : public traits::type > +{ + typedef ArrayXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits::type >::Flags, + LvalueBitFlag = is_lvalue::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag + }; +}; +} + +template +class ArrayWrapper : public ArrayBase > +{ + public: + typedef ArrayBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) + typedef typename internal::remove_all::type NestedExpression; + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + typedef typename internal::ref_selector::non_const_type NestedExpressionType; + + using Base::coeffRef; + + EIGEN_DEVICE_FUNC + explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC + inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC + inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC + inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC + inline Index innerStride() const { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC + inline const Scalar* data() const { return m_expression.data(); } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return m_expression.coeffRef(rowId, colId); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index index) const + { + return m_expression.coeffRef(index); + } + + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& dst) const { dst = m_expression; } + + const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC + nestedExpression() const + { + return m_expression; + } + + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index) */ + EIGEN_DEVICE_FUNC + void resize(Index newSize) { m_expression.resize(newSize); } + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index,Index)*/ + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { m_expression.resize(rows,cols); } + + protected: + NestedExpressionType m_expression; +}; + +/** \class MatrixWrapper + * \ingroup Core_Module + * + * \brief Expression of an array as a mathematical vector or matrix + * + * This class is the return type of ArrayBase::matrix(), and most of the time + * this is the only way it is use. + * + * \sa MatrixBase::matrix(), class ArrayWrapper + */ + +namespace internal { +template +struct traits > + : public traits::type > +{ + typedef MatrixXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits::type >::Flags, + LvalueBitFlag = is_lvalue::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag + }; +}; +} + +template +class MatrixWrapper : public MatrixBase > +{ + public: + typedef MatrixBase > Base; + EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) + typedef typename internal::remove_all::type NestedExpression; + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + typedef typename internal::ref_selector::non_const_type NestedExpressionType; + + using Base::coeffRef; + + EIGEN_DEVICE_FUNC + explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC + inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC + inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC + inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC + inline Index innerStride() const { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC + inline const Scalar* data() const { return m_expression.data(); } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return m_expression.derived().coeffRef(rowId, colId); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index index) const + { + return m_expression.coeffRef(index); + } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + nestedExpression() const + { + return m_expression; + } + + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index) */ + EIGEN_DEVICE_FUNC + void resize(Index newSize) { m_expression.resize(newSize); } + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index,Index)*/ + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { m_expression.resize(rows,cols); } + + protected: + NestedExpressionType m_expression; +}; + +} // end namespace Eigen + +#endif // EIGEN_ARRAYWRAPPER_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Assign.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Assign.h new file mode 100644 index 0000000..53806ba --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Assign.h @@ -0,0 +1,90 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007 Michael Olbrich +// Copyright (C) 2006-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ASSIGN_H +#define EIGEN_ASSIGN_H + +namespace Eigen { + +template +template +EIGEN_STRONG_INLINE Derived& DenseBase + ::lazyAssign(const DenseBase& other) +{ + enum{ + SameType = internal::is_same::value + }; + + EIGEN_STATIC_ASSERT_LVALUE(Derived) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) + EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + + eigen_assert(rows() == other.rows() && cols() == other.cols()); + internal::call_assignment_no_alias(derived(),other.derived()); + + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) +{ + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) +{ + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const MatrixBase& other) +{ + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const DenseBase& other) +{ + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const EigenBase& other) +{ + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const ReturnByValue& other) +{ + other.derived().evalTo(derived()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/AssignEvaluator.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/AssignEvaluator.h new file mode 100644 index 0000000..b0ec7b7 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/AssignEvaluator.h @@ -0,0 +1,935 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ASSIGN_EVALUATOR_H +#define EIGEN_ASSIGN_EVALUATOR_H + +namespace Eigen { + +// This implementation is based on Assign.h + +namespace internal { + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for traversal and unrolling * +***************************************************************************/ + +// copy_using_evaluator_traits is based on assign_traits + +template +struct copy_using_evaluator_traits +{ + typedef typename DstEvaluator::XprType Dst; + typedef typename Dst::Scalar DstScalar; + + enum { + DstFlags = DstEvaluator::Flags, + SrcFlags = SrcEvaluator::Flags + }; + +public: + enum { + DstAlignment = DstEvaluator::Alignment, + SrcAlignment = SrcEvaluator::Alignment, + DstHasDirectAccess = DstFlags & DirectAccessBit, + JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) + }; + +private: + enum { + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + OuterStride = int(outer_stride_at_compile_time::ret), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime + }; + + // TODO distinguish between linear traversal and inner-traversals + typedef typename find_best_packet::type LinearPacketType; + typedef typename find_best_packet::type InnerPacketType; + + enum { + LinearPacketSize = unpacket_traits::size, + InnerPacketSize = unpacket_traits::size + }; + +public: + enum { + LinearRequiredAlignment = unpacket_traits::alignment, + InnerRequiredAlignment = unpacket_traits::alignment + }; + +private: + enum { + DstIsRowMajor = DstFlags&RowMajorBit, + SrcIsRowMajor = SrcFlags&RowMajorBit, + StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), + MightVectorize = bool(StorageOrdersAgree) + && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) + && bool(functor_traits::PacketAccess), + MayInnerVectorize = MightVectorize + && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0 + && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 + && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), + MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), + MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess + && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. */ + MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess) + && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize))) + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix + However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */ + }; + +public: + enum { + Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal) + : int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(MayLinearize) ? int(LinearTraversal) + : int(DefaultTraversal), + Vectorized = int(Traversal) == InnerVectorizedTraversal + || int(Traversal) == LinearVectorizedTraversal + || int(Traversal) == SliceVectorizedTraversal + }; + + typedef typename conditional::type PacketType; + +private: + enum { + ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize + : Vectorized ? InnerPacketSize + : 1, + UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, + MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic + && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit), + MayUnrollInner = int(InnerSize) != Dynamic + && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit) + }; + +public: + enum { + Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) + ? ( + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) + ) + : int(Traversal) == int(LinearVectorizedTraversal) + ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment))) + ? int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(Traversal) == int(LinearTraversal) + ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(NoUnrolling) ) +#if EIGEN_UNALIGNED_VECTORIZE + : int(Traversal) == int(SliceVectorizedTraversal) + ? ( bool(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) ) +#endif + : int(NoUnrolling) + }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; + std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; + std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(DstAlignment) + EIGEN_DEBUG_VAR(SrcAlignment) + EIGEN_DEBUG_VAR(LinearRequiredAlignment) + EIGEN_DEBUG_VAR(InnerRequiredAlignment) + EIGEN_DEBUG_VAR(JointAlignment) + EIGEN_DEBUG_VAR(InnerSize) + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(LinearPacketSize) + EIGEN_DEBUG_VAR(InnerPacketSize) + EIGEN_DEBUG_VAR(ActualPacketSize) + EIGEN_DEBUG_VAR(StorageOrdersAgree) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearize) + EIGEN_DEBUG_VAR(MayInnerVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; + EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(MayUnrollCompletely) + EIGEN_DEBUG_VAR(MayUnrollInner) + std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; + std::cerr << std::endl; + } +#endif +}; + +/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +/************************ +*** Default traversal *** +************************/ + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + kernel.assignCoeffByOuterInner(outer, inner); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) + { + kernel.assignCoeffByOuterInner(outer, Index_); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) + { + kernel.assignCoeff(Index); + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime, + SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, + DstAlignment = Kernel::AssignmentTraits::DstAlignment + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + kernel.template assignPacketByOuterInner(outer, inner); + enum { NextIndex = Index + unpacket_traits::size }; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + typedef typename Kernel::PacketType PacketType; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) + { + kernel.template assignPacketByOuterInner(outer, Index_); + enum { NextIndex = Index_ + unpacket_traits::size }; + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + +// dense_assignment_loop is based on assign_impl + +template +struct dense_assignment_loop; + +/************************ +*** Default traversal *** +************************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel) + { + for(Index outer = 0; outer < kernel.outerSize(); ++outer) { + for(Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); + } + } + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + + const Index outerSize = kernel.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +/*************************** +*** Linear vectorization *** +***************************/ + + +// The goal of unaligned_dense_assignment_loop is simply to factorize the handling +// of the non vectorizable beginning and ending parts + +template +struct unaligned_dense_assignment_loop +{ + // if IsAligned = true, then do nothing + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {} +}; + +template <> +struct unaligned_dense_assignment_loop +{ + // MSVC must not inline this functions. If it does, it fails to optimize the + // packet access path. + // FIXME check which version exhibits this issue +#if EIGEN_COMP_MSVC + template + static EIGEN_DONT_INLINE void run(Kernel &kernel, + Index start, + Index end) +#else + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, + Index start, + Index end) +#endif + { + for (Index index = start; index < end; ++index) + kernel.assignCoeff(index); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index size = kernel.size(); + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment, + packetSize = unpacket_traits::size, + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), + dstAlignment = packet_traits::AlignedOnScalar ? int(requestedAlignment) + : int(Kernel::AssignmentTraits::DstAlignment), + srcAlignment = Kernel::AssignmentTraits::JointAlignment + }; + const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(kernel.dstDataPtr(), size); + const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + unaligned_dense_assignment_loop::run(kernel, 0, alignedStart); + + for(Index index = alignedStart; index < alignedEnd; index += packetSize) + kernel.template assignPacket(index); + + unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { size = DstXprType::SizeAtCompileTime, + packetSize =unpacket_traits::size, + alignedSize = (size/packetSize)*packetSize }; + + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct dense_assignment_loop +{ + typedef typename Kernel::PacketType PacketType; + enum { + SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, + DstAlignment = Kernel::AssignmentTraits::DstAlignment + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index packetSize = unpacket_traits::size; + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; inner+=packetSize) + kernel.template assignPacketByOuterInner(outer, inner); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::AssignmentTraits Traits; + const Index outerSize = kernel.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index size = kernel.size(); + for(Index i = 0; i < size; ++i) + kernel.assignCoeff(i); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +/************************** +*** Slice vectorization *** +***************************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + packetSize = unpacket_traits::size, + requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment), + alignable = packet_traits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar), + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), + dstAlignment = alignable ? int(requestedAlignment) + : int(Kernel::AssignmentTraits::DstAlignment) + }; + const Scalar *dst_ptr = kernel.dstDataPtr(); + if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0) + { + // the pointer is not aligend-on scalar, so alignment is not possible + return dense_assignment_loop::run(kernel); + } + const Index packetAlignedMask = packetSize - 1; + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize); + + for(Index outer = 0; outer < outerSize; ++outer) + { + const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); + // do the non-vectorizable part of the assignment + for(Index inner = 0; inner(outer, inner); + + // do the non-vectorizable part of the assignment + for(Index inner = alignedEnd; inner +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { size = DstXprType::InnerSizeAtCompileTime, + packetSize =unpacket_traits::size, + vectorizableSize = (size/packetSize)*packetSize }; + + for(Index outer = 0; outer < kernel.outerSize(); ++outer) + { + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } + } +}; +#endif + + +/*************************************************************************** +* Part 4 : Generic dense assignment kernel +***************************************************************************/ + +// This class generalize the assignment of a coefficient (or packet) from one dense evaluator +// to another dense writable evaluator. +// It is parametrized by the two evaluators, and the actual assignment functor. +// This abstraction level permits to keep the evaluation loops as simple and as generic as possible. +// One can customize the assignment using this generic dense_assignment_kernel with different +// functors, or by completely overloading it, by-passing a functor. +template +class generic_dense_assignment_kernel +{ +protected: + typedef typename DstEvaluatorTypeT::XprType DstXprType; + typedef typename SrcEvaluatorTypeT::XprType SrcXprType; +public: + + typedef DstEvaluatorTypeT DstEvaluatorType; + typedef SrcEvaluatorTypeT SrcEvaluatorType; + typedef typename DstEvaluatorType::Scalar Scalar; + typedef copy_using_evaluator_traits AssignmentTraits; + typedef typename AssignmentTraits::PacketType PacketType; + + + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) + { + #ifdef EIGEN_DEBUG_ASSIGN + AssignmentTraits::debug(); + #endif + } + + EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } + EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } + EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } + + EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } + EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } + + /// Assign src(row,col) to dst(row,col) through the assignment functor. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) + { + m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) + { + m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignCoeff(row, col); + } + + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) + { + m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) + { + m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignPacket(row, col); + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) + { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::RowsAtCompileTime) == 1 ? 0 + : int(Traits::ColsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? outer + : inner; + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) + { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::ColsAtCompileTime) == 1 ? 0 + : int(Traits::RowsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? inner + : outer; + } + + EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const + { + return m_dstExpr.data(); + } + +protected: + DstEvaluatorType& m_dst; + const SrcEvaluatorType& m_src; + const Functor &m_functor; + // TODO find a way to avoid the needs of the original expression + DstXprType& m_dstExpr; +}; + +/*************************************************************************** +* Part 5 : Entry point for dense rectangular assignment +***************************************************************************/ + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/) +{ + EIGEN_ONLY_USED_FOR_DEBUG(dst); + EIGEN_ONLY_USED_FOR_DEBUG(src); + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op &/*func*/) +{ + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols))) + dst.resize(dstRows, dstCols); + eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func) +{ + typedef evaluator DstEvaluatorType; + typedef evaluator SrcEvaluatorType; + + SrcEvaluatorType srcEvaluator(src); + + // NOTE To properly handle A = (A*A.transpose())/s with A rectangular, + // we need to resize the destination after the source evaluator has been created. + resize_if_allowed(dst, src, func); + + DstEvaluatorType dstEvaluator(dst); + + typedef generic_dense_assignment_kernel Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + dense_assignment_loop::run(kernel); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) +{ + call_dense_assignment_loop(dst, src, internal::assign_op()); +} + +/*************************************************************************** +* Part 6 : Generic assignment +***************************************************************************/ + +// Based on the respective shapes of the destination and source, +// the class AssignmentKind determine the kind of assignment mechanism. +// AssignmentKind must define a Kind typedef. +template struct AssignmentKind; + +// Assignement kind defined in this file: +struct Dense2Dense {}; +struct EigenBase2EigenBase {}; + +template struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; +template<> struct AssignmentKind { typedef Dense2Dense Kind; }; + +// This is the main assignment class +template< typename DstXprType, typename SrcXprType, typename Functor, + typename Kind = typename AssignmentKind< typename evaluator_traits::Shape , typename evaluator_traits::Shape >::Kind, + typename EnableIf = void> +struct Assignment; + + +// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition. +// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated. +// So this intermediate function removes everything related to "assume-aliasing" such that Assignment +// does not has to bother about these annoying details. + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(const Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} + +// Deal with "assume-aliasing" +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing::value, void*>::type = 0) +{ + typename plain_matrix_type::type tmp(src); + call_assignment_no_alias(dst, tmp, func); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if::value, void*>::type = 0) +{ + call_assignment_no_alias(dst, src, func); +} + +// by-pass "assume-aliasing" +// When there is no aliasing, we require that 'dst' has been properly resized +template class StorageBase, typename Src, typename Func> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(NoAlias& dst, const Src& src, const Func& func) +{ + call_assignment_no_alias(dst.expression(), src, func); +} + + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) +{ + enum { + NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) + || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) + ) && int(Dst::SizeAtCompileTime) != 1 + }; + + typedef typename internal::conditional, Dst>::type ActualDstTypeCleaned; + typedef typename internal::conditional, Dst&>::type ActualDstType; + ActualDstType actualDst(dst); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); + + Assignment::run(actualDst, src, func); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias(Dst& dst, const Src& src) +{ + call_assignment_no_alias(dst, src, internal::assign_op()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) +{ + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar); + + Assignment::run(dst, src, func); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) +{ + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); +} + +// forward declaration +template void check_for_aliasing(const Dst &dst, const Src &src); + +// Generic Dense to Dense assignment +// Note that the last template argument "Weak" is needed to make it possible to perform +// both partial specialization+SFINAE without ambiguous specialization +template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> +struct Assignment +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { +#ifndef EIGEN_NO_DEBUG + internal::check_for_aliasing(dst, src); +#endif + + call_dense_assignment_loop(dst, src, func); + } +}; + +// Generic assignment through evalTo. +// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. +// Note that the last template argument "Weak" is needed to make it possible to perform +// both partial specialization+SFINAE without ambiguous specialization +template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> +struct Assignment +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.evalTo(dst); + } + + // NOTE The following two functions are templated to avoid their instanciation if not needed + // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type. + template + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.addTo(dst); + } + + template + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.subTo(dst); + } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_EVALUATOR_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Assign_MKL.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Assign_MKL.h new file mode 100755 index 0000000..6c2ab92 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Assign_MKL.h @@ -0,0 +1,176 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + Copyright (C) 2015 Gael Guennebaud + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin() + ******************************************************************************** +*/ + +#ifndef EIGEN_ASSIGN_VML_H +#define EIGEN_ASSIGN_VML_H + +namespace Eigen { + +namespace internal { + +template +class vml_assign_traits +{ + private: + enum { + DstHasDirectAccess = Dst::Flags & DirectAccessBit, + SrcHasDirectAccess = Src::Flags & DirectAccessBit, + StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime, + + MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, + MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), + VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, + LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD + }; + public: + enum { + EnableVml = MightEnableVml && LargeEnough, + Traversal = MightLinearize ? LinearTraversal : DefaultTraversal + }; +}; + +#define EIGEN_PP_EXPAND(ARG) ARG +#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) +#define EIGEN_VMLMODE_EXPAND_LA , VML_HA +#else +#define EIGEN_VMLMODE_EXPAND_LA , VML_LA +#endif + +#define EIGEN_VMLMODE_EXPAND__ + +#define EIGEN_VMLMODE_PREFIX_LA vm +#define EIGEN_VMLMODE_PREFIX__ v +#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested> \ + struct Assignment, SrcXprNested>, assign_op, \ + Dense2Dense, typename enable_if::EnableVml>::type> { \ + typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + if(vml_assign_traits::Traversal==LinearTraversal) { \ + VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ + &(src.nestedExpression().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ + }; \ + + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) + + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA) +// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) + +#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested, typename Plain> \ + struct Assignment, SrcXprNested, \ + const CwiseNullaryOp,Plain> >, assign_op, \ + Dense2Dense, typename enable_if::EnableVml>::type> { \ + typedef CwiseBinaryOp, SrcXprNested, \ + const CwiseNullaryOp,Plain> > SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + VMLTYPE exponent = reinterpret_cast(src.rhs().functor().m_other); \ + if(vml_assign_traits::Traversal==LinearTraversal) \ + { \ + VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \ + &(src.lhs().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ + }; + +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_VML_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/BandMatrix.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/BandMatrix.h new file mode 100644 index 0000000..4978c91 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/BandMatrix.h @@ -0,0 +1,353 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BANDMATRIX_H +#define EIGEN_BANDMATRIX_H + +namespace Eigen { + +namespace internal { + +template +class BandMatrixBase : public EigenBase +{ + public: + + enum { + Flags = internal::traits::Flags, + CoeffReadCost = internal::traits::CoeffReadCost, + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + Supers = internal::traits::Supers, + Subs = internal::traits::Subs, + Options = internal::traits::Options + }; + typedef typename internal::traits::Scalar Scalar; + typedef Matrix DenseMatrixType; + typedef typename DenseMatrixType::StorageIndex StorageIndex; + typedef typename internal::traits::CoefficientsType CoefficientsType; + typedef EigenBase Base; + + protected: + enum { + DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) + ? 1 + Supers + Subs + : Dynamic, + SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime) + }; + + public: + + using Base::derived; + using Base::rows; + using Base::cols; + + /** \returns the number of super diagonals */ + inline Index supers() const { return derived().supers(); } + + /** \returns the number of sub diagonals */ + inline Index subs() const { return derived().subs(); } + + /** \returns an expression of the underlying coefficient matrix */ + inline const CoefficientsType& coeffs() const { return derived().coeffs(); } + + /** \returns an expression of the underlying coefficient matrix */ + inline CoefficientsType& coeffs() { return derived().coeffs(); } + + /** \returns a vector expression of the \a i -th column, + * only the meaningful part is returned. + * \warning the internal storage must be column major. */ + inline Block col(Index i) + { + EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + Index start = 0; + Index len = coeffs().rows(); + if (i<=supers()) + { + start = supers()-i; + len = (std::min)(rows(),std::max(0,coeffs().rows() - (supers()-i))); + } + else if (i>=rows()-subs()) + len = std::max(0,coeffs().rows() - (i + 1 - rows() + subs())); + return Block(coeffs(), start, i, len, 1); + } + + /** \returns a vector expression of the main diagonal */ + inline Block diagonal() + { return Block(coeffs(),supers(),0,1,(std::min)(rows(),cols())); } + + /** \returns a vector expression of the main diagonal (const version) */ + inline const Block diagonal() const + { return Block(coeffs(),supers(),0,1,(std::min)(rows(),cols())); } + + template struct DiagonalIntReturnType { + enum { + ReturnOpposite = (Options&SelfAdjoint) && (((Index)>0 && Supers==0) || ((Index)<0 && Subs==0)), + Conjugate = ReturnOpposite && NumTraits::IsComplex, + ActualIndex = ReturnOpposite ? -Index : Index, + DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic) + ? Dynamic + : (ActualIndex<0 + ? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex) + : EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex)) + }; + typedef Block BuildType; + typedef typename internal::conditional,BuildType >, + BuildType>::type Type; + }; + + /** \returns a vector expression of the \a N -th sub or super diagonal */ + template inline typename DiagonalIntReturnType::Type diagonal() + { + return typename DiagonalIntReturnType::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N)); + } + + /** \returns a vector expression of the \a N -th sub or super diagonal */ + template inline const typename DiagonalIntReturnType::Type diagonal() const + { + return typename DiagonalIntReturnType::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N)); + } + + /** \returns a vector expression of the \a i -th sub or super diagonal */ + inline Block diagonal(Index i) + { + eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers())); + return Block(coeffs(), supers()-i, std::max(0,i), 1, diagonalLength(i)); + } + + /** \returns a vector expression of the \a i -th sub or super diagonal */ + inline const Block diagonal(Index i) const + { + eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers())); + return Block(coeffs(), supers()-i, std::max(0,i), 1, diagonalLength(i)); + } + + template inline void evalTo(Dest& dst) const + { + dst.resize(rows(),cols()); + dst.setZero(); + dst.diagonal() = diagonal(); + for (Index i=1; i<=supers();++i) + dst.diagonal(i) = diagonal(i); + for (Index i=1; i<=subs();++i) + dst.diagonal(-i) = diagonal(-i); + } + + DenseMatrixType toDenseMatrix() const + { + DenseMatrixType res(rows(),cols()); + evalTo(res); + return res; + } + + protected: + + inline Index diagonalLength(Index i) const + { return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); } +}; + +/** + * \class BandMatrix + * \ingroup Core_Module + * + * \brief Represents a rectangular matrix with a banded storage + * + * \tparam _Scalar Numeric type, i.e. float, double, int + * \tparam _Rows Number of rows, or \b Dynamic + * \tparam _Cols Number of columns, or \b Dynamic + * \tparam _Supers Number of super diagonal + * \tparam _Subs Number of sub diagonal + * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint + * The former controls \ref TopicStorageOrders "storage order", and defaults to + * column-major. The latter controls whether the matrix represents a selfadjoint + * matrix in which case either Supers of Subs have to be null. + * + * \sa class TridiagonalMatrix + */ + +template +struct traits > +{ + typedef _Scalar Scalar; + typedef Dense StorageKind; + typedef Eigen::Index StorageIndex; + enum { + CoeffReadCost = NumTraits::ReadCost, + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _Rows, + MaxColsAtCompileTime = _Cols, + Flags = LvalueBit, + Supers = _Supers, + Subs = _Subs, + Options = _Options, + DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic + }; + typedef Matrix CoefficientsType; +}; + +template +class BandMatrix : public BandMatrixBase > +{ + public: + + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageIndex StorageIndex; + typedef typename internal::traits::CoefficientsType CoefficientsType; + + explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) + : m_coeffs(1+supers+subs,cols), + m_rows(rows), m_supers(supers), m_subs(subs) + { + } + + /** \returns the number of columns */ + inline Index rows() const { return m_rows.value(); } + + /** \returns the number of rows */ + inline Index cols() const { return m_coeffs.cols(); } + + /** \returns the number of super diagonals */ + inline Index supers() const { return m_supers.value(); } + + /** \returns the number of sub diagonals */ + inline Index subs() const { return m_subs.value(); } + + inline const CoefficientsType& coeffs() const { return m_coeffs; } + inline CoefficientsType& coeffs() { return m_coeffs; } + + protected: + + CoefficientsType m_coeffs; + internal::variable_if_dynamic m_rows; + internal::variable_if_dynamic m_supers; + internal::variable_if_dynamic m_subs; +}; + +template +class BandMatrixWrapper; + +template +struct traits > +{ + typedef typename _CoefficientsType::Scalar Scalar; + typedef typename _CoefficientsType::StorageKind StorageKind; + typedef typename _CoefficientsType::StorageIndex StorageIndex; + enum { + CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost, + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _Rows, + MaxColsAtCompileTime = _Cols, + Flags = LvalueBit, + Supers = _Supers, + Subs = _Subs, + Options = _Options, + DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic + }; + typedef _CoefficientsType CoefficientsType; +}; + +template +class BandMatrixWrapper : public BandMatrixBase > +{ + public: + + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::CoefficientsType CoefficientsType; + typedef typename internal::traits::StorageIndex StorageIndex; + + explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) + : m_coeffs(coeffs), + m_rows(rows), m_supers(supers), m_subs(subs) + { + EIGEN_UNUSED_VARIABLE(cols); + //internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows()); + } + + /** \returns the number of columns */ + inline Index rows() const { return m_rows.value(); } + + /** \returns the number of rows */ + inline Index cols() const { return m_coeffs.cols(); } + + /** \returns the number of super diagonals */ + inline Index supers() const { return m_supers.value(); } + + /** \returns the number of sub diagonals */ + inline Index subs() const { return m_subs.value(); } + + inline const CoefficientsType& coeffs() const { return m_coeffs; } + + protected: + + const CoefficientsType& m_coeffs; + internal::variable_if_dynamic m_rows; + internal::variable_if_dynamic m_supers; + internal::variable_if_dynamic m_subs; +}; + +/** + * \class TridiagonalMatrix + * \ingroup Core_Module + * + * \brief Represents a tridiagonal matrix with a compact banded storage + * + * \tparam Scalar Numeric type, i.e. float, double, int + * \tparam Size Number of rows and cols, or \b Dynamic + * \tparam Options Can be 0 or \b SelfAdjoint + * + * \sa class BandMatrix + */ +template +class TridiagonalMatrix : public BandMatrix +{ + typedef BandMatrix Base; + typedef typename Base::StorageIndex StorageIndex; + public: + explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} + + inline typename Base::template DiagonalIntReturnType<1>::Type super() + { return Base::template diagonal<1>(); } + inline const typename Base::template DiagonalIntReturnType<1>::Type super() const + { return Base::template diagonal<1>(); } + inline typename Base::template DiagonalIntReturnType<-1>::Type sub() + { return Base::template diagonal<-1>(); } + inline const typename Base::template DiagonalIntReturnType<-1>::Type sub() const + { return Base::template diagonal<-1>(); } + protected: +}; + + +struct BandShape {}; + +template +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BANDMATRIX_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Block.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Block.h new file mode 100644 index 0000000..11de45c --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Block.h @@ -0,0 +1,452 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BLOCK_H +#define EIGEN_BLOCK_H + +namespace Eigen { + +namespace internal { +template +struct traits > : traits +{ + typedef typename traits::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ref_selector::type XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + enum{ + MatrixRows = traits::RowsAtCompileTime, + MatrixCols = traits::ColsAtCompileTime, + RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows, + ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols, + MaxRowsAtCompileTime = BlockRows==0 ? 0 + : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) + : int(traits::MaxRowsAtCompileTime), + MaxColsAtCompileTime = BlockCols==0 ? 0 + : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) + : int(traits::MaxColsAtCompileTime), + + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : XprTypeIsRowMajor, + HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsXprType + ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsXprType + ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + + // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, + Flags = (traits::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit, + // FIXME DirectAccessBit should not be handled by expressions + // + // Alignment is needed by MapBase's assertions + // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + Alignment = 0 + }; +}; + +template::ret> class BlockImpl_dense; + +} // end namespace internal + +template class BlockImpl; + +/** \class Block + * \ingroup Core_Module + * + * \brief Expression of a fixed-size or dynamic-size block + * + * \tparam XprType the type of the expression in which we are taking a block + * \tparam BlockRows the number of rows of the block we are taking at compile time (optional) + * \tparam BlockCols the number of columns of the block we are taking at compile time (optional) + * \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or + * to set of columns of a column major matrix (optional). The parameter allows to determine + * at compile time whether aligned access is possible on the block expression. + * + * This class represents an expression of either a fixed-size or dynamic-size block. It is the return + * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block(Index,Index) and + * most of the time this is the only way it is used. + * + * However, if you want to directly maniputate block expressions, + * for instance if you want to write a function returning such an expression, you + * will need to use this class. + * + * Here is an example illustrating the dynamic case: + * \include class_Block.cpp + * Output: \verbinclude class_Block.out + * + * \note Even though this expression has dynamic size, in the case where \a XprType + * has fixed size, this expression inherits a fixed maximal size which means that evaluating + * it does not cause a dynamic memory allocation. + * + * Here is an example illustrating the fixed-size case: + * \include class_FixedBlock.cpp + * Output: \verbinclude class_FixedBlock.out + * + * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock + */ +template class Block + : public BlockImpl::StorageKind> +{ + typedef BlockImpl::StorageKind> Impl; + public: + //typedef typename Impl::Base Base; + typedef Impl Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Block) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) + + typedef typename internal::remove_all::type NestedExpression; + + /** Column or Row constructor + */ + EIGEN_DEVICE_FUNC + inline Block(XprType& xpr, Index i) : Impl(xpr,i) + { + eigen_assert( (i>=0) && ( + ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows() + && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols()); + } + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC + inline Block(XprType& xpr, + Index startRow, Index startCol, + Index blockRows, Index blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) + { + eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows) + && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols)); + eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows + && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols); + } +}; + +// The generic default implementation for dense block simplu forward to the internal::BlockImpl_dense +// that must be specialized for direct and non-direct access... +template +class BlockImpl + : public internal::BlockImpl_dense +{ + typedef internal::BlockImpl_dense Impl; + typedef typename XprType::StorageIndex StorageIndex; + public: + typedef Impl Base; + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) + EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {} + EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {} + EIGEN_DEVICE_FUNC + inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) {} +}; + +namespace internal { + +/** \internal Internal implementation of dense Blocks in the general case. */ +template class BlockImpl_dense + : public internal::dense_xpr_base >::type +{ + typedef Block BlockType; + typedef typename internal::ref_selector::non_const_type XprTypeNested; + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) + + // class InnerIterator; // FIXME apparently never used + + /** Column or Row constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, Index i) + : m_xpr(xpr), + // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime, + // and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1, + // all other cases are invalid. + // The case a 1x1 matrix seems ambiguous, but the result is the same anyway. + m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0), + m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0), + m_blockRows(BlockRows==1 ? 1 : xpr.rows()), + m_blockCols(BlockCols==1 ? 1 : xpr.cols()) + {} + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) + : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol), + m_blockRows(BlockRows), m_blockCols(BlockCols) + {} + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, + Index startRow, Index startCol, + Index blockRows, Index blockCols) + : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol), + m_blockRows(blockRows), m_blockCols(blockCols) + {} + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); } + + EIGEN_DEVICE_FUNC + inline Scalar& coeffRef(Index rowId, Index colId) + { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const + { + return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + EIGEN_DEVICE_FUNC + inline Scalar& coeffRef(Index index) + { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index index) const + { + return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + EIGEN_DEVICE_FUNC + inline const CoeffReturnType coeff(Index index) const + { + return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + template + inline PacketScalar packet(Index rowId, Index colId) const + { + return m_xpr.template packet(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + template + inline void writePacket(Index rowId, Index colId, const PacketScalar& val) + { + m_xpr.template writePacket(rowId + m_startRow.value(), colId + m_startCol.value(), val); + } + + template + inline PacketScalar packet(Index index) const + { + return m_xpr.template packet + (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + template + inline void writePacket(Index index, const PacketScalar& val) + { + m_xpr.template writePacket + (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val); + } + + #ifdef EIGEN_PARSED_BY_DOXYGEN + /** \sa MapBase::data() */ + EIGEN_DEVICE_FUNC inline const Scalar* data() const; + EIGEN_DEVICE_FUNC inline Index innerStride() const; + EIGEN_DEVICE_FUNC inline Index outerStride() const; + #endif + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& nestedExpression() const + { + return m_xpr; + } + + EIGEN_DEVICE_FUNC + XprType& nestedExpression() { return m_xpr; } + + EIGEN_DEVICE_FUNC + StorageIndex startRow() const + { + return m_startRow.value(); + } + + EIGEN_DEVICE_FUNC + StorageIndex startCol() const + { + return m_startCol.value(); + } + + protected: + + XprTypeNested m_xpr; + const internal::variable_if_dynamic m_startRow; + const internal::variable_if_dynamic m_startCol; + const internal::variable_if_dynamic m_blockRows; + const internal::variable_if_dynamic m_blockCols; +}; + +/** \internal Internal implementation of dense Blocks in the direct access case.*/ +template +class BlockImpl_dense + : public MapBase > +{ + typedef Block BlockType; + typedef typename internal::ref_selector::non_const_type XprTypeNested; + enum { + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0 + }; + public: + + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) + + /** Column or Row constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, Index i) + : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) + || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()), + BlockRows==1 ? 1 : xpr.rows(), + BlockCols==1 ? 1 : xpr.cols()), + m_xpr(xpr), + m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0), + m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0) + { + init(); + } + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)), + m_xpr(xpr), m_startRow(startRow), m_startCol(startCol) + { + init(); + } + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, + Index startRow, Index startCol, + Index blockRows, Index blockCols) + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols), + m_xpr(xpr), m_startRow(startRow), m_startCol(startCol) + { + init(); + } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& nestedExpression() const + { + return m_xpr; + } + + EIGEN_DEVICE_FUNC + XprType& nestedExpression() { return m_xpr; } + + /** \sa MapBase::innerStride() */ + EIGEN_DEVICE_FUNC + inline Index innerStride() const + { + return internal::traits::HasSameStorageOrderAsXprType + ? m_xpr.innerStride() + : m_xpr.outerStride(); + } + + /** \sa MapBase::outerStride() */ + EIGEN_DEVICE_FUNC + inline Index outerStride() const + { + return m_outerStride; + } + + EIGEN_DEVICE_FUNC + StorageIndex startRow() const + { + return m_startRow.value(); + } + + EIGEN_DEVICE_FUNC + StorageIndex startCol() const + { + return m_startCol.value(); + } + + #ifndef __SUNPRO_CC + // FIXME sunstudio is not friendly with the above friend... + // META-FIXME there is no 'friend' keyword around here. Is this obsolete? + protected: + #endif + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal used by allowAligned() */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols) + : Base(data, blockRows, blockCols), m_xpr(xpr) + { + init(); + } + #endif + + protected: + EIGEN_DEVICE_FUNC + void init() + { + m_outerStride = internal::traits::HasSameStorageOrderAsXprType + ? m_xpr.outerStride() + : m_xpr.innerStride(); + } + + XprTypeNested m_xpr; + const internal::variable_if_dynamic m_startRow; + const internal::variable_if_dynamic m_startCol; + Index m_outerStride; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BLOCK_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/BooleanRedux.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/BooleanRedux.h new file mode 100644 index 0000000..8409d87 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/BooleanRedux.h @@ -0,0 +1,164 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ALLANDANY_H +#define EIGEN_ALLANDANY_H + +namespace Eigen { + +namespace internal { + +template +struct all_unroller +{ + typedef typename Derived::ExpressionTraits Traits; + enum { + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime + }; + + static inline bool run(const Derived &mat) + { + return all_unroller::run(mat) && mat.coeff(row, col); + } +}; + +template +struct all_unroller +{ + static inline bool run(const Derived &/*mat*/) { return true; } +}; + +template +struct all_unroller +{ + static inline bool run(const Derived &) { return false; } +}; + +template +struct any_unroller +{ + typedef typename Derived::ExpressionTraits Traits; + enum { + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime + }; + + static inline bool run(const Derived &mat) + { + return any_unroller::run(mat) || mat.coeff(row, col); + } +}; + +template +struct any_unroller +{ + static inline bool run(const Derived & /*mat*/) { return false; } +}; + +template +struct any_unroller +{ + static inline bool run(const Derived &) { return false; } +}; + +} // end namespace internal + +/** \returns true if all coefficients are true + * + * Example: \include MatrixBase_all.cpp + * Output: \verbinclude MatrixBase_all.out + * + * \sa any(), Cwise::operator<() + */ +template +inline bool DenseBase::all() const +{ + typedef internal::evaluator Evaluator; + enum { + unroll = SizeAtCompileTime != Dynamic + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + }; + Evaluator evaluator(derived()); + if(unroll) + return internal::all_unroller::run(evaluator); + else + { + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if (!evaluator.coeff(i, j)) return false; + return true; + } +} + +/** \returns true if at least one coefficient is true + * + * \sa all() + */ +template +inline bool DenseBase::any() const +{ + typedef internal::evaluator Evaluator; + enum { + unroll = SizeAtCompileTime != Dynamic + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + }; + Evaluator evaluator(derived()); + if(unroll) + return internal::any_unroller::run(evaluator); + else + { + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if (evaluator.coeff(i, j)) return true; + return false; + } +} + +/** \returns the number of coefficients which evaluate to true + * + * \sa all(), any() + */ +template +inline Eigen::Index DenseBase::count() const +{ + return derived().template cast().template cast().sum(); +} + +/** \returns true is \c *this contains at least one Not A Number (NaN). + * + * \sa allFinite() + */ +template +inline bool DenseBase::hasNaN() const +{ +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isNaN().any(); +#else + return !((derived().array()==derived().array()).all()); +#endif +} + +/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. + * + * \sa hasNaN() + */ +template +inline bool DenseBase::allFinite() const +{ +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isFinite().all(); +#else + return !((derived()-derived()).hasNaN()); +#endif +} + +} // end namespace Eigen + +#endif // EIGEN_ALLANDANY_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CommaInitializer.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CommaInitializer.h new file mode 100644 index 0000000..d218e98 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CommaInitializer.h @@ -0,0 +1,160 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMMAINITIALIZER_H +#define EIGEN_COMMAINITIALIZER_H + +namespace Eigen { + +/** \class CommaInitializer + * \ingroup Core_Module + * + * \brief Helper class used by the comma initializer operator + * + * This class is internally used to implement the comma initializer feature. It is + * the return type of MatrixBase::operator<<, and most of the time this is the only + * way it is used. + * + * \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished() + */ +template +struct CommaInitializer +{ + typedef typename XprType::Scalar Scalar; + + EIGEN_DEVICE_FUNC + inline CommaInitializer(XprType& xpr, const Scalar& s) + : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1) + { + m_xpr.coeffRef(0,0) = s; + } + + template + EIGEN_DEVICE_FUNC + inline CommaInitializer(XprType& xpr, const DenseBase& other) + : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows()) + { + m_xpr.block(0, 0, other.rows(), other.cols()) = other; + } + + /* Copy/Move constructor which transfers ownership. This is crucial in + * absence of return value optimization to avoid assertions during destruction. */ + // FIXME in C++11 mode this could be replaced by a proper RValue constructor + EIGEN_DEVICE_FUNC + inline CommaInitializer(const CommaInitializer& o) + : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) { + // Mark original object as finished. In absence of R-value references we need to const_cast: + const_cast(o).m_row = m_xpr.rows(); + const_cast(o).m_col = m_xpr.cols(); + const_cast(o).m_currentBlockRows = 0; + } + + /* inserts a scalar value in the target matrix */ + EIGEN_DEVICE_FUNC + CommaInitializer& operator,(const Scalar& s) + { + if (m_col==m_xpr.cols()) + { + m_row+=m_currentBlockRows; + m_col = 0; + m_currentBlockRows = 1; + eigen_assert(m_row + EIGEN_DEVICE_FUNC + CommaInitializer& operator,(const DenseBase& other) + { + if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows)) + { + m_row+=m_currentBlockRows; + m_col = 0; + m_currentBlockRows = other.rows(); + eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() + && "Too many rows passed to comma initializer (operator<<)"); + } + eigen_assert((m_col + other.cols() <= m_xpr.cols()) + && "Too many coefficients passed to comma initializer (operator<<)"); + eigen_assert(m_currentBlockRows==other.rows()); + m_xpr.template block + (m_row, m_col, other.rows(), other.cols()) = other; + m_col += other.cols(); + return *this; + } + + EIGEN_DEVICE_FUNC + inline ~CommaInitializer() +#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS + EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception) +#endif + { + finished(); + } + + /** \returns the built matrix once all its coefficients have been set. + * Calling finished is 100% optional. Its purpose is to write expressions + * like this: + * \code + * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished()); + * \endcode + */ + EIGEN_DEVICE_FUNC + inline XprType& finished() { + eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0) + && m_col == m_xpr.cols() + && "Too few coefficients passed to comma initializer (operator<<)"); + return m_xpr; + } + + XprType& m_xpr; // target expression + Index m_row; // current row id + Index m_col; // current col id + Index m_currentBlockRows; // current block height +}; + +/** \anchor MatrixBaseCommaInitRef + * Convenient operator to set the coefficients of a matrix. + * + * The coefficients must be provided in a row major order and exactly match + * the size of the matrix. Otherwise an assertion is raised. + * + * Example: \include MatrixBase_set.cpp + * Output: \verbinclude MatrixBase_set.out + * + * \note According the c++ standard, the argument expressions of this comma initializer are evaluated in arbitrary order. + * + * \sa CommaInitializer::finished(), class CommaInitializer + */ +template +inline CommaInitializer DenseBase::operator<< (const Scalar& s) +{ + return CommaInitializer(*static_cast(this), s); +} + +/** \sa operator<<(const Scalar&) */ +template +template +inline CommaInitializer +DenseBase::operator<<(const DenseBase& other) +{ + return CommaInitializer(*static_cast(this), other); +} + +} // end namespace Eigen + +#endif // EIGEN_COMMAINITIALIZER_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ConditionEstimator.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ConditionEstimator.h new file mode 100644 index 0000000..aa7efdc --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ConditionEstimator.h @@ -0,0 +1,175 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com) +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CONDITIONESTIMATOR_H +#define EIGEN_CONDITIONESTIMATOR_H + +namespace Eigen { + +namespace internal { + +template +struct rcond_compute_sign { + static inline Vector run(const Vector& v) { + const RealVector v_abs = v.cwiseAbs(); + return (v_abs.array() == static_cast(0)) + .select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs)); + } +}; + +// Partial specialization to avoid elementwise division for real vectors. +template +struct rcond_compute_sign { + static inline Vector run(const Vector& v) { + return (v.array() < static_cast(0)) + .select(-Vector::Ones(v.size()), Vector::Ones(v.size())); + } +}; + +/** + * \returns an estimate of ||inv(matrix)||_1 given a decomposition of + * \a matrix that implements .solve() and .adjoint().solve() methods. + * + * This function implements Algorithms 4.1 and 5.1 from + * http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf + * which also forms the basis for the condition number estimators in + * LAPACK. Since at most 10 calls to the solve method of dec are + * performed, the total cost is O(dims^2), as opposed to O(dims^3) + * needed to compute the inverse matrix explicitly. + * + * The most common usage is in estimating the condition number + * ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be + * computed directly in O(n^2) operations. + * + * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and + * LLT. + * + * \sa FullPivLU, PartialPivLU, LDLT, LLT. + */ +template +typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec) +{ + typedef typename Decomposition::MatrixType MatrixType; + typedef typename Decomposition::Scalar Scalar; + typedef typename Decomposition::RealScalar RealScalar; + typedef typename internal::plain_col_type::type Vector; + typedef typename internal::plain_col_type::type RealVector; + const bool is_complex = (NumTraits::IsComplex != 0); + + eigen_assert(dec.rows() == dec.cols()); + const Index n = dec.rows(); + if (n == 0) + return 0; + + // Disable Index to float conversion warning +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif + Vector v = dec.solve(Vector::Ones(n) / Scalar(n)); +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif + + // lower_bound is a lower bound on + // ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1 + // and is the objective maximized by the ("super-") gradient ascent + // algorithm below. + RealScalar lower_bound = v.template lpNorm<1>(); + if (n == 1) + return lower_bound; + + // Gradient ascent algorithm follows: We know that the optimum is achieved at + // one of the simplices v = e_i, so in each iteration we follow a + // super-gradient to move towards the optimal one. + RealScalar old_lower_bound = lower_bound; + Vector sign_vector(n); + Vector old_sign_vector; + Index v_max_abs_index = -1; + Index old_v_max_abs_index = v_max_abs_index; + for (int k = 0; k < 4; ++k) + { + sign_vector = internal::rcond_compute_sign::run(v); + if (k > 0 && !is_complex && sign_vector == old_sign_vector) { + // Break if the solution stagnated. + break; + } + // v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )| + v = dec.adjoint().solve(sign_vector); + v.real().cwiseAbs().maxCoeff(&v_max_abs_index); + if (v_max_abs_index == old_v_max_abs_index) { + // Break if the solution stagnated. + break; + } + // Move to the new simplex e_j, where j = v_max_abs_index. + v = dec.solve(Vector::Unit(n, v_max_abs_index)); // v = inv(matrix) * e_j. + lower_bound = v.template lpNorm<1>(); + if (lower_bound <= old_lower_bound) { + // Break if the gradient step did not increase the lower_bound. + break; + } + if (!is_complex) { + old_sign_vector = sign_vector; + } + old_v_max_abs_index = v_max_abs_index; + old_lower_bound = lower_bound; + } + // The following calculates an independent estimate of ||matrix||_1 by + // multiplying matrix by a vector with entries of slowly increasing + // magnitude and alternating sign: + // v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1. + // This improvement to Hager's algorithm above is due to Higham. It was + // added to make the algorithm more robust in certain corner cases where + // large elements in the matrix might otherwise escape detection due to + // exact cancellation (especially when op and op_adjoint correspond to a + // sequence of backsubstitutions and permutations), which could cause + // Hager's algorithm to vastly underestimate ||matrix||_1. + Scalar alternating_sign(RealScalar(1)); + for (Index i = 0; i < n; ++i) { + // The static_cast is needed when Scalar is a complex and RealScalar implements expression templates + v[i] = alternating_sign * static_cast(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1)))); + alternating_sign = -alternating_sign; + } + v = dec.solve(v); + const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n)); + return numext::maxi(lower_bound, alternate_lower_bound); +} + +/** \brief Reciprocal condition number estimator. + * + * Computing a decomposition of a dense matrix takes O(n^3) operations, while + * this method estimates the condition number quickly and reliably in O(n^2) + * operations. + * + * \returns an estimate of the reciprocal condition number + * (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and + * its decomposition. Supports the following decompositions: FullPivLU, + * PartialPivLU, LDLT, and LLT. + * + * \sa FullPivLU, PartialPivLU, LDLT, LLT. + */ +template +typename Decomposition::RealScalar +rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec) +{ + typedef typename Decomposition::RealScalar RealScalar; + eigen_assert(dec.rows() == dec.cols()); + if (dec.rows() == 0) return RealScalar(1); + if (matrix_norm == RealScalar(0)) return RealScalar(0); + if (dec.rows() == 1) return RealScalar(1); + const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec); + return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0) + : (RealScalar(1) / inverse_matrix_norm) / matrix_norm); +} + +} // namespace internal + +} // namespace Eigen + +#endif diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CoreEvaluators.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CoreEvaluators.h new file mode 100644 index 0000000..f7c1eff --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CoreEvaluators.h @@ -0,0 +1,1671 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_COREEVALUATORS_H +#define EIGEN_COREEVALUATORS_H + +namespace Eigen { + +namespace internal { + +// This class returns the evaluator kind from the expression storage kind. +// Default assumes index based accessors +template +struct storage_kind_to_evaluator_kind { + typedef IndexBased Kind; +}; + +// This class returns the evaluator shape from the expression storage kind. +// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc. +template struct storage_kind_to_shape; + +template<> struct storage_kind_to_shape { typedef DenseShape Shape; }; +template<> struct storage_kind_to_shape { typedef SolverShape Shape; }; +template<> struct storage_kind_to_shape { typedef PermutationShape Shape; }; +template<> struct storage_kind_to_shape { typedef TranspositionsShape Shape; }; + +// Evaluators have to be specialized with respect to various criteria such as: +// - storage/structure/shape +// - scalar type +// - etc. +// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators. +// We currently distinguish the following kind of evaluators: +// - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) +// - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - ternary_evaluator for expression taking three arguments (CwiseTernaryOp) +// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. +// - mapbase_evaluator for Map, Block, Ref +// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) + +template< typename T, + typename Arg1Kind = typename evaluator_traits::Kind, + typename Arg2Kind = typename evaluator_traits::Kind, + typename Arg3Kind = typename evaluator_traits::Kind, + typename Arg1Scalar = typename traits::Scalar, + typename Arg2Scalar = typename traits::Scalar, + typename Arg3Scalar = typename traits::Scalar> struct ternary_evaluator; + +template< typename T, + typename LhsKind = typename evaluator_traits::Kind, + typename RhsKind = typename evaluator_traits::Kind, + typename LhsScalar = typename traits::Scalar, + typename RhsScalar = typename traits::Scalar> struct binary_evaluator; + +template< typename T, + typename Kind = typename evaluator_traits::Kind, + typename Scalar = typename T::Scalar> struct unary_evaluator; + +// evaluator_traits contains traits for evaluator + +template +struct evaluator_traits_base +{ + // by default, get evaluator kind and shape from storage + typedef typename storage_kind_to_evaluator_kind::StorageKind>::Kind Kind; + typedef typename storage_kind_to_shape::StorageKind>::Shape Shape; +}; + +// Default evaluator traits +template +struct evaluator_traits : public evaluator_traits_base +{ +}; + +template::Shape > +struct evaluator_assume_aliasing { + static const bool value = false; +}; + +// By default, we assume a unary expression: +template +struct evaluator : public unary_evaluator +{ + typedef unary_evaluator Base; + EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {} +}; + + +// TODO: Think about const-correctness +template +struct evaluator + : evaluator +{ + EIGEN_DEVICE_FUNC + explicit evaluator(const T& xpr) : evaluator(xpr) {} +}; + +// ---------- base class for all evaluators ---------- + +template +struct evaluator_base : public noncopyable +{ + // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. + typedef traits ExpressionTraits; + + enum { + Alignment = 0 + }; +}; + +// -------------------- Matrix and Array -------------------- +// +// evaluator is a common base class for the +// Matrix and Array evaluators. +// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, +// so no need for more sophisticated dispatching. + +template +struct evaluator > + : evaluator_base +{ + typedef PlainObjectBase PlainObjectType; + typedef typename PlainObjectType::Scalar Scalar; + typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = PlainObjectType::IsRowMajor, + IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, + RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, + ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, + + CoeffReadCost = NumTraits::ReadCost, + Flags = traits::EvaluatorFlags, + Alignment = traits::Alignment + }; + + EIGEN_DEVICE_FUNC evaluator() + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + if (IsRowMajor) + return m_data[row * m_outerStride.value() + col]; + else + return m_data[row + col * m_outerStride.value()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_data[index]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + if (IsRowMajor) + return const_cast(m_data)[row * m_outerStride.value() + col]; + else + return const_cast(m_data)[row + col * m_outerStride.value()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return const_cast(m_data)[index]; + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + if (IsRowMajor) + return ploadt(m_data + row * m_outerStride.value() + col); + else + return ploadt(m_data + row + col * m_outerStride.value()); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return ploadt(m_data + index); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + if (IsRowMajor) + return pstoret + (const_cast(m_data) + row * m_outerStride.value() + col, x); + else + return pstoret + (const_cast(m_data) + row + col * m_outerStride.value(), x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + return pstoret(const_cast(m_data) + index, x); + } + +protected: + const Scalar *m_data; + + // We do not need to know the outer stride for vectors + variable_if_dynamic m_outerStride; +}; + +template +struct evaluator > + : evaluator > > +{ + typedef Matrix XprType; + + EIGEN_DEVICE_FUNC evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) + { } +}; + +template +struct evaluator > + : evaluator > > +{ + typedef Array XprType; + + EIGEN_DEVICE_FUNC evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) + { } +}; + +// -------------------- Transpose -------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Transpose XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags ^ RowMajorBit, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(col, row); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(col, row); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename XprType::Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(col, row); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + m_argImpl.template writePacket(col, row, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + evaluator m_argImpl; +}; + +// -------------------- CwiseNullaryOp -------------------- +// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. +// Likewise, there is not need to more sophisticated dispatching here. + +template::value, + bool has_unary = has_unary_operator::value, + bool has_binary = has_binary_operator::value> +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp(i,j); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp(i); } +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp(); } +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp(i,j); } +}; + +// We need the following specialization for vector-only functors assigned to a runtime vector, +// for instance, using linspace and assigning a RowVectorXd to a MatrixXd or even a row of a MatrixXd. +// In this case, i==0 and j is used for the actual iteration. +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + eigen_assert(i==0 || j==0); + return op(i+j); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + eigen_assert(i==0 || j==0); + return op.template packetOp(i+j); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp(i); } +}; + +template +struct nullary_wrapper {}; + +#if 0 && EIGEN_COMP_MSVC>0 +// Disable this ugly workaround. This is now handled in traits::match, +// but this piece of code might still become handly if some other weird compilation +// erros pop up again. + +// MSVC exhibits a weird compilation error when +// compiling: +// Eigen::MatrixXf A = MatrixXf::Random(3,3); +// Ref R = 2.f*A; +// and that has_*ary_operator> have not been instantiated yet. +// The "problem" is that evaluator<2.f*A> is instantiated by traits::match<2.f*A> +// and at that time has_*ary_operator returns true regardless of T. +// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>. +// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(), +// and packet() are really instantiated as implemented below: + +// This is a simple wrapper around Index to enforce the re-instantiation of +// has_*ary_operator when needed. +template struct nullary_wrapper_workaround_msvc { + nullary_wrapper_workaround_msvc(const T&); + operator T()const; +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i); + } +}; +#endif // MSVC workaround + +template +struct evaluator > + : evaluator_base > +{ + typedef CwiseNullaryOp XprType; + typedef typename internal::remove_all::type PlainObjectTypeCleaned; + + enum { + CoeffReadCost = internal::functor_traits::Cost, + + Flags = (evaluator::Flags + & ( HereditaryBits + | (functor_has_linear_access::ret ? LinearAccessBit : 0) + | (functor_traits::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), + Alignment = AlignedMax + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) + : m_functor(n.functor()), m_wrapper() + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(IndexType row, IndexType col) const + { + return m_wrapper(m_functor, row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(IndexType index) const + { + return m_wrapper(m_functor,index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(IndexType row, IndexType col) const + { + return m_wrapper.template packetOp(m_functor, row, col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(IndexType index) const + { + return m_wrapper.template packetOp(m_functor, index); + } + +protected: + const NullaryOp m_functor; + const internal::nullary_wrapper m_wrapper; +}; + +// -------------------- CwiseUnaryOp -------------------- + +template +struct unary_evaluator, IndexBased > + : evaluator_base > +{ + typedef CwiseUnaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = evaluator::Flags + & (HereditaryBits | LinearAccessBit | (functor_traits::PacketAccess ? PacketAccessBit : 0)), + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_argImpl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_argImpl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_argImpl.template packet(index)); + } + +protected: + const UnaryOp m_functor; + evaluator m_argImpl; +}; + +// -------------------- CwiseTernaryOp -------------------- + +// this is a ternary expression +template +struct evaluator > + : public ternary_evaluator > +{ + typedef CwiseTernaryOp XprType; + typedef ternary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct ternary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseTernaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + Arg1Flags = evaluator::Flags, + Arg2Flags = evaluator::Flags, + Arg3Flags = evaluator::Flags, + SameType = is_same::value && is_same::value, + StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit), + Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & ( + HereditaryBits + | (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN( + EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment), + evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_arg1Impl.template packet(row, col), + m_arg2Impl.template packet(row, col), + m_arg3Impl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + +protected: + const TernaryOp m_functor; + evaluator m_arg1Impl; + evaluator m_arg2Impl; + evaluator m_arg3Impl; +}; + +// -------------------- CwiseBinaryOp -------------------- + +// this is a binary expression +template +struct evaluator > + : public binary_evaluator > +{ + typedef CwiseBinaryOp XprType; + typedef binary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct binary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseBinaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + LhsFlags = evaluator::Flags, + RhsFlags = evaluator::Flags, + SameType = is_same::value, + StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment,evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_lhsImpl.template packet(row, col), + m_rhsImpl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_lhsImpl.template packet(index), + m_rhsImpl.template packet(index)); + } + +protected: + const BinaryOp m_functor; + evaluator m_lhsImpl; + evaluator m_rhsImpl; +}; + +// -------------------- CwiseUnaryView -------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef CwiseUnaryView XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), + + Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_unaryOp(m_argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_unaryOp(m_argImpl.coeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_unaryOp(m_argImpl.coeffRef(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_unaryOp(m_argImpl.coeffRef(index)); + } + +protected: + const UnaryOp m_unaryOp; + evaluator m_argImpl; +}; + +// -------------------- Map -------------------- + +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? +// but that might complicate template specialization +template +struct mapbase_evaluator; + +template +struct mapbase_evaluator : evaluator_base +{ + typedef Derived XprType; + typedef typename XprType::PointerType PointerType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, + CoeffReadCost = NumTraits::ReadCost + }; + + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) + : m_data(const_cast(map.data())), + m_innerStride(map.innerStride()), + m_outerStride(map.outerStride()) + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_data[index * m_innerStride.value()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_data[index * m_innerStride.value()]; + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + return internal::ploadt(ptr); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return internal::ploadt(m_data + index * m_innerStride.value()); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + return internal::pstoret(ptr, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + internal::pstoret(m_data + index * m_innerStride.value(), x); + } +protected: + EIGEN_DEVICE_FUNC + inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); } + EIGEN_DEVICE_FUNC + inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); } + + PointerType m_data; + const internal::variable_if_dynamic m_innerStride; + const internal::variable_if_dynamic m_outerStride; +}; + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Map XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + + PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit), + LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit), + Flags = int( evaluator::Flags) & (LinearAccessMask&PacketAccessMask), + + Alignment = int(MapOptions)&int(AlignedMask) + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) + : mapbase_evaluator(map) + { } +}; + +// -------------------- Ref -------------------- + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Ref XprType; + + enum { + Flags = evaluator >::Flags, + Alignment = evaluator >::Alignment + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) + : mapbase_evaluator(ref) + { } +}; + +// -------------------- Block -------------------- + +template::ret> struct block_evaluator; + +template +struct evaluator > + : block_evaluator +{ + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime, + + ArgTypeIsRowMajor = (int(evaluator::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ArgTypeIsRowMajor, + HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, + + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + FlagsRowMajorBit = XprType::Flags&RowMajorBit, + Flags0 = evaluator::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, + + PacketAlignment = unpacket_traits::alignment, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) + }; + typedef block_evaluator block_evaluator_type; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } +}; + +// no direct-access => dispatch to a unary evaluator +template +struct block_evaluator + : unary_evaluator > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : unary_evaluator(block) + {} +}; + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) + : m_argImpl(block.nestedExpression()), + m_startRow(block.startRow()), + m_startCol(block.startCol()) + { } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + RowsAtCompileTime = XprType::RowsAtCompileTime + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(m_startRow.value() + row, m_startCol.value() + col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return packet(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + return writePacket(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, + x); + } + +protected: + evaluator m_argImpl; + const variable_if_dynamic m_startRow; + const variable_if_dynamic m_startCol; +}; + +// TODO: This evaluator does not actually use the child evaluator; +// all action is via the data() as returned by the Block expression. + +template +struct block_evaluator + : mapbase_evaluator, + typename Block::PlainObject> +{ + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : mapbase_evaluator(block) + { + // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime + eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); + } +}; + + +// -------------------- Select -------------------- +// NOTE shall we introduce a ternary_evaluator? + +// TODO enable vectorization for Select +template +struct evaluator > + : evaluator_base > +{ + typedef Select XprType; + enum { + CoeffReadCost = evaluator::CoeffReadCost + + EIGEN_PLAIN_ENUM_MAX(evaluator::CoeffReadCost, + evaluator::CoeffReadCost), + + Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, + + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) + : m_conditionImpl(select.conditionMatrix()), + m_thenImpl(select.thenMatrix()), + m_elseImpl(select.elseMatrix()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + if (m_conditionImpl.coeff(row, col)) + return m_thenImpl.coeff(row, col); + else + return m_elseImpl.coeff(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + if (m_conditionImpl.coeff(index)) + return m_thenImpl.coeff(index); + else + return m_elseImpl.coeff(index); + } + +protected: + evaluator m_conditionImpl; + evaluator m_thenImpl; + evaluator m_elseImpl; +}; + + +// -------------------- Replicate -------------------- + +template +struct unary_evaluator > + : evaluator_base > +{ + typedef Replicate XprType; + typedef typename XprType::CoeffReturnType CoeffReturnType; + enum { + Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor + }; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0, + Flags = (evaluator::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits::Flags & RowMajorBit), + + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) + : m_arg(replicate.nestedExpression()), + m_argImpl(m_arg), + m_rows(replicate.nestedExpression().rows()), + m_cols(replicate.nestedExpression().cols()) + {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows.value(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols.value(); + + return m_argImpl.coeff(actual_row, actual_col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_index = internal::traits::RowsAtCompileTime==1 + ? (ColFactor==1 ? index : index%m_cols.value()) + : (RowFactor==1 ? index : index%m_rows.value()); + + return m_argImpl.coeff(actual_index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows.value(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols.value(); + + return m_argImpl.template packet(actual_row, actual_col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + const Index actual_index = internal::traits::RowsAtCompileTime==1 + ? (ColFactor==1 ? index : index%m_cols.value()) + : (RowFactor==1 ? index : index%m_rows.value()); + + return m_argImpl.template packet(actual_index); + } + +protected: + const ArgTypeNested m_arg; + evaluator m_argImpl; + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + + +// -------------------- PartialReduxExpr -------------------- + +template< typename ArgType, typename MemberOp, int Direction> +struct evaluator > + : evaluator_base > +{ + typedef PartialReduxExpr XprType; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + typedef typename ArgType::Scalar InputScalar; + typedef typename XprType::Scalar Scalar; + enum { + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) + }; + typedef typename MemberOp::template Cost CostOpType; + enum { + CoeffReadCost = TraversalSize==Dynamic ? HugeCost + : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), + + Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit, + + Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) + : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value)); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar coeff(Index i, Index j) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(j)); + else + return m_functor(m_arg.row(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar coeff(Index index) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(index)); + else + return m_functor(m_arg.row(index)); + } + +protected: + typename internal::add_const_on_value_type::type m_arg; + const MemberOp m_functor; +}; + + +// -------------------- MatrixWrapper and ArrayWrapper -------------------- +// +// evaluator_wrapper_base is a common base class for the +// MatrixWrapper and ArrayWrapper evaluators. + +template +struct evaluator_wrapper_base + : evaluator_base +{ + typedef typename remove_all::type ArgType; + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + + typedef typename ArgType::Scalar Scalar; + typedef typename ArgType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(row, col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + m_argImpl.template writePacket(row, col, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + evaluator m_argImpl; +}; + +template +struct unary_evaluator > + : evaluator_wrapper_base > +{ + typedef MatrixWrapper XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) + { } +}; + +template +struct unary_evaluator > + : evaluator_wrapper_base > +{ + typedef ArrayWrapper XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) + { } +}; + + +// -------------------- Reverse -------------------- + +// defined in Reverse.h: +template struct reverse_packet_cond; + +template +struct unary_evaluator > + : evaluator_base > +{ + typedef Reverse XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::IsRowMajor, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + ReversePacket = (Direction == BothDirections) + || ((Direction == Vertical) && IsColMajor) + || ((Direction == Horizontal) && IsRowMajor), + + CoeffReadCost = evaluator::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator::Flags, + LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1)) + ? LinearAccessBit : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess), + + Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f. + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) + : m_argImpl(reverse.nestedExpression()), + m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1), + m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, + ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, + ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + enum { + PacketSize = unpacket_traits::size, + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 + }; + typedef internal::reverse_packet_cond reverse_packet; + return reverse_packet::run(m_argImpl.template packet( + ReverseRow ? m_rows.value() - row - OffsetRow : row, + ReverseCol ? m_cols.value() - col - OffsetCol : col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + enum { PacketSize = unpacket_traits::size }; + return preverse(m_argImpl.template packet(m_rows.value() * m_cols.value() - index - PacketSize)); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + // FIXME we could factorize some code with packet(i,j) + enum { + PacketSize = unpacket_traits::size, + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 + }; + typedef internal::reverse_packet_cond reverse_packet; + m_argImpl.template writePacket( + ReverseRow ? m_rows.value() - row - OffsetRow : row, + ReverseCol ? m_cols.value() - col - OffsetCol : col, + reverse_packet::run(x)); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + enum { PacketSize = unpacket_traits::size }; + m_argImpl.template writePacket + (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x)); + } + +protected: + evaluator m_argImpl; + + // If we do not reverse rows, then we do not need to know the number of rows; same for columns + // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors. + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + + +// -------------------- Diagonal -------------------- + +template +struct evaluator > + : evaluator_base > +{ + typedef Diagonal XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + Flags = (unsigned int)(evaluator::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit, + + Alignment = 0 + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) + : m_argImpl(diagonal.nestedExpression()), + m_index(diagonal.index()) + { } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index) const + { + return m_argImpl.coeff(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index + rowOffset(), index + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index) + { + return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); + } + +protected: + evaluator m_argImpl; + const internal::variable_if_dynamicindex m_index; + +private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } +}; + + +//---------------------------------------------------------------------- +// deprecated code +//---------------------------------------------------------------------- + +// -------------------- EvalToTemp -------------------- + +// expression class for evaluating nested expression to a temporary + +template class EvalToTemp; + +template +struct traits > + : public traits +{ }; + +template +class EvalToTemp + : public dense_xpr_base >::type +{ + public: + + typedef typename dense_xpr_base::type Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) + + explicit EvalToTemp(const ArgType& arg) + : m_arg(arg) + { } + + const ArgType& arg() const + { + return m_arg; + } + + Index rows() const + { + return m_arg.rows(); + } + + Index cols() const + { + return m_arg.cols(); + } + + private: + const ArgType& m_arg; +}; + +template +struct evaluator > + : public evaluator +{ + typedef EvalToTemp XprType; + typedef typename ArgType::PlainObject PlainObject; + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.arg()) + { + ::new (static_cast(this)) Base(m_result); + } + + // This constructor is used when nesting an EvalTo evaluator in another evaluator + EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) + : m_result(arg) + { + ::new (static_cast(this)) Base(m_result); + } + +protected: + PlainObject m_result; +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COREEVALUATORS_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CoreIterators.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CoreIterators.h new file mode 100644 index 0000000..4eb42b9 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CoreIterators.h @@ -0,0 +1,127 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COREITERATORS_H +#define EIGEN_COREITERATORS_H + +namespace Eigen { + +/* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core + */ + +namespace internal { + +template +class inner_iterator_selector; + +} + +/** \class InnerIterator + * \brief An InnerIterator allows to loop over the element of any matrix expression. + * + * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed. + * + * TODO: add a usage example + */ +template +class InnerIterator +{ +protected: + typedef internal::inner_iterator_selector::Kind> IteratorType; + typedef internal::evaluator EvaluatorType; + typedef typename internal::traits::Scalar Scalar; +public: + /** Construct an iterator over the \a outerId -th row or column of \a xpr */ + InnerIterator(const XprType &xpr, const Index &outerId) + : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize()) + {} + + /// \returns the value of the current coefficient. + EIGEN_STRONG_INLINE Scalar value() const { return m_iter.value(); } + /** Increment the iterator \c *this to the next non-zero coefficient. + * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView + */ + EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } + /// \returns the column or row index of the current coefficient. + EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } + /// \returns the row index of the current coefficient. + EIGEN_STRONG_INLINE Index row() const { return m_iter.row(); } + /// \returns the column index of the current coefficient. + EIGEN_STRONG_INLINE Index col() const { return m_iter.col(); } + /// \returns \c true if the iterator \c *this still references a valid coefficient. + EIGEN_STRONG_INLINE operator bool() const { return m_iter; } + +protected: + EvaluatorType m_eval; + IteratorType m_iter; +private: + // If you get here, then you're not using the right InnerIterator type, e.g.: + // SparseMatrix A; + // SparseMatrix::InnerIterator it(A,0); + template InnerIterator(const EigenBase&,Index outer); +}; + +namespace internal { + +// Generic inner iterator implementation for dense objects +template +class inner_iterator_selector +{ +protected: + typedef evaluator EvaluatorType; + typedef typename traits::Scalar Scalar; + enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit }; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize) + : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize) + {} + + EIGEN_STRONG_INLINE Scalar value() const + { + return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner) + : m_eval.coeff(m_inner, m_outer); + } + + EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; } + + EIGEN_STRONG_INLINE Index index() const { return m_inner; } + inline Index row() const { return IsRowMajor ? m_outer : index(); } + inline Index col() const { return IsRowMajor ? index() : m_outer; } + + EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } + +protected: + const EvaluatorType& m_eval; + Index m_inner; + const Index m_outer; + const Index m_end; +}; + +// For iterator-based evaluator, inner-iterator is already implemented as +// evaluator<>::InnerIterator +template +class inner_iterator_selector + : public evaluator::InnerIterator +{ +protected: + typedef typename evaluator::InnerIterator Base; + typedef evaluator EvaluatorType; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/) + : Base(eval, outerId) + {} +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COREITERATORS_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseBinaryOp.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseBinaryOp.h new file mode 100644 index 0000000..a36765e --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseBinaryOp.h @@ -0,0 +1,184 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_BINARY_OP_H +#define EIGEN_CWISE_BINARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > +{ + // we must not inherit from traits since it has + // the potential to cause problems with MSVC + typedef typename remove_all::type Ancestor; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime + }; + + // even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor), + // we still want to handle the case when the result type is different. + typedef typename result_of< + BinaryOp( + const typename Lhs::Scalar&, + const typename Rhs::Scalar& + ) + >::type Scalar; + typedef typename cwise_promote_storage_type::StorageKind, + typename traits::StorageKind, + BinaryOp>::ret StorageKind; + typedef typename promote_index_type::StorageIndex, + typename traits::StorageIndex>::type StorageIndex; + typedef typename Lhs::Nested LhsNested; + typedef typename Rhs::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + enum { + Flags = cwise_promote_storage_order::StorageKind,typename traits::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value + }; +}; +} // end namespace internal + +template +class CwiseBinaryOpImpl; + +/** \class CwiseBinaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions + * + * \tparam BinaryOp template functor implementing the operator + * \tparam LhsType the type of the left-hand side + * \tparam RhsType the type of the right-hand side + * + * This class represents an expression where a coefficient-wise binary operator is applied to two expressions. + * It is the return type of binary operators, by which we mean only those binary operators where + * both the left-hand side and the right-hand side are Eigen expressions. + * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseBinaryOp types explicitly. + * + * \sa MatrixBase::binaryExpr(const MatrixBase &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp + */ +template +class CwiseBinaryOp : + public CwiseBinaryOpImpl< + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>, + internal::no_assignment_operator +{ + public: + + typedef typename internal::remove_all::type Functor; + typedef typename internal::remove_all::type Lhs; + typedef typename internal::remove_all::type Rhs; + + typedef typename CwiseBinaryOpImpl< + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) + + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; + typedef typename internal::remove_reference::type _LhsNested; + typedef typename internal::remove_reference::type _RhsNested; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp()) + : m_lhs(aLhs), m_rhs(aRhs), m_functor(func) + { + EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar); + // require the sizes to match + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs) + eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { + // return the fixed size type if available to enable compile time optimizations + if (internal::traits::type>::RowsAtCompileTime==Dynamic) + return m_rhs.rows(); + else + return m_lhs.rows(); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { + // return the fixed size type if available to enable compile time optimizations + if (internal::traits::type>::ColsAtCompileTime==Dynamic) + return m_rhs.cols(); + else + return m_lhs.cols(); + } + + /** \returns the left hand side nested expression */ + EIGEN_DEVICE_FUNC + const _LhsNested& lhs() const { return m_lhs; } + /** \returns the right hand side nested expression */ + EIGEN_DEVICE_FUNC + const _RhsNested& rhs() const { return m_rhs; } + /** \returns the functor representing the binary operation */ + EIGEN_DEVICE_FUNC + const BinaryOp& functor() const { return m_functor; } + + protected: + LhsNested m_lhs; + RhsNested m_rhs; + const BinaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseBinaryOpImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; +}; + +/** replaces \c *this by \c *this - \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +MatrixBase::operator-=(const MatrixBase &other) +{ + call_assignment(derived(), other.derived(), internal::sub_assign_op()); + return derived(); +} + +/** replaces \c *this by \c *this + \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +MatrixBase::operator+=(const MatrixBase& other) +{ + call_assignment(derived(), other.derived(), internal::add_assign_op()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_CWISE_BINARY_OP_H + diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseNullaryOp.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseNullaryOp.h new file mode 100644 index 0000000..ddd607e --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseNullaryOp.h @@ -0,0 +1,866 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_NULLARY_OP_H +#define EIGEN_CWISE_NULLARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > : traits +{ + enum { + Flags = traits::Flags & RowMajorBit + }; +}; + +} // namespace internal + +/** \class CwiseNullaryOp + * \ingroup Core_Module + * + * \brief Generic expression of a matrix where all coefficients are defined by a functor + * + * \tparam NullaryOp template functor implementing the operator + * \tparam PlainObjectType the underlying plain matrix/array type + * + * This class represents an expression of a generic nullary operator. + * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods, + * and most of the time this is the only way it is used. + * + * However, if you want to write a function returning such an expression, you + * will need to use this class. + * + * The functor NullaryOp must expose one of the following method: + + + + +
\c operator()() if the procedural generation does not depend on the coefficient entries (e.g., random numbers)
\c operator()(Index i)if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace)
\c operator()(Index i,Index j)if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1)
+ * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors. + * + * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding + * C++11 random number generators. + * + * A nullary expression can also be used to implement custom sophisticated matrix manipulations + * that cannot be covered by the existing set of natively supported matrix manipulations. + * See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations + * on the behavior of CwiseNullaryOp. + * + * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr + */ +template +class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp >::type, internal::no_assignment_operator +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp) + + EIGEN_DEVICE_FUNC + CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp()) + : m_rows(rows), m_cols(cols), m_functor(func) + { + eigen_assert(rows >= 0 + && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols >= 0 + && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); } + + /** \returns the functor representing the nullary operation */ + EIGEN_DEVICE_FUNC + const NullaryOp& functor() const { return m_functor; } + + protected: + const internal::variable_if_dynamic m_rows; + const internal::variable_if_dynamic m_cols; + const NullaryOp m_functor; +}; + + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> +DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) +{ + return CwiseNullaryOp(rows, cols, func); +} + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Zero() should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * Here is an example with C++11 random generators: \include random_cpp11.cpp + * Output: \verbinclude random_cpp11.out + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> +DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); + else return CwiseNullaryOp(size, 1, func); +} + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * This variant is only for fixed-size DenseBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> +DenseBase::NullaryExpr(const CustomNullaryOp& func) +{ + return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); +} + +/** \returns an expression of a constant matrix of value \a value + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this DenseBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(Index rows, Index cols, const Scalar& value) +{ + return DenseBase::NullaryExpr(rows, cols, internal::scalar_constant_op(value)); +} + +/** \returns an expression of a constant matrix of value \a value + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this DenseBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Zero() should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(Index size, const Scalar& value) +{ + return DenseBase::NullaryExpr(size, internal::scalar_constant_op(value)); +} + +/** \returns an expression of a constant matrix of value \a value + * + * This variant is only for fixed-size DenseBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(const Scalar& value) +{ + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op(value)); +} + +/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&) + * + * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); +} + +/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&) + * + * \sa LinSpaced(Scalar,Scalar) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); +} + +/** + * \brief Sets a linearly spaced vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * Example: \include DenseBase_LinSpaced.cpp + * Output: \verbinclude DenseBase_LinSpaced.out + * + * For integer scalar types, an even spacing is possible if and only if the length of the range, + * i.e., \c high-low is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the + * number of values \c high-low+1 (meaning each value can be repeated the same number of time). + * If one of these two considions is not satisfied, then \c high is lowered to the largest value + * satisfying one of this constraint. + * Here are some examples: + * + * Example: \include DenseBase_LinSpacedInt.cpp + * Output: \verbinclude DenseBase_LinSpacedInt.out + * + * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); +} + +/** + * \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&) + * Special version for fixed size types which does not require the size parameter. + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); +} + +/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ +template +EIGEN_DEVICE_FUNC bool DenseBase::isApproxToConstant +(const Scalar& val, const RealScalar& prec) const +{ + typename internal::nested_eval::type self(derived()); + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if(!internal::isApprox(self.coeff(i, j), val, prec)) + return false; + return true; +} + +/** This is just an alias for isApproxToConstant(). + * + * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ +template +EIGEN_DEVICE_FUNC bool DenseBase::isConstant +(const Scalar& val, const RealScalar& prec) const +{ + return isApproxToConstant(val, prec); +} + +/** Alias for setConstant(): sets all coefficients in this expression to \a val. + * + * \sa setConstant(), Constant(), class CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase::fill(const Scalar& val) +{ + setConstant(val); +} + +/** Sets all coefficients in this expression to value \a val. + * + * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) +{ + return derived() = Constant(rows(), cols(), val); +} + +/** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val. + * + * \only_for_vectors + * + * Example: \include Matrix_setConstant_int.cpp + * Output: \verbinclude Matrix_setConstant_int.out + * + * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setConstant(Index size, const Scalar& val) +{ + resize(size); + return setConstant(val); +} + +/** Resizes to the given size, and sets all coefficients in this expression to the given value \a val. + * + * \param rows the new number of rows + * \param cols the new number of columns + * \param val the value to which all coefficients are set + * + * Example: \include Matrix_setConstant_int_int.cpp + * Output: \verbinclude Matrix_setConstant_int_int.out + * + * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setConstant(Index rows, Index cols, const Scalar& val) +{ + resize(rows, cols); + return setConstant(val); +} + +/** + * \brief Sets a linearly spaced vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * Example: \include DenseBase_setLinSpaced.cpp + * Output: \verbinclude DenseBase_setLinSpaced.out + * + * For integer scalar types, do not miss the explanations on the definition + * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink. + * + * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); +} + +/** + * \brief Sets a linearly spaced vector. + * + * The function fills \c *this with equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * For integer scalar types, do not miss the explanations on the definition + * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink. + * + * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return setLinSpaced(size(), low, high); +} + +// zero: + +/** \returns an expression of a zero matrix. + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used + * instead. + * + * Example: \include MatrixBase_zero_int_int.cpp + * Output: \verbinclude MatrixBase_zero_int_int.out + * + * \sa Zero(), Zero(Index) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Zero(Index rows, Index cols) +{ + return Constant(rows, cols, Scalar(0)); +} + +/** \returns an expression of a zero vector. + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Zero() should be used + * instead. + * + * Example: \include MatrixBase_zero_int.cpp + * Output: \verbinclude MatrixBase_zero_int.out + * + * \sa Zero(), Zero(Index,Index) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Zero(Index size) +{ + return Constant(size, Scalar(0)); +} + +/** \returns an expression of a fixed-size zero matrix or vector. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * Example: \include MatrixBase_zero.cpp + * Output: \verbinclude MatrixBase_zero.out + * + * \sa Zero(Index), Zero(Index,Index) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Zero() +{ + return Constant(Scalar(0)); +} + +/** \returns true if *this is approximately equal to the zero matrix, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isZero.cpp + * Output: \verbinclude MatrixBase_isZero.out + * + * \sa class CwiseNullaryOp, Zero() + */ +template +EIGEN_DEVICE_FUNC bool DenseBase::isZero(const RealScalar& prec) const +{ + typename internal::nested_eval::type self(derived()); + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) + return false; + return true; +} + +/** Sets all coefficients in this expression to zero. + * + * Example: \include MatrixBase_setZero.cpp + * Output: \verbinclude MatrixBase_setZero.out + * + * \sa class CwiseNullaryOp, Zero() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setZero() +{ + return setConstant(Scalar(0)); +} + +/** Resizes to the given \a size, and sets all coefficients in this expression to zero. + * + * \only_for_vectors + * + * Example: \include Matrix_setZero_int.cpp + * Output: \verbinclude Matrix_setZero_int.out + * + * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setZero(Index newSize) +{ + resize(newSize); + return setConstant(Scalar(0)); +} + +/** Resizes to the given size, and sets all coefficients in this expression to zero. + * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setZero_int_int.cpp + * Output: \verbinclude Matrix_setZero_int_int.out + * + * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setZero(Index rows, Index cols) +{ + resize(rows, cols); + return setConstant(Scalar(0)); +} + +// ones: + +/** \returns an expression of a matrix where all coefficients equal one. + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Ones() should be used + * instead. + * + * Example: \include MatrixBase_ones_int_int.cpp + * Output: \verbinclude MatrixBase_ones_int_int.out + * + * \sa Ones(), Ones(Index), isOnes(), class Ones + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Ones(Index rows, Index cols) +{ + return Constant(rows, cols, Scalar(1)); +} + +/** \returns an expression of a vector where all coefficients equal one. + * + * The parameter \a newSize is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Ones() should be used + * instead. + * + * Example: \include MatrixBase_ones_int.cpp + * Output: \verbinclude MatrixBase_ones_int.out + * + * \sa Ones(), Ones(Index,Index), isOnes(), class Ones + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Ones(Index newSize) +{ + return Constant(newSize, Scalar(1)); +} + +/** \returns an expression of a fixed-size matrix or vector where all coefficients equal one. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * Example: \include MatrixBase_ones.cpp + * Output: \verbinclude MatrixBase_ones.out + * + * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Ones() +{ + return Constant(Scalar(1)); +} + +/** \returns true if *this is approximately equal to the matrix where all coefficients + * are equal to 1, within the precision given by \a prec. + * + * Example: \include MatrixBase_isOnes.cpp + * Output: \verbinclude MatrixBase_isOnes.out + * + * \sa class CwiseNullaryOp, Ones() + */ +template +EIGEN_DEVICE_FUNC bool DenseBase::isOnes +(const RealScalar& prec) const +{ + return isApproxToConstant(Scalar(1), prec); +} + +/** Sets all coefficients in this expression to one. + * + * Example: \include MatrixBase_setOnes.cpp + * Output: \verbinclude MatrixBase_setOnes.out + * + * \sa class CwiseNullaryOp, Ones() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setOnes() +{ + return setConstant(Scalar(1)); +} + +/** Resizes to the given \a newSize, and sets all coefficients in this expression to one. + * + * \only_for_vectors + * + * Example: \include Matrix_setOnes_int.cpp + * Output: \verbinclude Matrix_setOnes_int.out + * + * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setOnes(Index newSize) +{ + resize(newSize); + return setConstant(Scalar(1)); +} + +/** Resizes to the given size, and sets all coefficients in this expression to one. + * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setOnes_int_int.cpp + * Output: \verbinclude Matrix_setOnes_int_int.out + * + * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setOnes(Index rows, Index cols) +{ + resize(rows, cols); + return setConstant(Scalar(1)); +} + +// Identity: + +/** \returns an expression of the identity matrix (not necessarily square). + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Identity() should be used + * instead. + * + * Example: \include MatrixBase_identity_int_int.cpp + * Output: \verbinclude MatrixBase_identity_int_int.out + * + * \sa Identity(), setIdentity(), isIdentity() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +MatrixBase::Identity(Index rows, Index cols) +{ + return DenseBase::NullaryExpr(rows, cols, internal::scalar_identity_op()); +} + +/** \returns an expression of the identity matrix (not necessarily square). + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variant taking size arguments. + * + * Example: \include MatrixBase_identity.cpp + * Output: \verbinclude MatrixBase_identity.out + * + * \sa Identity(Index,Index), setIdentity(), isIdentity() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +MatrixBase::Identity() +{ + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return MatrixBase::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op()); +} + +/** \returns true if *this is approximately equal to the identity matrix + * (not necessarily square), + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isIdentity.cpp + * Output: \verbinclude MatrixBase_isIdentity.out + * + * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), setIdentity() + */ +template +bool MatrixBase::isIdentity +(const RealScalar& prec) const +{ + typename internal::nested_eval::type self(derived()); + for(Index j = 0; j < cols(); ++j) + { + for(Index i = 0; i < rows(); ++i) + { + if(i == j) + { + if(!internal::isApprox(self.coeff(i, j), static_cast(1), prec)) + return false; + } + else + { + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) + return false; + } + } + } + return true; +} + +namespace internal { + +template=16)> +struct setIdentity_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Derived& run(Derived& m) + { + return m = Derived::Identity(m.rows(), m.cols()); + } +}; + +template +struct setIdentity_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Derived& run(Derived& m) + { + m.setZero(); + const Index size = numext::mini(m.rows(), m.cols()); + for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1); + return m; + } +}; + +} // end namespace internal + +/** Writes the identity expression (not necessarily square) into *this. + * + * Example: \include MatrixBase_setIdentity.cpp + * Output: \verbinclude MatrixBase_setIdentity.out + * + * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() +{ + return internal::setIdentity_impl::run(derived()); +} + +/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this. + * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setIdentity_int_int.cpp + * Output: \verbinclude Matrix_setIdentity_int_int.out + * + * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index rows, Index cols) +{ + derived().resize(rows, cols); + return setIdentity(); +} + +/** \returns an expression of the i-th unit (basis) vector. + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index newSize, Index i) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i); +} + +/** \returns an expression of the i-th unit (basis) vector. + * + * \only_for_vectors + * + * This variant is for fixed-size vector only. + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index i) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return BasisReturnType(SquareMatrixType::Identity(),i); +} + +/** \returns an expression of the X axis unit vector (1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitX() +{ return Derived::Unit(0); } + +/** \returns an expression of the Y axis unit vector (0,1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitY() +{ return Derived::Unit(1); } + +/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitZ() +{ return Derived::Unit(2); } + +/** \returns an expression of the W axis unit vector (0,0,0,1) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitW() +{ return Derived::Unit(3); } + +} // end namespace Eigen + +#endif // EIGEN_CWISE_NULLARY_OP_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseTernaryOp.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseTernaryOp.h new file mode 100644 index 0000000..9f3576f --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseTernaryOp.h @@ -0,0 +1,197 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_TERNARY_OP_H +#define EIGEN_CWISE_TERNARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > { + // we must not inherit from traits since it has + // the potential to cause problems with MSVC + typedef typename remove_all::type Ancestor; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime + }; + + // even though we require Arg1, Arg2, and Arg3 to have the same scalar type + // (see CwiseTernaryOp constructor), + // we still want to handle the case when the result type is different. + typedef typename result_of::type Scalar; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + typedef typename Arg1::Nested Arg1Nested; + typedef typename Arg2::Nested Arg2Nested; + typedef typename Arg3::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + enum { Flags = _Arg1Nested::Flags & RowMajorBit }; +}; +} // end namespace internal + +template +class CwiseTernaryOpImpl; + +/** \class CwiseTernaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise ternary operator is + * applied to two expressions + * + * \tparam TernaryOp template functor implementing the operator + * \tparam Arg1Type the type of the first argument + * \tparam Arg2Type the type of the second argument + * \tparam Arg3Type the type of the third argument + * + * This class represents an expression where a coefficient-wise ternary + * operator is applied to three expressions. + * It is the return type of ternary operators, by which we mean only those + * ternary operators where + * all three arguments are Eigen expressions. + * For example, the return type of betainc(matrix1, matrix2, matrix3) is a + * CwiseTernaryOp. + * + * Most of the time, this is the only way that it is used, so you typically + * don't have to name + * CwiseTernaryOp types explicitly. + * + * \sa MatrixBase::ternaryExpr(const MatrixBase &, const + * MatrixBase &, const CustomTernaryOp &) const, class CwiseBinaryOp, + * class CwiseUnaryOp, class CwiseNullaryOp + */ +template +class CwiseTernaryOp : public CwiseTernaryOpImpl< + TernaryOp, Arg1Type, Arg2Type, Arg3Type, + typename internal::traits::StorageKind>, + internal::no_assignment_operator +{ + public: + typedef typename internal::remove_all::type Arg1; + typedef typename internal::remove_all::type Arg2; + typedef typename internal::remove_all::type Arg3; + + typedef typename CwiseTernaryOpImpl< + TernaryOp, Arg1Type, Arg2Type, Arg3Type, + typename internal::traits::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp) + + typedef typename internal::ref_selector::type Arg1Nested; + typedef typename internal::ref_selector::type Arg2Nested; + typedef typename internal::ref_selector::type Arg3Nested; + typedef typename internal::remove_reference::type _Arg1Nested; + typedef typename internal::remove_reference::type _Arg2Nested; + typedef typename internal::remove_reference::type _Arg3Nested; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2, + const Arg3& a3, + const TernaryOp& func = TernaryOp()) + : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) { + // require the sizes to match + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3) + + // The index types should match + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + + eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() && + a1.rows() == a3.rows() && a1.cols() == a3.cols()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits::type>:: + RowsAtCompileTime == Dynamic && + internal::traits::type>:: + RowsAtCompileTime == Dynamic) + return m_arg3.rows(); + else if (internal::traits::type>:: + RowsAtCompileTime == Dynamic && + internal::traits::type>:: + RowsAtCompileTime == Dynamic) + return m_arg2.rows(); + else + return m_arg1.rows(); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits::type>:: + ColsAtCompileTime == Dynamic && + internal::traits::type>:: + ColsAtCompileTime == Dynamic) + return m_arg3.cols(); + else if (internal::traits::type>:: + ColsAtCompileTime == Dynamic && + internal::traits::type>:: + ColsAtCompileTime == Dynamic) + return m_arg2.cols(); + else + return m_arg1.cols(); + } + + /** \returns the first argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg1Nested& arg1() const { return m_arg1; } + /** \returns the first argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg2Nested& arg2() const { return m_arg2; } + /** \returns the third argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg3Nested& arg3() const { return m_arg3; } + /** \returns the functor representing the ternary operation */ + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + protected: + Arg1Nested m_arg1; + Arg2Nested m_arg2; + Arg3Nested m_arg3; + const TernaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseTernaryOpImpl + : public internal::generic_xpr_base< + CwiseTernaryOp >::type { + public: + typedef typename internal::generic_xpr_base< + CwiseTernaryOp >::type Base; +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_TERNARY_OP_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseUnaryOp.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseUnaryOp.h new file mode 100644 index 0000000..1d2dd19 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseUnaryOp.h @@ -0,0 +1,103 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_UNARY_OP_H +#define EIGEN_CWISE_UNARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > + : traits +{ + typedef typename result_of< + UnaryOp(const typename XprType::Scalar&) + >::type Scalar; + typedef typename XprType::Nested XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + enum { + Flags = _XprTypeNested::Flags & RowMajorBit + }; +}; +} + +template +class CwiseUnaryOpImpl; + +/** \class CwiseUnaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise unary operator is applied to an expression + * + * \tparam UnaryOp template functor implementing the operator + * \tparam XprType the type of the expression to which we are applying the unary operator + * + * This class represents an expression where a unary operator is applied to an expression. + * It is the return type of all operations taking exactly 1 input expression, regardless of the + * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix + * is considered unary, because only the right-hand side is an expression, and its + * return type is a specialization of CwiseUnaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseUnaryOp types explicitly. + * + * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp + */ +template +class CwiseUnaryOp : public CwiseUnaryOpImpl::StorageKind>, internal::no_assignment_operator +{ + public: + + typedef typename CwiseUnaryOpImpl::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) + typedef typename internal::ref_selector::type XprTypeNested; + typedef typename internal::remove_all::type NestedExpression; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + : m_xpr(xpr), m_functor(func) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index cols() const { return m_xpr.cols(); } + + /** \returns the functor representing the unary operation */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const UnaryOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all::type& + nestedExpression() const { return m_xpr; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename internal::remove_all::type& + nestedExpression() { return m_xpr; } + + protected: + XprTypeNested m_xpr; + const UnaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseUnaryOpImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_UNARY_OP_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseUnaryView.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseUnaryView.h new file mode 100644 index 0000000..2710330 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/CwiseUnaryView.h @@ -0,0 +1,128 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_UNARY_VIEW_H +#define EIGEN_CWISE_UNARY_VIEW_H + +namespace Eigen { + +namespace internal { +template +struct traits > + : traits +{ + typedef typename result_of< + ViewOp(const typename traits::Scalar&) + >::type Scalar; + typedef typename MatrixType::Nested MatrixTypeNested; + typedef typename remove_all::type _MatrixTypeNested; + enum { + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions + MatrixTypeInnerStride = inner_stride_at_compile_time::ret, + // need to cast the sizeof's from size_t to int explicitly, otherwise: + // "error: no integral type can represent all of the enumerator values + InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic + ? int(Dynamic) + : int(MatrixTypeInnerStride) * int(sizeof(typename traits::Scalar) / sizeof(Scalar)), + OuterStrideAtCompileTime = outer_stride_at_compile_time::ret == Dynamic + ? int(Dynamic) + : outer_stride_at_compile_time::ret * int(sizeof(typename traits::Scalar) / sizeof(Scalar)) + }; +}; +} + +template +class CwiseUnaryViewImpl; + +/** \class CwiseUnaryView + * \ingroup Core_Module + * + * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector + * + * \tparam ViewOp template functor implementing the view + * \tparam MatrixType the type of the matrix we are applying the unary operator + * + * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector. + * It is the return type of real() and imag(), and most of the time this is the only way it is used. + * + * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp + */ +template +class CwiseUnaryView : public CwiseUnaryViewImpl::StorageKind> +{ + public: + + typedef typename CwiseUnaryViewImpl::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) + typedef typename internal::ref_selector::non_const_type MatrixTypeNested; + typedef typename internal::remove_all::type NestedExpression; + + explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) + : m_matrix(mat), m_functor(func) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) + + EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); } + + /** \returns the functor representing unary operation */ + const ViewOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + const typename internal::remove_all::type& + nestedExpression() const { return m_matrix; } + + /** \returns the nested expression */ + typename internal::remove_reference::type& + nestedExpression() { return m_matrix.const_cast_derived(); } + + protected: + MatrixTypeNested m_matrix; + ViewOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseUnaryViewImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; +}; + +template +class CwiseUnaryViewImpl + : public internal::dense_xpr_base< CwiseUnaryView >::type +{ + public: + + typedef CwiseUnaryView Derived; + typedef typename internal::dense_xpr_base< CwiseUnaryView >::type Base; + + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) + + EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); } + + EIGEN_DEVICE_FUNC inline Index innerStride() const + { + return derived().nestedExpression().innerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); + } + + EIGEN_DEVICE_FUNC inline Index outerStride() const + { + return derived().nestedExpression().outerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_UNARY_VIEW_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DenseBase.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DenseBase.h new file mode 100644 index 0000000..90066ae --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DenseBase.h @@ -0,0 +1,611 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSEBASE_H +#define EIGEN_DENSEBASE_H + +namespace Eigen { + +namespace internal { + +// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type. +// This dummy function simply aims at checking that at compile time. +static inline void check_DenseIndex_is_signed() { + EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE); +} + +} // end namespace internal + +/** \class DenseBase + * \ingroup Core_Module + * + * \brief Base class for all dense matrices, vectors, and arrays + * + * This class is the base that is inherited by all dense objects (matrix, vector, arrays, + * and related expression types). The common Eigen API for dense objects is contained in this class. + * + * \tparam Derived is the derived type, e.g., a matrix type or an expression. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. + * + * \sa \blank \ref TopicClassHierarchy + */ +template class DenseBase +#ifndef EIGEN_PARSED_BY_DOXYGEN + : public DenseCoeffsBase +#else + : public DenseCoeffsBase +#endif // not EIGEN_PARSED_BY_DOXYGEN +{ + public: + + /** Inner iterator type to iterate over the coefficients of a row or column. + * \sa class InnerIterator + */ + typedef Eigen::InnerIterator InnerIterator; + + typedef typename internal::traits::StorageKind StorageKind; + + /** + * \brief The type used to store indices + * \details This typedef is relevant for types that store multiple indices such as + * PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index + * \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase. + */ + typedef typename internal::traits::StorageIndex StorageIndex; + + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex, etc. */ + typedef typename internal::traits::Scalar Scalar; + + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex, etc. + * + * It is an alias for the Scalar type */ + typedef Scalar value_type; + + typedef typename NumTraits::Real RealScalar; + typedef DenseCoeffsBase Base; + + using Base::derived; + using Base::const_cast_derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::rowIndexByOuterInner; + using Base::colIndexByOuterInner; + using Base::coeff; + using Base::coeffByOuterInner; + using Base::operator(); + using Base::operator[]; + using Base::x; + using Base::y; + using Base::z; + using Base::w; + using Base::stride; + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::colStride; + typedef typename Base::CoeffReturnType CoeffReturnType; + + enum { + + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + /**< The number of rows at compile-time. This is just a copy of the value provided + * by the \a Derived type. If a value is not known at compile-time, + * it is set to the \a Dynamic constant. + * \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */ + + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + /**< The number of columns at compile-time. This is just a copy of the value provided + * by the \a Derived type. If a value is not known at compile-time, + * it is set to the \a Dynamic constant. + * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ + + + SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, + internal::traits::ColsAtCompileTime>::ret), + /**< This is equal to the number of coefficients, i.e. the number of + * rows times the number of columns, or to \a Dynamic if this is not + * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ + + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + /**< This value is equal to the maximum possible number of rows that this expression + * might have. If this expression might have an arbitrarily high number of rows, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime + */ + + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + /**< This value is equal to the maximum possible number of columns that this expression + * might have. If this expression might have an arbitrarily high number of columns, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime + */ + + MaxSizeAtCompileTime = (internal::size_at_compile_time::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime>::ret), + /**< This value is equal to the maximum possible number of coefficients that this expression + * might have. If this expression might have an arbitrarily high number of coefficients, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime + */ + + IsVectorAtCompileTime = internal::traits::MaxRowsAtCompileTime == 1 + || internal::traits::MaxColsAtCompileTime == 1, + /**< This is set to true if either the number of rows or the number of + * columns is known at compile-time to be equal to 1. Indeed, in that case, + * we are dealing with a column-vector (if there is only one column) or with + * a row-vector (if there is only one row). */ + + Flags = internal::traits::Flags, + /**< This stores expression \ref flags flags which may or may not be inherited by new expressions + * constructed from this one. See the \ref flags "list of flags". + */ + + IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression has row-major storage order. */ + + InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) + : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + + InnerStrideAtCompileTime = internal::inner_stride_at_compile_time::ret, + OuterStrideAtCompileTime = internal::outer_stride_at_compile_time::ret + }; + + typedef typename internal::find_best_packet::type PacketScalar; + + enum { IsPlainObjectBase = 0 }; + + /** The plain matrix type corresponding to this expression. + * \sa PlainObject */ + typedef Matrix::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainMatrix; + + /** The plain array type corresponding to this expression. + * \sa PlainObject */ + typedef Array::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainArray; + + /** \brief The plain matrix or array type corresponding to this expression. + * + * This is not necessarily exactly the return type of eval(). In the case of plain matrices, + * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed + * that the return type of eval() is either PlainObject or const PlainObject&. + */ + typedef typename internal::conditional::XprKind,MatrixXpr >::value, + PlainMatrix, PlainArray>::type PlainObject; + + /** \returns the number of nonzero coefficients which is in practice the number + * of stored coefficients. */ + EIGEN_DEVICE_FUNC + inline Index nonZeros() const { return size(); } + + /** \returns the outer size. + * + * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension + * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a + * column-major matrix, and the number of rows for a row-major matrix. */ + EIGEN_DEVICE_FUNC + Index outerSize() const + { + return IsVectorAtCompileTime ? 1 + : int(IsRowMajor) ? this->rows() : this->cols(); + } + + /** \returns the inner size. + * + * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension + * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a + * column-major matrix, and the number of columns for a row-major matrix. */ + EIGEN_DEVICE_FUNC + Index innerSize() const + { + return IsVectorAtCompileTime ? this->size() + : int(IsRowMajor) ? this->cols() : this->rows(); + } + + /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are + * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does + * nothing else. + */ + EIGEN_DEVICE_FUNC + void resize(Index newSize) + { + EIGEN_ONLY_USED_FOR_DEBUG(newSize); + eigen_assert(newSize == this->size() + && "DenseBase::resize() does not actually allow to resize."); + } + /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are + * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does + * nothing else. + */ + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) + { + EIGEN_ONLY_USED_FOR_DEBUG(rows); + EIGEN_ONLY_USED_FOR_DEBUG(cols); + eigen_assert(rows == this->rows() && cols == this->cols() + && "DenseBase::resize() does not actually allow to resize."); + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; + /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */ + typedef CwiseNullaryOp,PlainObject> SequentialLinSpacedReturnType; + /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ + typedef CwiseNullaryOp,PlainObject> RandomAccessLinSpacedReturnType; + /** \internal the return type of MatrixBase::eigenvalues() */ + typedef Matrix::Scalar>::Real, internal::traits::ColsAtCompileTime, 1> EigenvaluesReturnType; + +#endif // not EIGEN_PARSED_BY_DOXYGEN + + /** Copies \a other into *this. \returns a reference to *this. */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const DenseBase& other); + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const DenseBase& other); + + template + EIGEN_DEVICE_FUNC + Derived& operator=(const EigenBase &other); + + template + EIGEN_DEVICE_FUNC + Derived& operator+=(const EigenBase &other); + + template + EIGEN_DEVICE_FUNC + Derived& operator-=(const EigenBase &other); + + template + EIGEN_DEVICE_FUNC + Derived& operator=(const ReturnByValue& func); + + /** \internal + * Copies \a other into *this without evaluating other. \returns a reference to *this. + * \deprecated */ + template + EIGEN_DEVICE_FUNC + Derived& lazyAssign(const DenseBase& other); + + EIGEN_DEVICE_FUNC + CommaInitializer operator<< (const Scalar& s); + + /** \deprecated it now returns \c *this */ + template + EIGEN_DEPRECATED + const Derived& flagged() const + { return derived(); } + + template + EIGEN_DEVICE_FUNC + CommaInitializer operator<< (const DenseBase& other); + + typedef Transpose TransposeReturnType; + EIGEN_DEVICE_FUNC + TransposeReturnType transpose(); + typedef typename internal::add_const >::type ConstTransposeReturnType; + EIGEN_DEVICE_FUNC + ConstTransposeReturnType transpose() const; + EIGEN_DEVICE_FUNC + void transposeInPlace(); + + EIGEN_DEVICE_FUNC static const ConstantReturnType + Constant(Index rows, Index cols, const Scalar& value); + EIGEN_DEVICE_FUNC static const ConstantReturnType + Constant(Index size, const Scalar& value); + EIGEN_DEVICE_FUNC static const ConstantReturnType + Constant(const Scalar& value); + + EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType + LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType + LinSpaced(Index size, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType + LinSpaced(Sequential_t, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType + LinSpaced(const Scalar& low, const Scalar& high); + + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp + NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func); + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp + NullaryExpr(Index size, const CustomNullaryOp& func); + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp + NullaryExpr(const CustomNullaryOp& func); + + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size); + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(); + + EIGEN_DEVICE_FUNC void fill(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setZero(); + EIGEN_DEVICE_FUNC Derived& setOnes(); + EIGEN_DEVICE_FUNC Derived& setRandom(); + + template EIGEN_DEVICE_FUNC + bool isApprox(const DenseBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC + bool isMuchSmallerThan(const RealScalar& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + template EIGEN_DEVICE_FUNC + bool isMuchSmallerThan(const DenseBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + + EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; + + inline bool hasNaN() const; + inline bool allFinite() const; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator*=(const Scalar& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator/=(const Scalar& other); + + typedef typename internal::add_const_on_value_type::type>::type EvalReturnType; + /** \returns the matrix or vector obtained by evaluating this expression. + * + * Notice that in the case of a plain matrix or vector (not an expression) this function just returns + * a const reference, in order to avoid a useless copy. + * + * \warning Be carefull with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE EvalReturnType eval() const + { + // Even though MSVC does not honor strong inlining when the return type + // is a dynamic matrix, we desperately need strong inlining for fixed + // size types on MSVC. + return typename internal::eval::type(derived()); + } + + /** swaps *this with the expression \a other. + * + */ + template + EIGEN_DEVICE_FUNC + void swap(const DenseBase& other) + { + EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); + } + + /** swaps *this with the matrix or array \a other. + * + */ + template + EIGEN_DEVICE_FUNC + void swap(PlainObjectBase& other) + { + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.derived(), internal::swap_assign_op()); + } + + EIGEN_DEVICE_FUNC inline const NestByValue nestByValue() const; + EIGEN_DEVICE_FUNC inline const ForceAlignedAccess forceAlignedAccess() const; + EIGEN_DEVICE_FUNC inline ForceAlignedAccess forceAlignedAccess(); + template EIGEN_DEVICE_FUNC + inline const typename internal::conditional,Derived&>::type forceAlignedAccessIf() const; + template EIGEN_DEVICE_FUNC + inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + + EIGEN_DEVICE_FUNC Scalar sum() const; + EIGEN_DEVICE_FUNC Scalar mean() const; + EIGEN_DEVICE_FUNC Scalar trace() const; + + EIGEN_DEVICE_FUNC Scalar prod() const; + + EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff() const; + EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff() const; + + template EIGEN_DEVICE_FUNC + typename internal::traits::Scalar minCoeff(IndexType* row, IndexType* col) const; + template EIGEN_DEVICE_FUNC + typename internal::traits::Scalar maxCoeff(IndexType* row, IndexType* col) const; + template EIGEN_DEVICE_FUNC + typename internal::traits::Scalar minCoeff(IndexType* index) const; + template EIGEN_DEVICE_FUNC + typename internal::traits::Scalar maxCoeff(IndexType* index) const; + + template + EIGEN_DEVICE_FUNC + Scalar redux(const BinaryOp& func) const; + + template + EIGEN_DEVICE_FUNC + void visit(Visitor& func) const; + + /** \returns a WithFormat proxy object allowing to print a matrix the with given + * format \a fmt. + * + * See class IOFormat for some examples. + * + * \sa class IOFormat, class WithFormat + */ + inline const WithFormat format(const IOFormat& fmt) const + { + return WithFormat(derived(), fmt); + } + + /** \returns the unique coefficient of a 1x1 expression */ + EIGEN_DEVICE_FUNC + CoeffReturnType value() const + { + EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) + eigen_assert(this->rows() == 1 && this->cols() == 1); + return derived().coeff(0,0); + } + + EIGEN_DEVICE_FUNC bool all() const; + EIGEN_DEVICE_FUNC bool any() const; + EIGEN_DEVICE_FUNC Index count() const; + + typedef VectorwiseOp RowwiseReturnType; + typedef const VectorwiseOp ConstRowwiseReturnType; + typedef VectorwiseOp ColwiseReturnType; + typedef const VectorwiseOp ConstColwiseReturnType; + + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_rowwise.cpp + * Output: \verbinclude MatrixBase_rowwise.out + * + * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const { + return ConstRowwiseReturnType(derived()); + } + EIGEN_DEVICE_FUNC RowwiseReturnType rowwise(); + + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_colwise.cpp + * Output: \verbinclude MatrixBase_colwise.out + * + * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const { + return ConstColwiseReturnType(derived()); + } + EIGEN_DEVICE_FUNC ColwiseReturnType colwise(); + + typedef CwiseNullaryOp,PlainObject> RandomReturnType; + static const RandomReturnType Random(Index rows, Index cols); + static const RandomReturnType Random(Index size); + static const RandomReturnType Random(); + + template + const Select + select(const DenseBase& thenMatrix, + const DenseBase& elseMatrix) const; + + template + inline const Select + select(const DenseBase& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const; + + template + inline const Select + select(const typename ElseDerived::Scalar& thenScalar, const DenseBase& elseMatrix) const; + + template RealScalar lpNorm() const; + + template + EIGEN_DEVICE_FUNC + const Replicate replicate() const; + /** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate_int_int.cpp + * Output: \verbinclude MatrixBase_replicate_int_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC + const Replicate replicate(Index rowFactor, Index colFactor) const + { + return Replicate(derived(), rowFactor, colFactor); + } + + typedef Reverse ReverseReturnType; + typedef const Reverse ConstReverseReturnType; + EIGEN_DEVICE_FUNC ReverseReturnType reverse(); + /** This is the const version of reverse(). */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const + { + return ConstReverseReturnType(derived()); + } + EIGEN_DEVICE_FUNC void reverseInPlace(); + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase +#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) +# include "../plugins/BlockMethods.h" +# ifdef EIGEN_DENSEBASE_PLUGIN +# include EIGEN_DENSEBASE_PLUGIN +# endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF + + // disable the use of evalTo for dense objects with a nice compilation error + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& ) const + { + EIGEN_STATIC_ASSERT((internal::is_same::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS); + } + + protected: + /** Default constructor. Do nothing. */ + EIGEN_DEVICE_FUNC DenseBase() + { + /* Just checks for self-consistency of the flags. + * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down + */ +#ifdef EIGEN_INTERNAL_DEBUGGING + EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor)) + && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))), + INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION) +#endif + } + + private: + EIGEN_DEVICE_FUNC explicit DenseBase(int); + EIGEN_DEVICE_FUNC DenseBase(int,int); + template EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase&); +}; + +} // end namespace Eigen + +#endif // EIGEN_DENSEBASE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DenseCoeffsBase.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DenseCoeffsBase.h new file mode 100644 index 0000000..c4af48a --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DenseCoeffsBase.h @@ -0,0 +1,681 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSECOEFFSBASE_H +#define EIGEN_DENSECOEFFSBASE_H + +namespace Eigen { + +namespace internal { +template struct add_const_on_value_type_if_arithmetic +{ + typedef typename conditional::value, T, typename add_const_on_value_type::type>::type type; +}; +} + +/** \brief Base class providing read-only coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #ReadOnlyAccessors Constant indicating read-only access + * + * This class defines the \c operator() \c const function and friends, which can be used to read specific + * entries of a matrix or array. + * + * \sa DenseCoeffsBase, DenseCoeffsBase, + * \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public EigenBase +{ + public: + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + + // Explanation for this CoeffReturnType typedef. + // - This is the return type of the coeff() method. + // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references + // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value). + // - The is_artihmetic check is required since "const int", "const double", etc. will cause warnings on some systems + // while the declaration of "const T", where T is a non arithmetic type does not. Always returning "const Scalar&" is + // not possible, since the underlying expressions might not offer a valid address the reference could be referring to. + typedef typename internal::conditional::Flags&LvalueBit), + const Scalar&, + typename internal::conditional::value, Scalar, const Scalar>::type + >::type CoeffReturnType; + + typedef typename internal::add_const_on_value_type_if_arithmetic< + typename internal::packet_traits::type + >::type PacketReturnType; + + typedef EigenBase Base; + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const + { + return int(Derived::RowsAtCompileTime) == 1 ? 0 + : int(Derived::ColsAtCompileTime) == 1 ? inner + : int(Derived::Flags)&RowMajorBit ? outer + : inner; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const + { + return int(Derived::ColsAtCompileTime) == 1 ? 0 + : int(Derived::RowsAtCompileTime) == 1 ? inner + : int(Derived::Flags)&RowMajorBit ? inner + : outer; + } + + /** Short version: don't use this function, use + * \link operator()(Index,Index) const \endlink instead. + * + * Long version: this function is similar to + * \link operator()(Index,Index) const \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator()(Index,Index) const \endlink. + * + * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const + { + eigen_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeff(row,col); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const + { + return coeff(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); + } + + /** \returns the coefficient at given the given row and column. + * + * \sa operator()(Index,Index), operator[](Index) + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const + { + eigen_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return coeff(row, col); + } + + /** Short version: don't use this function, use + * \link operator[](Index) const \endlink instead. + * + * Long version: this function is similar to + * \link operator[](Index) const \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameter \a index is in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator[](Index) const \endlink. + * + * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + coeff(Index index) const + { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + eigen_internal_assert(index >= 0 && index < size()); + return internal::evaluator(derived()).coeff(index); + } + + + /** \returns the coefficient at given index. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const, + * z() const, w() const + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + operator[](Index index) const + { + EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) + eigen_assert(index >= 0 && index < size()); + return coeff(index); + } + + /** \returns the coefficient at given index. + * + * This is synonymous to operator[](Index) const. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const, + * z() const, w() const + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + operator()(Index index) const + { + eigen_assert(index >= 0 && index < size()); + return coeff(index); + } + + /** equivalent to operator[](0). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + x() const { return (*this)[0]; } + + /** equivalent to operator[](1). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + y() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS); + return (*this)[1]; + } + + /** equivalent to operator[](2). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + z() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS); + return (*this)[2]; + } + + /** equivalent to operator[](3). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + w() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS); + return (*this)[3]; + } + + /** \internal + * \returns the packet of coefficients starting at the given row and column. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit. + * + * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ + + template + EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const + { + typedef typename internal::packet_traits::type DefaultPacketType; + eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return internal::evaluator(derived()).template packet(row,col); + } + + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const + { + return packet(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); + } + + /** \internal + * \returns the packet of coefficients starting at the given index. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit and the LinearAccessBit. + * + * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ + + template + EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + typedef typename internal::packet_traits::type DefaultPacketType; + eigen_internal_assert(index >= 0 && index < size()); + return internal::evaluator(derived()).template packet(index); + } + + protected: + // explanation: DenseBase is doing "using ..." on the methods from DenseCoeffsBase. + // But some methods are only available in the DirectAccess case. + // So we add dummy methods here with these names, so that "using... " doesn't fail. + // It's not private so that the child class DenseBase can access them, and it's not public + // either since it's an implementation detail, so has to be protected. + void coeffRef(); + void coeffRefByOuterInner(); + void writePacket(); + void writePacketByOuterInner(); + void copyCoeff(); + void copyCoeffByOuterInner(); + void copyPacket(); + void copyPacketByOuterInner(); + void stride(); + void innerStride(); + void outerStride(); + void rowStride(); + void colStride(); +}; + +/** \brief Base class providing read/write coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #WriteAccessors Constant indicating read/write access + * + * This class defines the non-const \c operator() function and friends, which can be used to write specific + * entries of a matrix or array. This class inherits DenseCoeffsBase which + * defines the const variant for reading specific entries. + * + * \sa DenseCoeffsBase, \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public DenseCoeffsBase +{ + public: + + typedef DenseCoeffsBase Base; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + using Base::coeff; + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + using Base::rowIndexByOuterInner; + using Base::colIndexByOuterInner; + using Base::operator[]; + using Base::operator(); + using Base::x; + using Base::y; + using Base::z; + using Base::w; + + /** Short version: don't use this function, use + * \link operator()(Index,Index) \endlink instead. + * + * Long version: this function is similar to + * \link operator()(Index,Index) \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator()(Index,Index) \endlink. + * + * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index) + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) + { + eigen_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeffRef(row,col); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + coeffRefByOuterInner(Index outer, Index inner) + { + return coeffRef(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); + } + + /** \returns a reference to the coefficient at given the given row and column. + * + * \sa operator[](Index) + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + operator()(Index row, Index col) + { + eigen_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return coeffRef(row, col); + } + + + /** Short version: don't use this function, use + * \link operator[](Index) \endlink instead. + * + * Long version: this function is similar to + * \link operator[](Index) \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator[](Index) \endlink. + * + * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index) + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + coeffRef(Index index) + { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + eigen_internal_assert(index >= 0 && index < size()); + return internal::evaluator(derived()).coeffRef(index); + } + + /** \returns a reference to the coefficient at given index. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w() + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + operator[](Index index) + { + EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + /** \returns a reference to the coefficient at given index. + * + * This is synonymous to operator[](Index). + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w() + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + operator()(Index index) + { + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + /** equivalent to operator[](0). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + x() { return (*this)[0]; } + + /** equivalent to operator[](1). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + y() + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS); + return (*this)[1]; + } + + /** equivalent to operator[](2). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + z() + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS); + return (*this)[2]; + } + + /** equivalent to operator[](3). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + w() + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS); + return (*this)[3]; + } +}; + +/** \brief Base class providing direct read-only coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #DirectAccessors Constant indicating direct access + * + * This class defines functions to work with strides which can be used to access entries directly. This class + * inherits DenseCoeffsBase which defines functions to access entries read-only using + * \c operator() . + * + * \sa \blank \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public DenseCoeffsBase +{ + public: + + typedef DenseCoeffsBase Base; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + + /** \returns the pointer increment between two consecutive elements within a slice in the inner direction. + * + * \sa outerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index innerStride() const + { + return derived().innerStride(); + } + + /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns + * in a column-major matrix). + * + * \sa innerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index outerStride() const + { + return derived().outerStride(); + } + + // FIXME shall we remove it ? + inline Index stride() const + { + return Derived::IsVectorAtCompileTime ? innerStride() : outerStride(); + } + + /** \returns the pointer increment between two consecutive rows. + * + * \sa innerStride(), outerStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index rowStride() const + { + return Derived::IsRowMajor ? outerStride() : innerStride(); + } + + /** \returns the pointer increment between two consecutive columns. + * + * \sa innerStride(), outerStride(), rowStride() + */ + EIGEN_DEVICE_FUNC + inline Index colStride() const + { + return Derived::IsRowMajor ? innerStride() : outerStride(); + } +}; + +/** \brief Base class providing direct read/write coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #DirectWriteAccessors Constant indicating direct access + * + * This class defines functions to work with strides which can be used to access entries directly. This class + * inherits DenseCoeffsBase which defines functions to access entries read/write using + * \c operator(). + * + * \sa \blank \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase + : public DenseCoeffsBase +{ + public: + + typedef DenseCoeffsBase Base; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + + /** \returns the pointer increment between two consecutive elements within a slice in the inner direction. + * + * \sa outerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index innerStride() const + { + return derived().innerStride(); + } + + /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns + * in a column-major matrix). + * + * \sa innerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index outerStride() const + { + return derived().outerStride(); + } + + // FIXME shall we remove it ? + inline Index stride() const + { + return Derived::IsVectorAtCompileTime ? innerStride() : outerStride(); + } + + /** \returns the pointer increment between two consecutive rows. + * + * \sa innerStride(), outerStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index rowStride() const + { + return Derived::IsRowMajor ? outerStride() : innerStride(); + } + + /** \returns the pointer increment between two consecutive columns. + * + * \sa innerStride(), outerStride(), rowStride() + */ + EIGEN_DEVICE_FUNC + inline Index colStride() const + { + return Derived::IsRowMajor ? innerStride() : outerStride(); + } +}; + +namespace internal { + +template +struct first_aligned_impl +{ + static inline Index run(const Derived&) + { return 0; } +}; + +template +struct first_aligned_impl +{ + static inline Index run(const Derived& m) + { + return internal::first_aligned(m.data(), m.size()); + } +}; + +/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization. + * + * \tparam Alignment requested alignment in Bytes. + * + * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more + * documentation. + */ +template +static inline Index first_aligned(const DenseBase& m) +{ + enum { ReturnZero = (int(evaluator::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) }; + return first_aligned_impl::run(m.derived()); +} + +template +static inline Index first_default_aligned(const DenseBase& m) +{ + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type DefaultPacketType; + return internal::first_aligned::alignment),Derived>(m); +} + +template::ret> +struct inner_stride_at_compile_time +{ + enum { ret = traits::InnerStrideAtCompileTime }; +}; + +template +struct inner_stride_at_compile_time +{ + enum { ret = 0 }; +}; + +template::ret> +struct outer_stride_at_compile_time +{ + enum { ret = traits::OuterStrideAtCompileTime }; +}; + +template +struct outer_stride_at_compile_time +{ + enum { ret = 0 }; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DENSECOEFFSBASE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DenseStorage.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DenseStorage.h new file mode 100644 index 0000000..7958fee --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DenseStorage.h @@ -0,0 +1,570 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2009 Benoit Jacob +// Copyright (C) 2010-2013 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIXSTORAGE_H +#define EIGEN_MATRIXSTORAGE_H + +#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN; +#else + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) +#endif + +namespace Eigen { + +namespace internal { + +struct constructor_without_unaligned_array_assert {}; + +template +EIGEN_DEVICE_FUNC +void check_static_allocation_size() +{ + // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit + #if EIGEN_STACK_ALLOCATION_LIMIT + EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + #endif +} + +/** \internal + * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: + * to 16 bytes boundary if the total size is a multiple of 16 bytes. + */ +template ::value > +struct plain_array +{ + T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) + #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) +#elif EIGEN_GNUC_AT_LEAST(4,7) + // GCC 4.7 is too aggressive in its optimizations and remove the alignement test based on the fact the array is declared to be aligned. + // See this bug report: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53900 + // Hiding the origin of the array pointer behind a function argument seems to do the trick even if the function is inlined: + template + EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; } + #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ + eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \ + && "this assertion is explained here: " \ + "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ + " **** READ THIS WEB PAGE !!! ****"); +#else + #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ + eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \ + && "this assertion is explained here: " \ + "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ + " **** READ THIS WEB PAGE !!! ****"); +#endif + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + T array[1]; + EIGEN_DEVICE_FUNC plain_array() {} + EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {} +}; + +} // end namespace internal + +/** \internal + * + * \class DenseStorage + * \ingroup Core_Module + * + * \brief Stores the data of a matrix + * + * This class stores the data of fixed-size, dynamic-size or mixed matrices + * in a way as compact as possible. + * + * \sa Matrix + */ +template class DenseStorage; + +// purely fixed-size matrix +template class DenseStorage +{ + internal::plain_array m_data; + public: + EIGEN_DEVICE_FUNC DenseStorage() { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } + EIGEN_DEVICE_FUNC + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()) {} + EIGEN_DEVICE_FUNC + DenseStorage(const DenseStorage& other) : m_data(other.m_data) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) m_data = other.m_data; + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols); + EIGEN_UNUSED_VARIABLE(size); + EIGEN_UNUSED_VARIABLE(rows); + EIGEN_UNUSED_VARIABLE(cols); + } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } +}; + +// null matrix +template class DenseStorage +{ + public: + EIGEN_DEVICE_FUNC DenseStorage() {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; } + EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {} + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC const T *data() const { return 0; } + EIGEN_DEVICE_FUNC T *data() { return 0; } +}; + +// more specializations for null matrices; these are necessary to resolve ambiguities +template class DenseStorage +: public DenseStorage { }; + +template class DenseStorage +: public DenseStorage { }; + +template class DenseStorage +: public DenseStorage { }; + +// dynamic-size matrix with fixed-size storage +template class DenseStorage +{ + internal::plain_array m_data; + Index m_rows; + Index m_cols; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_rows = other.m_rows; + m_cols = other.m_cols; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) + { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC Index rows() const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols() const {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } +}; + +// dynamic-size matrix with fixed-size storage and fixed width +template class DenseStorage +{ + internal::plain_array m_data; + Index m_rows; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_rows = other.m_rows; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } +}; + +// dynamic-size matrix with fixed-size storage and fixed height +template class DenseStorage +{ + internal::plain_array m_data; + Index m_cols; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_cols = other.m_cols; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + void conservativeResize(Index, Index, Index cols) { m_cols = cols; } + void resize(Index, Index, Index cols) { m_cols = cols; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } +}; + +// purely dynamic matrix. +template class DenseStorage +{ + T *m_data; + Index m_rows; + Index m_cols; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(0), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) + : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows), m_cols(cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*other.m_cols)) + , m_rows(other.m_rows) + , m_cols(other.m_cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols) + internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT + : m_data(std::move(other.m_data)) + , m_rows(std::move(other.m_rows)) + , m_cols(std::move(other.m_cols)) + { + other.m_data = nullptr; + other.m_rows = 0; + other.m_cols = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT + { + using std::swap; + swap(m_data, other.m_data); + swap(m_rows, other.m_rows); + swap(m_cols, other.m_cols); + return *this; + } +#endif + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) + { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + void conservativeResize(Index size, Index rows, Index cols) + { + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*m_cols); + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols) + { + if(size != m_rows*m_cols) + { + internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); + if (size) + m_data = internal::conditional_aligned_new_auto(size); + else + m_data = 0; + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + } + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC const T *data() const { return m_data; } + EIGEN_DEVICE_FUNC T *data() { return m_data; } +}; + +// matrix with dynamic width and fixed height (so that matrix has dynamic size). +template class DenseStorage +{ + T *m_data; + Index m_cols; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0); + EIGEN_UNUSED_VARIABLE(rows); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(_Rows*other.m_cols)) + , m_cols(other.m_cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows) + internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT + : m_data(std::move(other.m_data)) + , m_cols(std::move(other.m_cols)) + { + other.m_data = nullptr; + other.m_cols = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT + { + using std::swap; + swap(m_data, other.m_data); + swap(m_cols, other.m_cols); + return *this; + } +#endif + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols) + { + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, _Rows*m_cols); + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols) + { + if(size != _Rows*m_cols) + { + internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); + if (size) + m_data = internal::conditional_aligned_new_auto(size); + else + m_data = 0; + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + } + m_cols = cols; + } + EIGEN_DEVICE_FUNC const T *data() const { return m_data; } + EIGEN_DEVICE_FUNC T *data() { return m_data; } +}; + +// matrix with dynamic height and fixed width (so that matrix has dynamic size). +template class DenseStorage +{ + T *m_data; + Index m_rows; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols); + EIGEN_UNUSED_VARIABLE(cols); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*_Cols)) + , m_rows(other.m_rows) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols) + internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT + : m_data(std::move(other.m_data)) + , m_rows(std::move(other.m_rows)) + { + other.m_data = nullptr; + other.m_rows = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT + { + using std::swap; + swap(m_data, other.m_data); + swap(m_rows, other.m_rows); + return *this; + } +#endif + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + void conservativeResize(Index size, Index rows, Index) + { + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*_Cols); + m_rows = rows; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index) + { + if(size != m_rows*_Cols) + { + internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); + if (size) + m_data = internal::conditional_aligned_new_auto(size); + else + m_data = 0; + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + } + m_rows = rows; + } + EIGEN_DEVICE_FUNC const T *data() const { return m_data; } + EIGEN_DEVICE_FUNC T *data() { return m_data; } +}; + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Diagonal.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Diagonal.h new file mode 100644 index 0000000..49e7112 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Diagonal.h @@ -0,0 +1,257 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2009 Benoit Jacob +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONAL_H +#define EIGEN_DIAGONAL_H + +namespace Eigen { + +/** \class Diagonal + * \ingroup Core_Module + * + * \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix + * + * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal + * \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal. + * A positive value means a superdiagonal, a negative value means a subdiagonal. + * You can also use DynamicIndex so the index can be set at runtime. + * + * The matrix is not required to be square. + * + * This class represents an expression of the main diagonal, or any sub/super diagonal + * of a square matrix. It is the return type of MatrixBase::diagonal() and MatrixBase::diagonal(Index) and most of the + * time this is the only way it is used. + * + * \sa MatrixBase::diagonal(), MatrixBase::diagonal(Index) + */ + +namespace internal { +template +struct traits > + : traits +{ + typedef typename ref_selector::type MatrixTypeNested; + typedef typename remove_reference::type _MatrixTypeNested; + typedef typename MatrixType::StorageKind StorageKind; + enum { + RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic + : (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0), + MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), + ColsAtCompileTime = 1, + MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic + : DiagIndex == DynamicIndex ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime, + MatrixType::MaxColsAtCompileTime) + : (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0), + MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), + MaxColsAtCompileTime = 1, + MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions + MatrixTypeOuterStride = outer_stride_at_compile_time::ret, + InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1, + OuterStrideAtCompileTime = 0 + }; +}; +} + +template class Diagonal + : public internal::dense_xpr_base< Diagonal >::type +{ + public: + + enum { DiagIndex = _DiagIndex }; + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) + + EIGEN_DEVICE_FUNC + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) + + EIGEN_DEVICE_FUNC + inline Index rows() const + { + return m_index.value()<0 ? numext::mini(m_matrix.cols(),m_matrix.rows()+m_index.value()) + : numext::mini(m_matrix.rows(),m_matrix.cols()-m_index.value()); + } + + EIGEN_DEVICE_FUNC + inline Index cols() const { return 1; } + + EIGEN_DEVICE_FUNC + inline Index innerStride() const + { + return m_matrix.outerStride() + 1; + } + + EIGEN_DEVICE_FUNC + inline Index outerStride() const + { + return 0; + } + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC + inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } + + EIGEN_DEVICE_FUNC + inline Scalar& coeffRef(Index row, Index) + { + EIGEN_STATIC_ASSERT_LVALUE(MatrixType) + return m_matrix.coeffRef(row+rowOffset(), row+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index row, Index) const + { + return m_matrix.coeffRef(row+rowOffset(), row+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline CoeffReturnType coeff(Index row, Index) const + { + return m_matrix.coeff(row+rowOffset(), row+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline Scalar& coeffRef(Index idx) + { + EIGEN_STATIC_ASSERT_LVALUE(MatrixType) + return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index idx) const + { + return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline CoeffReturnType coeff(Index idx) const + { + return m_matrix.coeff(idx+rowOffset(), idx+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline const typename internal::remove_all::type& + nestedExpression() const + { + return m_matrix; + } + + EIGEN_DEVICE_FUNC + inline Index index() const + { + return m_index.value(); + } + + protected: + typename internal::ref_selector::non_const_type m_matrix; + const internal::variable_if_dynamicindex m_index; + + private: + // some compilers may fail to optimize std::max etc in case of compile-time constants... + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; } + // trigger a compile-time error if someone try to call packet + template typename MatrixType::PacketReturnType packet(Index) const; + template typename MatrixType::PacketReturnType packet(Index,Index) const; +}; + +/** \returns an expression of the main diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * Example: \include MatrixBase_diagonal.cpp + * Output: \verbinclude MatrixBase_diagonal.out + * + * \sa class Diagonal */ +template +inline typename MatrixBase::DiagonalReturnType +MatrixBase::diagonal() +{ + return DiagonalReturnType(derived()); +} + +/** This is the const version of diagonal(). */ +template +inline typename MatrixBase::ConstDiagonalReturnType +MatrixBase::diagonal() const +{ + return ConstDiagonalReturnType(derived()); +} + +/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * The template parameter \a DiagIndex represent a super diagonal if \a DiagIndex > 0 + * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal. + * + * Example: \include MatrixBase_diagonal_int.cpp + * Output: \verbinclude MatrixBase_diagonal_int.out + * + * \sa MatrixBase::diagonal(), class Diagonal */ +template +inline typename MatrixBase::DiagonalDynamicIndexReturnType +MatrixBase::diagonal(Index index) +{ + return DiagonalDynamicIndexReturnType(derived(), index); +} + +/** This is the const version of diagonal(Index). */ +template +inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType +MatrixBase::diagonal(Index index) const +{ + return ConstDiagonalDynamicIndexReturnType(derived(), index); +} + +/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * The template parameter \a DiagIndex represent a super diagonal if \a DiagIndex > 0 + * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal. + * + * Example: \include MatrixBase_diagonal_template_int.cpp + * Output: \verbinclude MatrixBase_diagonal_template_int.out + * + * \sa MatrixBase::diagonal(), class Diagonal */ +template +template +inline typename MatrixBase::template DiagonalIndexReturnType::Type +MatrixBase::diagonal() +{ + return typename DiagonalIndexReturnType::Type(derived()); +} + +/** This is the const version of diagonal(). */ +template +template +inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +MatrixBase::diagonal() const +{ + return typename ConstDiagonalIndexReturnType::Type(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_DIAGONAL_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DiagonalMatrix.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DiagonalMatrix.h new file mode 100644 index 0000000..ecfdce8 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DiagonalMatrix.h @@ -0,0 +1,343 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2007-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONALMATRIX_H +#define EIGEN_DIAGONALMATRIX_H + +namespace Eigen { + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +class DiagonalBase : public EigenBase +{ + public: + typedef typename internal::traits::DiagonalVectorType DiagonalVectorType; + typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::RealScalar RealScalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + enum { + RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + IsVectorAtCompileTime = 0, + Flags = NoPreferredStorageOrderBit + }; + + typedef Matrix DenseMatrixType; + typedef DenseMatrixType DenseType; + typedef DiagonalMatrix PlainObject; + + EIGEN_DEVICE_FUNC + inline const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC + inline Derived& derived() { return *static_cast(this); } + + EIGEN_DEVICE_FUNC + DenseMatrixType toDenseMatrix() const { return derived(); } + + EIGEN_DEVICE_FUNC + inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } + EIGEN_DEVICE_FUNC + inline DiagonalVectorType& diagonal() { return derived().diagonal(); } + + EIGEN_DEVICE_FUNC + inline Index rows() const { return diagonal().size(); } + EIGEN_DEVICE_FUNC + inline Index cols() const { return diagonal().size(); } + + template + EIGEN_DEVICE_FUNC + const Product + operator*(const MatrixBase &matrix) const + { + return Product(derived(),matrix.derived()); + } + + typedef DiagonalWrapper, const DiagonalVectorType> > InverseReturnType; + EIGEN_DEVICE_FUNC + inline const InverseReturnType + inverse() const + { + return InverseReturnType(diagonal().cwiseInverse()); + } + + EIGEN_DEVICE_FUNC + inline const DiagonalWrapper + operator*(const Scalar& scalar) const + { + return DiagonalWrapper(diagonal() * scalar); + } + EIGEN_DEVICE_FUNC + friend inline const DiagonalWrapper + operator*(const Scalar& scalar, const DiagonalBase& other) + { + return DiagonalWrapper(scalar * other.diagonal()); + } +}; + +#endif + +/** \class DiagonalMatrix + * \ingroup Core_Module + * + * \brief Represents a diagonal matrix with its storage + * + * \param _Scalar the type of coefficients + * \param SizeAtCompileTime the dimension of the matrix, or Dynamic + * \param MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults + * to SizeAtCompileTime. Most of the time, you do not need to specify it. + * + * \sa class DiagonalWrapper + */ + +namespace internal { +template +struct traits > + : traits > +{ + typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; + typedef DiagonalShape StorageKind; + enum { + Flags = LvalueBit | NoPreferredStorageOrderBit + }; +}; +} +template +class DiagonalMatrix + : public DiagonalBase > +{ + public: + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename internal::traits::DiagonalVectorType DiagonalVectorType; + typedef const DiagonalMatrix& Nested; + typedef _Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + #endif + + protected: + + DiagonalVectorType m_diagonal; + + public: + + /** const version of diagonal(). */ + EIGEN_DEVICE_FUNC + inline const DiagonalVectorType& diagonal() const { return m_diagonal; } + /** \returns a reference to the stored vector of diagonal coefficients. */ + EIGEN_DEVICE_FUNC + inline DiagonalVectorType& diagonal() { return m_diagonal; } + + /** Default constructor without initialization */ + EIGEN_DEVICE_FUNC + inline DiagonalMatrix() {} + + /** Constructs a diagonal matrix with given dimension */ + EIGEN_DEVICE_FUNC + explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} + + /** 2D constructor. */ + EIGEN_DEVICE_FUNC + inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {} + + /** 3D constructor. */ + EIGEN_DEVICE_FUNC + inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {} + + /** Copy constructor. */ + template + EIGEN_DEVICE_FUNC + inline DiagonalMatrix(const DiagonalBase& other) : m_diagonal(other.diagonal()) {} + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */ + inline DiagonalMatrix(const DiagonalMatrix& other) : m_diagonal(other.diagonal()) {} + #endif + + /** generic constructor from expression of the diagonal coefficients */ + template + EIGEN_DEVICE_FUNC + explicit inline DiagonalMatrix(const MatrixBase& other) : m_diagonal(other) + {} + + /** Copy operator. */ + template + EIGEN_DEVICE_FUNC + DiagonalMatrix& operator=(const DiagonalBase& other) + { + m_diagonal = other.diagonal(); + return *this; + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + EIGEN_DEVICE_FUNC + DiagonalMatrix& operator=(const DiagonalMatrix& other) + { + m_diagonal = other.diagonal(); + return *this; + } + #endif + + /** Resizes to given size. */ + EIGEN_DEVICE_FUNC + inline void resize(Index size) { m_diagonal.resize(size); } + /** Sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC + inline void setZero() { m_diagonal.setZero(); } + /** Resizes and sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC + inline void setZero(Index size) { m_diagonal.setZero(size); } + /** Sets this matrix to be the identity matrix of the current size. */ + EIGEN_DEVICE_FUNC + inline void setIdentity() { m_diagonal.setOnes(); } + /** Sets this matrix to be the identity matrix of the given size. */ + EIGEN_DEVICE_FUNC + inline void setIdentity(Index size) { m_diagonal.setOnes(size); } +}; + +/** \class DiagonalWrapper + * \ingroup Core_Module + * + * \brief Expression of a diagonal matrix + * + * \param _DiagonalVectorType the type of the vector of diagonal coefficients + * + * This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients, + * instead wrapping an existing vector expression. It is the return type of MatrixBase::asDiagonal() + * and most of the time this is the only way that it is used. + * + * \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal() + */ + +namespace internal { +template +struct traits > +{ + typedef _DiagonalVectorType DiagonalVectorType; + typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::StorageIndex StorageIndex; + typedef DiagonalShape StorageKind; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + Flags = (traits::Flags & LvalueBit) | NoPreferredStorageOrderBit + }; +}; +} + +template +class DiagonalWrapper + : public DiagonalBase >, internal::no_assignment_operator +{ + public: + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef _DiagonalVectorType DiagonalVectorType; + typedef DiagonalWrapper Nested; + #endif + + /** Constructor from expression of diagonal coefficients to wrap. */ + EIGEN_DEVICE_FUNC + explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} + + /** \returns a const reference to the wrapped expression of diagonal coefficients. */ + EIGEN_DEVICE_FUNC + const DiagonalVectorType& diagonal() const { return m_diagonal; } + + protected: + typename DiagonalVectorType::Nested m_diagonal; +}; + +/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients + * + * \only_for_vectors + * + * Example: \include MatrixBase_asDiagonal.cpp + * Output: \verbinclude MatrixBase_asDiagonal.out + * + * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() + **/ +template +inline const DiagonalWrapper +MatrixBase::asDiagonal() const +{ + return DiagonalWrapper(derived()); +} + +/** \returns true if *this is approximately equal to a diagonal matrix, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isDiagonal.cpp + * Output: \verbinclude MatrixBase_isDiagonal.out + * + * \sa asDiagonal() + */ +template +bool MatrixBase::isDiagonal(const RealScalar& prec) const +{ + if(cols() != rows()) return false; + RealScalar maxAbsOnDiagonal = static_cast(-1); + for(Index j = 0; j < cols(); ++j) + { + RealScalar absOnDiagonal = numext::abs(coeff(j,j)); + if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal; + } + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < j; ++i) + { + if(!internal::isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false; + if(!internal::isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false; + } + return true; +} + +namespace internal { + +template<> struct storage_kind_to_shape { typedef DiagonalShape Shape; }; + +struct Diagonal2Dense {}; + +template<> struct AssignmentKind { typedef Diagonal2Dense Kind; }; + +// Diagonal matrix to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment +{ + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + dst.setZero(); + dst.diagonal() = src.diagonal(); + } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { dst.diagonal() += src.diagonal(); } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { dst.diagonal() -= src.diagonal(); } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DIAGONALMATRIX_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DiagonalProduct.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DiagonalProduct.h new file mode 100644 index 0000000..d372b93 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/DiagonalProduct.h @@ -0,0 +1,28 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2007-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONALPRODUCT_H +#define EIGEN_DIAGONALPRODUCT_H + +namespace Eigen { + +/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal. + */ +template +template +inline const Product +MatrixBase::operator*(const DiagonalBase &a_diagonal) const +{ + return Product(derived(),a_diagonal.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_DIAGONALPRODUCT_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Dot.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Dot.h new file mode 100644 index 0000000..06ef18b --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Dot.h @@ -0,0 +1,315 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008, 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DOT_H +#define EIGEN_DOT_H + +namespace Eigen { + +namespace internal { + +// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot +// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE +// looking at the static assertions. Thus this is a trick to get better compile errors. +template +struct dot_nocheck +{ + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; + EIGEN_DEVICE_FUNC + static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + { + return a.template binaryExpr(b).sum(); + } +}; + +template +struct dot_nocheck +{ + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; + EIGEN_DEVICE_FUNC + static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + { + return a.transpose().template binaryExpr(b).sum(); + } +}; + +} // end namespace internal + +/** \fn MatrixBase::dot + * \returns the dot product of *this with other. + * + * \only_for_vectors + * + * \note If the scalar type is complex numbers, then this function returns the hermitian + * (sesquilinear) dot product, conjugate-linear in the first variable and linear in the + * second variable. + * + * \sa squaredNorm(), norm() + */ +template +template +EIGEN_DEVICE_FUNC +typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType +MatrixBase::dot(const MatrixBase& other) const +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) +#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG)) + typedef internal::scalar_conj_product_op func; + EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar); +#endif + + eigen_assert(size() == other.size()); + + return internal::dot_nocheck::run(*this, other); +} + +//---------- implementation of L2 norm and related functions ---------- + +/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm. + * In both cases, it consists in the sum of the square of all the matrix entries. + * For vectors, this is also equals to the dot product of \c *this with itself. + * + * \sa dot(), norm(), lpNorm() + */ +template +EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const +{ + return numext::real((*this).cwiseAbs2().sum()); +} + +/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm. + * In both cases, it consists in the square root of the sum of the square of all the matrix entries. + * For vectors, this is also equals to the square root of the dot product of \c *this with itself. + * + * \sa lpNorm(), dot(), squaredNorm() + */ +template +inline typename NumTraits::Scalar>::Real MatrixBase::norm() const +{ + return numext::sqrt(squaredNorm()); +} + +/** \returns an expression of the quotient of \c *this by its own norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. + * + * \only_for_vectors + * + * \sa norm(), normalize() + */ +template +inline const typename MatrixBase::PlainObject +MatrixBase::normalized() const +{ + typedef typename internal::nested_eval::type _Nested; + _Nested n(derived()); + RealScalar z = n.squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU + if(z>RealScalar(0)) + return n / numext::sqrt(z); + else + return n; +} + +/** Normalizes the vector, i.e. divides it by its own norm. + * + * \only_for_vectors + * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * + * \sa norm(), normalized() + */ +template +inline void MatrixBase::normalize() +{ + RealScalar z = squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU + if(z>RealScalar(0)) + derived() /= numext::sqrt(z); +} + +/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow. + * + * \only_for_vectors + * + * This method is analogue to the normalized() method, but it reduces the risk of + * underflow and overflow when computing the norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. + * + * \sa stableNorm(), stableNormalize(), normalized() + */ +template +inline const typename MatrixBase::PlainObject +MatrixBase::stableNormalized() const +{ + typedef typename internal::nested_eval::type _Nested; + _Nested n(derived()); + RealScalar w = n.cwiseAbs().maxCoeff(); + RealScalar z = (n/w).squaredNorm(); + if(z>RealScalar(0)) + return n / (numext::sqrt(z)*w); + else + return n; +} + +/** Normalizes the vector while avoid underflow and overflow + * + * \only_for_vectors + * + * This method is analogue to the normalize() method, but it reduces the risk of + * underflow and overflow when computing the norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * + * \sa stableNorm(), stableNormalized(), normalize() + */ +template +inline void MatrixBase::stableNormalize() +{ + RealScalar w = cwiseAbs().maxCoeff(); + RealScalar z = (derived()/w).squaredNorm(); + if(z>RealScalar(0)) + derived() /= numext::sqrt(z)*w; +} + +//---------- implementation of other norms ---------- + +namespace internal { + +template +struct lpNorm_selector +{ + typedef typename NumTraits::Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const MatrixBase& m) + { + EIGEN_USING_STD_MATH(pow) + return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p); + } +}; + +template +struct lpNorm_selector +{ + EIGEN_DEVICE_FUNC + static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) + { + return m.cwiseAbs().sum(); + } +}; + +template +struct lpNorm_selector +{ + EIGEN_DEVICE_FUNC + static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) + { + return m.norm(); + } +}; + +template +struct lpNorm_selector +{ + typedef typename NumTraits::Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const MatrixBase& m) + { + if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0)) + return RealScalar(0); + return m.cwiseAbs().maxCoeff(); + } +}; + +} // end namespace internal + +/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values + * of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ + * norm, that is the maximum of the absolute values of the coefficients of \c *this. + * + * In all cases, if \c *this is empty, then the value 0 is returned. + * + * \note For matrices, this function does not compute the operator-norm. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink. + * + * \sa norm() + */ +template +template +#ifndef EIGEN_PARSED_BY_DOXYGEN +inline typename NumTraits::Scalar>::Real +#else +MatrixBase::RealScalar +#endif +MatrixBase::lpNorm() const +{ + return internal::lpNorm_selector::run(*this); +} + +//---------- implementation of isOrthogonal / isUnitary ---------- + +/** \returns true if *this is approximately orthogonal to \a other, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isOrthogonal.cpp + * Output: \verbinclude MatrixBase_isOrthogonal.out + */ +template +template +bool MatrixBase::isOrthogonal +(const MatrixBase& other, const RealScalar& prec) const +{ + typename internal::nested_eval::type nested(derived()); + typename internal::nested_eval::type otherNested(other.derived()); + return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); +} + +/** \returns true if *this is approximately an unitary matrix, + * within the precision given by \a prec. In the case where the \a Scalar + * type is real numbers, a unitary matrix is an orthogonal matrix, whence the name. + * + * \note This can be used to check whether a family of vectors forms an orthonormal basis. + * Indeed, \c m.isUnitary() returns true if and only if the columns (equivalently, the rows) of m form an + * orthonormal basis. + * + * Example: \include MatrixBase_isUnitary.cpp + * Output: \verbinclude MatrixBase_isUnitary.out + */ +template +bool MatrixBase::isUnitary(const RealScalar& prec) const +{ + typename internal::nested_eval::type self(derived()); + for(Index i = 0; i < cols(); ++i) + { + if(!internal::isApprox(self.col(i).squaredNorm(), static_cast(1), prec)) + return false; + for(Index j = 0; j < i; ++j) + if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast(1), prec)) + return false; + } + return true; +} + +} // end namespace Eigen + +#endif // EIGEN_DOT_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/EigenBase.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/EigenBase.h new file mode 100644 index 0000000..b195506 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/EigenBase.h @@ -0,0 +1,159 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EIGENBASE_H +#define EIGEN_EIGENBASE_H + +namespace Eigen { + +/** \class EigenBase + * \ingroup Core_Module + * + * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). + * + * In other words, an EigenBase object is an object that can be copied into a MatrixBase. + * + * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc. + * + * Notice that this class is trivial, it is only used to disambiguate overloaded functions. + * + * \sa \blank \ref TopicClassHierarchy + */ +template struct EigenBase +{ +// typedef typename internal::plain_matrix_type::type PlainObject; + + /** \brief The interface type of indices + * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. + * \deprecated Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead. + * \sa StorageIndex, \ref TopicPreprocessorDirectives. + */ + typedef Eigen::Index Index; + + // FIXME is it needed? + typedef typename internal::traits::StorageKind StorageKind; + + /** \returns a reference to the derived object */ + EIGEN_DEVICE_FUNC + Derived& derived() { return *static_cast(this); } + /** \returns a const reference to the derived object */ + EIGEN_DEVICE_FUNC + const Derived& derived() const { return *static_cast(this); } + + EIGEN_DEVICE_FUNC + inline Derived& const_cast_derived() const + { return *static_cast(const_cast(this)); } + EIGEN_DEVICE_FUNC + inline const Derived& const_derived() const + { return *static_cast(this); } + + /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ + EIGEN_DEVICE_FUNC + inline Index rows() const { return derived().rows(); } + /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/ + EIGEN_DEVICE_FUNC + inline Index cols() const { return derived().cols(); } + /** \returns the number of coefficients, which is rows()*cols(). + * \sa rows(), cols(), SizeAtCompileTime. */ + EIGEN_DEVICE_FUNC + inline Index size() const { return rows() * cols(); } + + /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */ + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& dst) const + { derived().evalTo(dst); } + + /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */ + template + EIGEN_DEVICE_FUNC + inline void addTo(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + typename Dest::PlainObject res(rows(),cols()); + evalTo(res); + dst += res; + } + + /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */ + template + EIGEN_DEVICE_FUNC + inline void subTo(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + typename Dest::PlainObject res(rows(),cols()); + evalTo(res); + dst -= res; + } + + /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */ + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + dst = dst * this->derived(); + } + + /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */ + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + dst = this->derived() * dst; + } + +}; + +/*************************************************************************** +* Implementation of matrix base methods +***************************************************************************/ + +/** \brief Copies the generic expression \a other into *this. + * + * \details The expression must provide a (templated) evalTo(Derived& dst) const + * function which does the actual job. In practice, this allows any user to write + * its own special matrix without having to modify MatrixBase + * + * \returns a reference to *this. + */ +template +template +EIGEN_DEVICE_FUNC +Derived& DenseBase::operator=(const EigenBase &other) +{ + call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC +Derived& DenseBase::operator+=(const EigenBase &other) +{ + call_assignment(derived(), other.derived(), internal::add_assign_op()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC +Derived& DenseBase::operator-=(const EigenBase &other) +{ + call_assignment(derived(), other.derived(), internal::sub_assign_op()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_EIGENBASE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ForceAlignedAccess.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ForceAlignedAccess.h new file mode 100644 index 0000000..7b08b45 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ForceAlignedAccess.h @@ -0,0 +1,146 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FORCEALIGNEDACCESS_H +#define EIGEN_FORCEALIGNEDACCESS_H + +namespace Eigen { + +/** \class ForceAlignedAccess + * \ingroup Core_Module + * + * \brief Enforce aligned packet loads and stores regardless of what is requested + * + * \param ExpressionType the type of the object of which we are forcing aligned packet access + * + * This class is the return type of MatrixBase::forceAlignedAccess() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::forceAlignedAccess() + */ + +namespace internal { +template +struct traits > : public traits +{}; +} + +template class ForceAlignedAccess + : public internal::dense_xpr_base< ForceAlignedAccess >::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) + + EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const + { + return m_expression.coeff(row, col); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) + { + return m_expression.const_cast_derived().coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const + { + return m_expression.coeff(index); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) + { + return m_expression.const_cast_derived().coeffRef(index); + } + + template + inline const PacketScalar packet(Index row, Index col) const + { + return m_expression.template packet(row, col); + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(row, col, x); + } + + template + inline const PacketScalar packet(Index index) const + { + return m_expression.template packet(index); + } + + template + inline void writePacket(Index index, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(index, x); + } + + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } + + protected: + const ExpressionType& m_expression; + + private: + ForceAlignedAccess& operator=(const ForceAlignedAccess&); +}; + +/** \returns an expression of *this with forced aligned access + * \sa forceAlignedAccessIf(),class ForceAlignedAccess + */ +template +inline const ForceAlignedAccess +MatrixBase::forceAlignedAccess() const +{ + return ForceAlignedAccess(derived()); +} + +/** \returns an expression of *this with forced aligned access + * \sa forceAlignedAccessIf(), class ForceAlignedAccess + */ +template +inline ForceAlignedAccess +MatrixBase::forceAlignedAccess() +{ + return ForceAlignedAccess(derived()); +} + +/** \returns an expression of *this with forced aligned access if \a Enable is true. + * \sa forceAlignedAccess(), class ForceAlignedAccess + */ +template +template +inline typename internal::add_const_on_value_type,Derived&>::type>::type +MatrixBase::forceAlignedAccessIf() const +{ + return derived(); // FIXME This should not work but apparently is never used +} + +/** \returns an expression of *this with forced aligned access if \a Enable is true. + * \sa forceAlignedAccess(), class ForceAlignedAccess + */ +template +template +inline typename internal::conditional,Derived&>::type +MatrixBase::forceAlignedAccessIf() +{ + return derived(); // FIXME This should not work but apparently is never used +} + +} // end namespace Eigen + +#endif // EIGEN_FORCEALIGNEDACCESS_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Fuzzy.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Fuzzy.h new file mode 100644 index 0000000..3e403a0 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Fuzzy.h @@ -0,0 +1,155 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FUZZY_H +#define EIGEN_FUZZY_H + +namespace Eigen { + +namespace internal +{ + +template::IsInteger> +struct isApprox_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) + { + typename internal::nested_eval::type nested(x); + typename internal::nested_eval::type otherNested(y); + return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); + } +}; + +template +struct isApprox_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&) + { + return x.matrix() == y.matrix(); + } +}; + +template::IsInteger> +struct isMuchSmallerThan_object_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) + { + return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum(); + } +}; + +template +struct isMuchSmallerThan_object_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&) + { + return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); + } +}; + +template::IsInteger> +struct isMuchSmallerThan_scalar_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec) + { + return x.cwiseAbs2().sum() <= numext::abs2(prec * y); + } +}; + +template +struct isMuchSmallerThan_scalar_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&) + { + return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); + } +}; + +} // end namespace internal + + +/** \returns \c true if \c *this is approximately equal to \a other, within the precision + * determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. Two vectors \f$ v \f$ and \f$ w \f$ + * are considered to be approximately equal within precision \f$ p \f$ if + * \f[ \Vert v - w \Vert \leqslant p\,\min(\Vert v\Vert, \Vert w\Vert). \f] + * For matrices, the comparison is done using the Hilbert-Schmidt norm (aka Frobenius norm + * L2 norm). + * + * \note Because of the multiplicativeness of this comparison, one can't use this function + * to check whether \c *this is approximately equal to the zero matrix or vector. + * Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix + * or vector. If you want to test whether \c *this is zero, use internal::isMuchSmallerThan(const + * RealScalar&, RealScalar) instead. + * + * \sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const + */ +template +template +bool DenseBase::isApprox( + const DenseBase& other, + const RealScalar& prec +) const +{ + return internal::isApprox_selector::run(derived(), other.derived(), prec); +} + +/** \returns \c true if the norm of \c *this is much smaller than \a other, + * within the precision determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is + * considered to be much smaller than \f$ x \f$ within precision \f$ p \f$ if + * \f[ \Vert v \Vert \leqslant p\,\vert x\vert. \f] + * + * For matrices, the comparison is done using the Hilbert-Schmidt norm. For this reason, + * the value of the reference scalar \a other should come from the Hilbert-Schmidt norm + * of a reference matrix of same dimensions. + * + * \sa isApprox(), isMuchSmallerThan(const DenseBase&, RealScalar) const + */ +template +bool DenseBase::isMuchSmallerThan( + const typename NumTraits::Real& other, + const RealScalar& prec +) const +{ + return internal::isMuchSmallerThan_scalar_selector::run(derived(), other, prec); +} + +/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other, + * within the precision determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is + * considered to be much smaller than a vector \f$ w \f$ within precision \f$ p \f$ if + * \f[ \Vert v \Vert \leqslant p\,\Vert w\Vert. \f] + * For matrices, the comparison is done using the Hilbert-Schmidt norm. + * + * \sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const + */ +template +template +bool DenseBase::isMuchSmallerThan( + const DenseBase& other, + const RealScalar& prec +) const +{ + return internal::isMuchSmallerThan_object_selector::run(derived(), other.derived(), prec); +} + +} // end namespace Eigen + +#endif // EIGEN_FUZZY_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/GeneralProduct.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/GeneralProduct.h new file mode 100644 index 0000000..0f16cd8 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/GeneralProduct.h @@ -0,0 +1,454 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GENERAL_PRODUCT_H +#define EIGEN_GENERAL_PRODUCT_H + +namespace Eigen { + +enum { + Large = 2, + Small = 3 +}; + +namespace internal { + +template struct product_type_selector; + +template struct product_size_category +{ + enum { is_large = MaxSize == Dynamic || + Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || + (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), + value = is_large ? Large + : Size == 1 ? 1 + : Small + }; +}; + +template struct product_type +{ + typedef typename remove_all::type _Lhs; + typedef typename remove_all::type _Rhs; + enum { + MaxRows = traits<_Lhs>::MaxRowsAtCompileTime, + Rows = traits<_Lhs>::RowsAtCompileTime, + MaxCols = traits<_Rhs>::MaxColsAtCompileTime, + Cols = traits<_Rhs>::ColsAtCompileTime, + MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime, + traits<_Rhs>::MaxRowsAtCompileTime), + Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime, + traits<_Rhs>::RowsAtCompileTime) + }; + + // the splitting into different lines of code here, introducing the _select enums and the typedef below, + // is to work around an internal compiler error with gcc 4.1 and 4.2. +private: + enum { + rows_select = product_size_category::value, + cols_select = product_size_category::value, + depth_select = product_size_category::value + }; + typedef product_type_selector selector; + +public: + enum { + value = selector::ret, + ret = selector::ret + }; +#ifdef EIGEN_DEBUG_PRODUCT + static void debug() + { + EIGEN_DEBUG_VAR(Rows); + EIGEN_DEBUG_VAR(Cols); + EIGEN_DEBUG_VAR(Depth); + EIGEN_DEBUG_VAR(rows_select); + EIGEN_DEBUG_VAR(cols_select); + EIGEN_DEBUG_VAR(depth_select); + EIGEN_DEBUG_VAR(value); + } +#endif +}; + +/* The following allows to select the kind of product at compile time + * based on the three dimensions of the product. + * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ +// FIXME I'm not sure the current mapping is the ideal one. +template struct product_type_selector { enum { ret = OuterProduct }; }; +template struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; }; +template struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; }; +template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; }; +template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = GemvProduct }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; + +} // end namespace internal + +/*********************************************************************** +* Implementation of Inner Vector Vector Product +***********************************************************************/ + +// FIXME : maybe the "inner product" could return a Scalar +// instead of a 1x1 matrix ?? +// Pro: more natural for the user +// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix +// product ends up to a row-vector times col-vector product... To tackle this use +// case, we could have a specialization for Block with: operator=(Scalar x); + +/*********************************************************************** +* Implementation of Outer Vector Vector Product +***********************************************************************/ + +/*********************************************************************** +* Implementation of General Matrix Vector Product +***********************************************************************/ + +/* According to the shape/flags of the matrix we have to distinghish 3 different cases: + * 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine + * 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine + * 3 - all other cases are handled using a simple loop along the outer-storage direction. + * Therefore we need a lower level meta selector. + * Furthermore, if the matrix is the rhs, then the product has to be transposed. + */ +namespace internal { + +template +struct gemv_dense_selector; + +} // end namespace internal + +namespace internal { + +template struct gemv_static_vector_if; + +template +struct gemv_static_vector_if +{ + EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } +}; + +template +struct gemv_static_vector_if +{ + EIGEN_STRONG_INLINE Scalar* data() { return 0; } +}; + +template +struct gemv_static_vector_if +{ + enum { + ForceAlignment = internal::packet_traits::Vectorizable, + PacketSize = internal::packet_traits::size + }; + #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 + internal::plain_array m_data; + EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } + #else + // Some architectures cannot align on the stack, + // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. + internal::plain_array m_data; + EIGEN_STRONG_INLINE Scalar* data() { + return ForceAlignment + ? reinterpret_cast((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) + : m_data.array; + } + #endif +}; + +// The vector is on the left => transposition +template +struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + Transpose destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; + gemv_dense_selector + ::run(rhs.transpose(), lhs.transpose(), destT, alpha); + } +}; + +template<> struct gemv_dense_selector +{ + template + static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + + typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; + + ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); + ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); + + // make sure Dest is a compile-time vector type (bug 1166) + typedef typename conditional::type ActualDest; + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... + EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), + ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), + MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal + }; + + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; + RhsScalar compatibleAlpha = get_factor::run(actualAlpha); + + if(!MightCannotUseDest) + { + // shortcut if we are sure to be able to use dest directly, + // this ease the compiler to generate cleaner and more optimzized code for most common cases + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + dest.data(), 1, + compatibleAlpha); + } + else + { + gemv_static_vector_if static_dest; + + const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + evalToDest ? dest.data() : static_dest.data()); + + if(!evalToDest) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + Index size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if(!alphaIsCompatible) + { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } + else + MappedDest(actualDestPtr, dest.size()) = dest; + } + + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + actualDestPtr, 1, + compatibleAlpha); + + if (!evalToDest) + { + if(!alphaIsCompatible) + dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } + } + } +}; + +template<> struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type ActualRhsTypeCleaned; + + typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 + }; + + gemv_static_vector_if static_rhs; + + ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), + DirectlyUseRhs ? const_cast(actualRhs.data()) : static_rhs.data()); + + if(!DirectlyUseRhs) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + Index size = actualRhs.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + Map(actualRhsPtr, actualRhs.size()) = actualRhs; + } + + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhsPtr, 1), + dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166) + actualAlpha); + } +}; + +template<> struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + EIGEN_STATIC_ASSERT((!nested_eval::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); + // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp + typename nested_eval::type actual_rhs(rhs); + const Index size = rhs.rows(); + for(Index k=0; k struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + EIGEN_STATIC_ASSERT((!nested_eval::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); + typename nested_eval::type actual_rhs(rhs); + const Index rows = dest.rows(); + for(Index i=0; i +template +inline const Product +MatrixBase::operator*(const MatrixBase &other) const +{ + // A note regarding the function declaration: In MSVC, this function will sometimes + // not be inlined since DenseStorage is an unwindable object for dynamic + // matrices and product types are holding a member to store the result. + // Thus it does not help tagging this function with EIGEN_STRONG_INLINE. + enum { + ProductIsValid = Derived::ColsAtCompileTime==Dynamic + || OtherDerived::RowsAtCompileTime==Dynamic + || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), + AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, + SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) + }; + // note to the lost user: + // * for a dot product use: v1.dot(v2) + // * for a coeff-wise product use: v1.cwiseProduct(v2) + EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) + EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) + EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) +#ifdef EIGEN_DEBUG_PRODUCT + internal::product_type::debug(); +#endif + + return Product(derived(), other.derived()); +} + +#endif // __CUDACC__ + +/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. + * + * The returned product will behave like any other expressions: the coefficients of the product will be + * computed once at a time as requested. This might be useful in some extremely rare cases when only + * a small and no coherent fraction of the result's coefficients have to be computed. + * + * \warning This version of the matrix product can be much much slower. So use it only if you know + * what you are doing and that you measured a true speed improvement. + * + * \sa operator*(const MatrixBase&) + */ +template +template +const Product +MatrixBase::lazyProduct(const MatrixBase &other) const +{ + enum { + ProductIsValid = Derived::ColsAtCompileTime==Dynamic + || OtherDerived::RowsAtCompileTime==Dynamic + || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), + AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, + SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) + }; + // note to the lost user: + // * for a dot product use: v1.dot(v2) + // * for a coeff-wise product use: v1.cwiseProduct(v2) + EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) + EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) + EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) + + return Product(derived(), other.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/GenericPacketMath.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/GenericPacketMath.h new file mode 100644 index 0000000..029f8ac --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/GenericPacketMath.h @@ -0,0 +1,593 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GENERIC_PACKET_MATH_H +#define EIGEN_GENERIC_PACKET_MATH_H + +namespace Eigen { + +namespace internal { + +/** \internal + * \file GenericPacketMath.h + * + * Default implementation for types not supported by the vectorization. + * In practice these functions are provided to make easier the writing + * of generic vectorized code. + */ + +#ifndef EIGEN_DEBUG_ALIGNED_LOAD +#define EIGEN_DEBUG_ALIGNED_LOAD +#endif + +#ifndef EIGEN_DEBUG_UNALIGNED_LOAD +#define EIGEN_DEBUG_UNALIGNED_LOAD +#endif + +#ifndef EIGEN_DEBUG_ALIGNED_STORE +#define EIGEN_DEBUG_ALIGNED_STORE +#endif + +#ifndef EIGEN_DEBUG_UNALIGNED_STORE +#define EIGEN_DEBUG_UNALIGNED_STORE +#endif + +struct default_packet_traits +{ + enum { + HasHalfPacket = 0, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasArg = 0, + HasAbs2 = 1, + HasMin = 1, + HasMax = 1, + HasConj = 1, + HasSetLinear = 1, + HasBlend = 0, + + HasDiv = 0, + HasSqrt = 0, + HasRsqrt = 0, + HasExp = 0, + HasLog = 0, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 0, + + HasSin = 0, + HasCos = 0, + HasTan = 0, + HasASin = 0, + HasACos = 0, + HasATan = 0, + HasSinh = 0, + HasCosh = 0, + HasTanh = 0, + HasLGamma = 0, + HasDiGamma = 0, + HasZeta = 0, + HasPolygamma = 0, + HasErf = 0, + HasErfc = 0, + HasIGamma = 0, + HasIGammac = 0, + HasBetaInc = 0, + + HasRound = 0, + HasFloor = 0, + HasCeil = 0, + + HasSign = 0 + }; +}; + +template struct packet_traits : default_packet_traits +{ + typedef T type; + typedef T half; + enum { + Vectorizable = 0, + size = 1, + AlignedOnScalar = 0, + HasHalfPacket = 0 + }; + enum { + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasSetLinear = 0 + }; +}; + +template struct packet_traits : packet_traits { }; + +template struct type_casting_traits { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + +/** \internal \returns static_cast(a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a) { + return static_cast(a); +} +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/) { + return static_cast(a); +} + +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) { + return static_cast(a); +} + +/** \internal \returns a + b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +padd(const Packet& a, + const Packet& b) { return a+b; } + +/** \internal \returns a - b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +psub(const Packet& a, + const Packet& b) { return a-b; } + +/** \internal \returns -a (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pnegate(const Packet& a) { return -a; } + +/** \internal \returns conj(a) (coeff-wise) */ + +template EIGEN_DEVICE_FUNC inline Packet +pconj(const Packet& a) { return numext::conj(a); } + +/** \internal \returns a * b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pmul(const Packet& a, + const Packet& b) { return a*b; } + +/** \internal \returns a / b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pdiv(const Packet& a, + const Packet& b) { return a/b; } + +/** \internal \returns the min of \a a and \a b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pmin(const Packet& a, + const Packet& b) { return numext::mini(a, b); } + +/** \internal \returns the max of \a a and \a b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pmax(const Packet& a, + const Packet& b) { return numext::maxi(a, b); } + +/** \internal \returns the absolute value of \a a */ +template EIGEN_DEVICE_FUNC inline Packet +pabs(const Packet& a) { using std::abs; return abs(a); } + +/** \internal \returns the phase angle of \a a */ +template EIGEN_DEVICE_FUNC inline Packet +parg(const Packet& a) { using numext::arg; return arg(a); } + +/** \internal \returns the bitwise and of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pand(const Packet& a, const Packet& b) { return a & b; } + +/** \internal \returns the bitwise or of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +por(const Packet& a, const Packet& b) { return a | b; } + +/** \internal \returns the bitwise xor of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pxor(const Packet& a, const Packet& b) { return a ^ b; } + +/** \internal \returns the bitwise andnot of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pandnot(const Packet& a, const Packet& b) { return a & (!b); } + +/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ +template EIGEN_DEVICE_FUNC inline Packet +pload(const typename unpacket_traits::type* from) { return *from; } + +/** \internal \returns a packet version of \a *from, (un-aligned load) */ +template EIGEN_DEVICE_FUNC inline Packet +ploadu(const typename unpacket_traits::type* from) { return *from; } + +/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ +template EIGEN_DEVICE_FUNC inline Packet +pset1(const typename unpacket_traits::type& a) { return a; } + +/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */ +template EIGEN_DEVICE_FUNC inline Packet +pload1(const typename unpacket_traits::type *a) { return pset1(*a); } + +/** \internal \returns a packet with elements of \a *from duplicated. + * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and + * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]} + * Currently, this function is only used for scalar * complex products. + */ +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet +ploaddup(const typename unpacket_traits::type* from) { return *from; } + +/** \internal \returns a packet with elements of \a *from quadrupled. + * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and + * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]} + * Currently, this function is only used in matrix products. + * For packet-size smaller or equal to 4, this function is equivalent to pload1 + */ +template EIGEN_DEVICE_FUNC inline Packet +ploadquad(const typename unpacket_traits::type* from) +{ return pload1(from); } + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * a2 = pload1(a+2); + * a3 = pload1(a+3); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast2 + */ +template EIGEN_DEVICE_FUNC +inline void pbroadcast4(const typename unpacket_traits::type *a, + Packet& a0, Packet& a1, Packet& a2, Packet& a3) +{ + a0 = pload1(a+0); + a1 = pload1(a+1); + a2 = pload1(a+2); + a3 = pload1(a+3); +} + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast4 + */ +template EIGEN_DEVICE_FUNC +inline void pbroadcast2(const typename unpacket_traits::type *a, + Packet& a0, Packet& a1) +{ + a0 = pload1(a+0); + a1 = pload1(a+1); +} + +/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet +plset(const typename unpacket_traits::type& a) { return a; } + +/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ +template EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) +{ (*to) = from; } + +/** \internal copy the packet \a from to \a *to, (un-aligned store) */ +template EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) +{ (*to) = from; } + + template EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) + { return ploadu(from); } + + template EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) + { pstore(to, from); } + +/** \internal tries to do cache prefetching of \a addr */ +template EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) +{ +#ifdef __CUDA_ARCH__ +#if defined(__LP64__) + // 64-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr)); +#else + // 32-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr)); +#endif +#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC) + __builtin_prefetch(addr); +#endif +} + +/** \internal \returns the first element of a packet */ +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type pfirst(const Packet& a) +{ return a; } + +/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */ +template EIGEN_DEVICE_FUNC inline Packet +preduxp(const Packet* vecs) { return vecs[0]; } + +/** \internal \returns the sum of the elements of \a a*/ +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux(const Packet& a) +{ return a; } + +/** \internal \returns the sum of the elements of \a a by block of 4 elements. + * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7} + * For packet-size smaller or equal to 4, this boils down to a noop. + */ +template EIGEN_DEVICE_FUNC inline +typename conditional<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet>::type +predux_downto4(const Packet& a) +{ return a; } + +/** \internal \returns the product of the elements of \a a*/ +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_mul(const Packet& a) +{ return a; } + +/** \internal \returns the min of the elements of \a a*/ +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) +{ return a; } + +/** \internal \returns the max of the elements of \a a*/ +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const Packet& a) +{ return a; } + +/** \internal \returns the reversed elements of \a a*/ +template EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) +{ return a; } + +/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ +template EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) +{ + // FIXME: uncomment the following in case we drop the internal imag and real functions. +// using std::imag; +// using std::real; + return Packet(imag(a),real(a)); +} + +/************************** +* Special math functions +***************************/ + +/** \internal \returns the sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psin(const Packet& a) { using std::sin; return sin(a); } + +/** \internal \returns the cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pcos(const Packet& a) { using std::cos; return cos(a); } + +/** \internal \returns the tan of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ptan(const Packet& a) { using std::tan; return tan(a); } + +/** \internal \returns the arc sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pasin(const Packet& a) { using std::asin; return asin(a); } + +/** \internal \returns the arc cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pacos(const Packet& a) { using std::acos; return acos(a); } + +/** \internal \returns the arc tangent of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet patan(const Packet& a) { using std::atan; return atan(a); } + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psinh(const Packet& a) { using std::sinh; return sinh(a); } + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); } + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } + +/** \internal \returns the exp of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pexp(const Packet& a) { using std::exp; return exp(a); } + +/** \internal \returns the log of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog(const Packet& a) { using std::log; return log(a); } + +/** \internal \returns the log1p of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog1p(const Packet& a) { return numext::log1p(a); } + +/** \internal \returns the log10 of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog10(const Packet& a) { using std::log10; return log10(a); } + +/** \internal \returns the square-root of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } + +/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet prsqrt(const Packet& a) { + return pdiv(pset1(1), psqrt(a)); +} + +/** \internal \returns the rounded value of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pround(const Packet& a) { using numext::round; return round(a); } + +/** \internal \returns the floor of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } + +/** \internal \returns the ceil of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } + +/*************************************************************************** +* The following functions might not have to be overwritten for vectorized types +***************************************************************************/ + +/** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */ +// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type) +template +inline void pstore1(typename unpacket_traits::type* to, const typename unpacket_traits::type& a) +{ + pstore(to, pset1(a)); +} + +/** \internal \returns a * b + c (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pmadd(const Packet& a, + const Packet& b, + const Packet& c) +{ return padd(pmul(a, b),c); } + +/** \internal \returns a packet version of \a *from. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits::type* from) +{ + if(Alignment >= unpacket_traits::alignment) + return pload(from); + else + return ploadu(from); +} + +/** \internal copy the packet \a from to \a *to. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) +{ + if(Alignment >= unpacket_traits::alignment) + pstore(to, from); + else + pstoreu(to, from); +} + +/** \internal \returns a packet version of \a *from. + * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the + * hardware if available to speedup the loading of data that won't be modified + * by the current computation. + */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits::type* from) +{ + return ploadt(from); +} + +/** \internal default implementation of palign() allowing partial specialization */ +template +struct palign_impl +{ + // by default data are aligned, so there is nothing to be done :) + static inline void run(PacketType&, const PacketType&) {} +}; + +/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements + * of \a first and \a Offset first elements of \a second. + * + * This function is currently only used to optimize matrix-vector products on unligned matrices. + * It takes 2 packets that represent a contiguous memory array, and returns a packet starting + * at the position \a Offset. For instance, for packets of 4 elements, we have: + * Input: + * - first = {f0,f1,f2,f3} + * - second = {s0,s1,s2,s3} + * Output: + * - if Offset==0 then {f0,f1,f2,f3} + * - if Offset==1 then {f1,f2,f3,s0} + * - if Offset==2 then {f2,f3,s0,s1} + * - if Offset==3 then {f3,s0,s1,s3} + */ +template +inline void palign(PacketType& first, const PacketType& second) +{ + palign_impl::run(first,second); +} + +/*************************************************************************** +* Fast complex products (GCC generates a function call which is very slow) +***************************************************************************/ + +// Eigen+CUDA does not support complexes. +#ifndef __CUDACC__ + +template<> inline std::complex pmul(const std::complex& a, const std::complex& b) +{ return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } + +template<> inline std::complex pmul(const std::complex& a, const std::complex& b) +{ return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } + +#endif + + +/*************************************************************************** + * PacketBlock, that is a collection of N packets where the number of words + * in the packet is a multiple of N. +***************************************************************************/ +template ::size> struct PacketBlock { + Packet packet[N]; +}; + +template EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock& /*kernel*/) { + // Nothing to do in the scalar case, i.e. a 1x1 matrix. +} + +/*************************************************************************** + * Selector, i.e. vector of N boolean values used to select (i.e. blend) + * words from 2 packets. +***************************************************************************/ +template struct Selector { + bool select[N]; +}; + +template EIGEN_DEVICE_FUNC inline Packet +pblend(const Selector::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) { + return ifPacket.select[0] ? thenPacket : elsePacket; +} + +/** \internal \returns \a a with the first coefficient replaced by the scalar b */ +template EIGEN_DEVICE_FUNC inline Packet +pinsertfirst(const Packet& a, typename unpacket_traits::type b) +{ + // Default implementation based on pblend. + // It must be specialized for higher performance. + Selector::size> mask; + mask.select[0] = true; + // This for loop should be optimized away by the compiler. + for(Index i=1; i::size; ++i) + mask.select[i] = false; + return pblend(mask, pset1(b), a); +} + +/** \internal \returns \a a with the last coefficient replaced by the scalar b */ +template EIGEN_DEVICE_FUNC inline Packet +pinsertlast(const Packet& a, typename unpacket_traits::type b) +{ + // Default implementation based on pblend. + // It must be specialized for higher performance. + Selector::size> mask; + // This for loop should be optimized away by the compiler. + for(Index i=0; i::size-1; ++i) + mask.select[i] = false; + mask.select[unpacket_traits::size-1] = true; + return pblend(mask, pset1(b), a); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_GENERIC_PACKET_MATH_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/GlobalFunctions.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/GlobalFunctions.h new file mode 100644 index 0000000..769dc25 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/GlobalFunctions.h @@ -0,0 +1,187 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010-2016 Gael Guennebaud +// Copyright (C) 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GLOBAL_FUNCTIONS_H +#define EIGEN_GLOBAL_FUNCTIONS_H + +#ifdef EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \ + /** \returns an expression of the coefficient-wise DOC_OP of \a x + + DOC_DETAILS + + \sa Math functions, class CwiseUnaryOp + */ \ + template \ + inline const Eigen::CwiseUnaryOp, const Derived> \ + NAME(const Eigen::ArrayBase& x); + +#else + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \ + template \ + inline const Eigen::CwiseUnaryOp, const Derived> \ + (NAME)(const Eigen::ArrayBase& x) { \ + return Eigen::CwiseUnaryOp, const Derived>(x.derived()); \ + } + +#endif // EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \ + \ + template \ + struct NAME##_retval > \ + { \ + typedef const Eigen::CwiseUnaryOp, const Derived> type; \ + }; \ + template \ + struct NAME##_impl > \ + { \ + static inline typename NAME##_retval >::type run(const Eigen::ArrayBase& x) \ + { \ + return typename NAME##_retval >::type(x.derived()); \ + } \ + }; + +namespace Eigen +{ + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\sa ArrayBase::imag) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\sa ArrayBase::conjugate) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\sa ArrayBase::inverse) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\sa ArrayBase::sin) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\sa ArrayBase::cos) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\sa ArrayBase::tan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\sa ArrayBase::atan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\sa ArrayBase::asin) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-consine,\sa ArrayBase::acos) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign) + + /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent. + * + * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar). + * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Derived,Constant > + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent), + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent) { + return x.derived().pow(exponent); + } + + template + inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow) + pow(const Eigen::ArrayBase& x, const typename Derived::Scalar& exponent) { + return x.derived().pow(exponent); + } +#endif + + /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power. + * + * Example: \include Cwise_array_power_array.cpp + * Output: \verbinclude Cwise_array_power_array.out + * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ + template + inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> + pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) + { + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + x.derived(), + exponents.derived() + ); + } + + /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power between a scalar and an array of exponents. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). + * + * Example: \include Cwise_scalar_power_array.cpp + * Output: \verbinclude Cwise_scalar_power_array.out + * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Constant,Derived> + pow(const Scalar& x,const Eigen::ArrayBase& x); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar), + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type + pow(const Scalar& x, const Eigen::ArrayBase& exponents) + { + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); + } + + template + inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow) + pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) + { + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); + } +#endif + + + namespace internal + { + EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op) + EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op) + EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op) + } +} + +// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, internal::isApprox...) + +#endif // EIGEN_GLOBAL_FUNCTIONS_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/IO.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/IO.h new file mode 100644 index 0000000..da7fd6c --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/IO.h @@ -0,0 +1,225 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_IO_H +#define EIGEN_IO_H + +namespace Eigen { + +enum { DontAlignCols = 1 }; +enum { StreamPrecision = -1, + FullPrecision = -2 }; + +namespace internal { +template +std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt); +} + +/** \class IOFormat + * \ingroup Core_Module + * + * \brief Stores a set of parameters controlling the way matrices are printed + * + * List of available parameters: + * - \b precision number of digits for floating point values, or one of the special constants \c StreamPrecision and \c FullPrecision. + * The default is the special value \c StreamPrecision which means to use the + * stream's own precision setting, as set for instance using \c cout.precision(3). The other special value + * \c FullPrecision means that the number of digits will be computed to match the full precision of each floating-point + * type. + * - \b flags an OR-ed combination of flags, the default value is 0, the only currently available flag is \c DontAlignCols which + * allows to disable the alignment of columns, resulting in faster code. + * - \b coeffSeparator string printed between two coefficients of the same row + * - \b rowSeparator string printed between two rows + * - \b rowPrefix string printed at the beginning of each row + * - \b rowSuffix string printed at the end of each row + * - \b matPrefix string printed at the beginning of the matrix + * - \b matSuffix string printed at the end of the matrix + * + * Example: \include IOFormat.cpp + * Output: \verbinclude IOFormat.out + * + * \sa DenseBase::format(), class WithFormat + */ +struct IOFormat +{ + /** Default constructor, see class IOFormat for the meaning of the parameters */ + IOFormat(int _precision = StreamPrecision, int _flags = 0, + const std::string& _coeffSeparator = " ", + const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="", + const std::string& _matPrefix="", const std::string& _matSuffix="") + : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator), + rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags) + { + // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline + // don't add rowSpacer if columns are not to be aligned + if((flags & DontAlignCols)) + return; + int i = int(matSuffix.length())-1; + while (i>=0 && matSuffix[i]!='\n') + { + rowSpacer += ' '; + i--; + } + } + std::string matPrefix, matSuffix; + std::string rowPrefix, rowSuffix, rowSeparator, rowSpacer; + std::string coeffSeparator; + int precision; + int flags; +}; + +/** \class WithFormat + * \ingroup Core_Module + * + * \brief Pseudo expression providing matrix output with given format + * + * \tparam ExpressionType the type of the object on which IO stream operations are performed + * + * This class represents an expression with stream operators controlled by a given IOFormat. + * It is the return type of DenseBase::format() + * and most of the time this is the only way it is used. + * + * See class IOFormat for some examples. + * + * \sa DenseBase::format(), class IOFormat + */ +template +class WithFormat +{ + public: + + WithFormat(const ExpressionType& matrix, const IOFormat& format) + : m_matrix(matrix), m_format(format) + {} + + friend std::ostream & operator << (std::ostream & s, const WithFormat& wf) + { + return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format); + } + + protected: + typename ExpressionType::Nested m_matrix; + IOFormat m_format; +}; + +namespace internal { + +// NOTE: This helper is kept for backward compatibility with previous code specializing +// this internal::significant_decimals_impl structure. In the future we should directly +// call digits10() which has been introduced in July 2016 in 3.3. +template +struct significant_decimals_impl +{ + static inline int run() + { + return NumTraits::digits10(); + } +}; + +/** \internal + * print the matrix \a _m to the output stream \a s using the output format \a fmt */ +template +std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt) +{ + if(_m.size() == 0) + { + s << fmt.matPrefix << fmt.matSuffix; + return s; + } + + typename Derived::Nested m = _m; + typedef typename Derived::Scalar Scalar; + + Index width = 0; + + std::streamsize explicit_precision; + if(fmt.precision == StreamPrecision) + { + explicit_precision = 0; + } + else if(fmt.precision == FullPrecision) + { + if (NumTraits::IsInteger) + { + explicit_precision = 0; + } + else + { + explicit_precision = significant_decimals_impl::run(); + } + } + else + { + explicit_precision = fmt.precision; + } + + std::streamsize old_precision = 0; + if(explicit_precision) old_precision = s.precision(explicit_precision); + + bool align_cols = !(fmt.flags & DontAlignCols); + if(align_cols) + { + // compute the largest width + for(Index j = 0; j < m.cols(); ++j) + for(Index i = 0; i < m.rows(); ++i) + { + std::stringstream sstr; + sstr.copyfmt(s); + sstr << m.coeff(i,j); + width = std::max(width, Index(sstr.str().length())); + } + } + s << fmt.matPrefix; + for(Index i = 0; i < m.rows(); ++i) + { + if (i) + s << fmt.rowSpacer; + s << fmt.rowPrefix; + if(width) s.width(width); + s << m.coeff(i, 0); + for(Index j = 1; j < m.cols(); ++j) + { + s << fmt.coeffSeparator; + if (width) s.width(width); + s << m.coeff(i, j); + } + s << fmt.rowSuffix; + if( i < m.rows() - 1) + s << fmt.rowSeparator; + } + s << fmt.matSuffix; + if(explicit_precision) s.precision(old_precision); + return s; +} + +} // end namespace internal + +/** \relates DenseBase + * + * Outputs the matrix, to the given stream. + * + * If you wish to print the matrix with a format different than the default, use DenseBase::format(). + * + * It is also possible to change the default format by defining EIGEN_DEFAULT_IO_FORMAT before including Eigen headers. + * If not defined, this will automatically be defined to Eigen::IOFormat(), that is the Eigen::IOFormat with default parameters. + * + * \sa DenseBase::format() + */ +template +std::ostream & operator << +(std::ostream & s, + const DenseBase & m) +{ + return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT); +} + +} // end namespace Eigen + +#endif // EIGEN_IO_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Inverse.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Inverse.h new file mode 100644 index 0000000..b76f043 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Inverse.h @@ -0,0 +1,118 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INVERSE_H +#define EIGEN_INVERSE_H + +namespace Eigen { + +template class InverseImpl; + +namespace internal { + +template +struct traits > + : traits +{ + typedef typename XprType::PlainObject PlainObject; + typedef traits BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit + }; +}; + +} // end namespace internal + +/** \class Inverse + * + * \brief Expression of the inverse of another expression + * + * \tparam XprType the type of the expression we are taking the inverse + * + * This class represents an abstract expression of A.inverse() + * and most of the time this is the only way it is used. + * + */ +template +class Inverse : public InverseImpl::StorageKind> +{ +public: + typedef typename XprType::StorageIndex StorageIndex; + typedef typename XprType::PlainObject PlainObject; + typedef typename XprType::Scalar Scalar; + typedef typename internal::ref_selector::type XprTypeNested; + typedef typename internal::remove_all::type XprTypeNestedCleaned; + typedef typename internal::ref_selector::type Nested; + typedef typename internal::remove_all::type NestedExpression; + + explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr) + : m_xpr(xpr) + {} + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + + EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; } + +protected: + XprTypeNested m_xpr; +}; + +// Generic API dispatcher +template +class InverseImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; + typedef typename XprType::Scalar Scalar; +private: + + Scalar coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +namespace internal { + +/** \internal + * \brief Default evaluator for Inverse expression. + * + * This default evaluator for Inverse expression simply evaluate the inverse into a temporary + * by a call to internal::call_assignment_no_alias. + * Therefore, inverse implementers only have to specialize Assignment, ...> for + * there own nested expression. + * + * \sa class Inverse + */ +template +struct unary_evaluator > + : public evaluator::PlainObject> +{ + typedef Inverse InverseType; + typedef typename InverseType::PlainObject PlainObject; + typedef evaluator Base; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + + unary_evaluator(const InverseType& inv_xpr) + : m_result(inv_xpr.rows(), inv_xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + internal::call_assignment_no_alias(m_result, inv_xpr); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_INVERSE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Map.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Map.h new file mode 100644 index 0000000..06d1967 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Map.h @@ -0,0 +1,164 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MAP_H +#define EIGEN_MAP_H + +namespace Eigen { + +namespace internal { +template +struct traits > + : public traits +{ + typedef traits TraitsBase; + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + Alignment = int(MapOptions)&int(AlignedMask), + Flags0 = TraitsBase::Flags & (~NestByRefBit), + Flags = is_lvalue::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) + }; +private: + enum { Options }; // Expressions don't have Options +}; +} + +/** \class Map + * \ingroup Core_Module + * + * \brief A matrix or vector expression mapping an existing array of data. + * + * \tparam PlainObjectType the equivalent matrix type of the mapped data + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. + * The default is \c #Unaligned. + * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout + * of an ordinary, contiguous array. This can be overridden by specifying strides. + * The type passed here must be a specialization of the Stride template, see examples below. + * + * This class represents a matrix or vector expression mapping an existing array of data. + * It can be used to let Eigen interface without any overhead with non-Eigen data structures, + * such as plain C arrays or structures from other libraries. By default, it assumes that the + * data is laid out contiguously in memory. You can however override this by explicitly specifying + * inner and outer strides. + * + * Here's an example of simply mapping a contiguous array as a \ref TopicStorageOrders "column-major" matrix: + * \include Map_simple.cpp + * Output: \verbinclude Map_simple.out + * + * If you need to map non-contiguous arrays, you can do so by specifying strides: + * + * Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer + * increment between two consecutive coefficients. Here, we're specifying the inner stride as a compile-time + * fixed value. + * \include Map_inner_stride.cpp + * Output: \verbinclude Map_inner_stride.out + * + * Here's an example of mapping an array while specifying an outer stride. Here, since we're mapping + * as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns. + * Here, we're specifying the outer stride as a runtime parameter. Note that here \c OuterStride<> is + * a short version of \c OuterStride because the default template parameter of OuterStride + * is \c Dynamic + * \include Map_outer_stride.cpp + * Output: \verbinclude Map_outer_stride.out + * + * For more details and for an example of specifying both an inner and an outer stride, see class Stride. + * + * \b Tip: to change the array of data mapped by a Map object, you can use the C++ + * placement new syntax: + * + * Example: \include Map_placement_new.cpp + * Output: \verbinclude Map_placement_new.out + * + * This class is the return type of PlainObjectBase::Map() but can also be used directly. + * + * \sa PlainObjectBase::Map(), \ref TopicStorageOrders + */ +template class Map + : public MapBase > +{ + public: + + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Map) + + typedef typename Base::PointerType PointerType; + typedef PointerType PointerArgType; + EIGEN_DEVICE_FUNC + inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; } + + EIGEN_DEVICE_FUNC + inline Index innerStride() const + { + return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; + } + + EIGEN_DEVICE_FUNC + inline Index outerStride() const + { + return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() + : IsVectorAtCompileTime ? this->size() + : int(Flags)&RowMajorBit ? this->cols() + : this->rows(); + } + + /** Constructor in the fixed-size case. + * + * \param dataPtr pointer to the array to map + * \param stride optional Stride object, passing the strides. + */ + EIGEN_DEVICE_FUNC + explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr)), m_stride(stride) + { + PlainObjectType::Base::_check_template_params(); + } + + /** Constructor in the dynamic-size vector case. + * + * \param dataPtr pointer to the array to map + * \param size the size of the vector expression + * \param stride optional Stride object, passing the strides. + */ + EIGEN_DEVICE_FUNC + inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride) + { + PlainObjectType::Base::_check_template_params(); + } + + /** Constructor in the dynamic-size matrix case. + * + * \param dataPtr pointer to the array to map + * \param rows the number of rows of the matrix expression + * \param cols the number of columns of the matrix expression + * \param stride optional Stride object, passing the strides. + */ + EIGEN_DEVICE_FUNC + inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride) + { + PlainObjectType::Base::_check_template_params(); + } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) + + protected: + StrideType m_stride; +}; + + +} // end namespace Eigen + +#endif // EIGEN_MAP_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MapBase.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MapBase.h new file mode 100644 index 0000000..020f939 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MapBase.h @@ -0,0 +1,299 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MAPBASE_H +#define EIGEN_MAPBASE_H + +#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \ + EIGEN_STATIC_ASSERT((int(internal::evaluator::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ + YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT) + +namespace Eigen { + +/** \ingroup Core_Module + * + * \brief Base class for dense Map and Block expression with direct access + * + * This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense + * Map and Block objects with direct access. + * Typical users do not have to directly deal with this class. + * + * This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN. + * See \link TopicCustomizing_Plugins customizing Eigen \endlink for details. + * + * The \c Derived class has to provide the following two methods describing the memory layout: + * \code Index innerStride() const; \endcode + * \code Index outerStride() const; \endcode + * + * \sa class Map, class Block + */ +template class MapBase + : public internal::dense_xpr_base::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + enum { + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + SizeAtCompileTime = Base::SizeAtCompileTime + }; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + typedef typename internal::conditional< + bool(internal::is_lvalue::value), + Scalar *, + const Scalar *>::type + PointerType; + + using Base::derived; +// using Base::RowsAtCompileTime; +// using Base::ColsAtCompileTime; +// using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + using Base::IsRowMajor; + + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + using Base::lazyAssign; + using Base::eval; + + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::colStride; + + // bug 217 - compile error on ICC 11.1 + using Base::operator=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + + /** \copydoc DenseBase::rows() */ + EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); } + /** \copydoc DenseBase::cols() */ + EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); } + + /** Returns a pointer to the first coefficient of the matrix or vector. + * + * \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride(). + * + * \sa innerStride(), outerStride() + */ + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; } + + /** \copydoc PlainObjectBase::coeff(Index,Index) const */ + EIGEN_DEVICE_FUNC + inline const Scalar& coeff(Index rowId, Index colId) const + { + return m_data[colId * colStride() + rowId * rowStride()]; + } + + /** \copydoc PlainObjectBase::coeff(Index) const */ + EIGEN_DEVICE_FUNC + inline const Scalar& coeff(Index index) const + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return m_data[index * innerStride()]; + } + + /** \copydoc PlainObjectBase::coeffRef(Index,Index) const */ + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return this->m_data[colId * colStride() + rowId * rowStride()]; + } + + /** \copydoc PlainObjectBase::coeffRef(Index) const */ + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index index) const + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return this->m_data[index * innerStride()]; + } + + /** \internal */ + template + inline PacketScalar packet(Index rowId, Index colId) const + { + return internal::ploadt + (m_data + (colId * colStride() + rowId * rowStride())); + } + + /** \internal */ + template + inline PacketScalar packet(Index index) const + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return internal::ploadt(m_data + index * innerStride()); + } + + /** \internal Constructor for fixed size matrices or vectors */ + EIGEN_DEVICE_FUNC + explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) + { + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + checkSanity(); + } + + /** \internal Constructor for dynamically sized vectors */ + EIGEN_DEVICE_FUNC + inline MapBase(PointerType dataPtr, Index vecSize) + : m_data(dataPtr), + m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)), + m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime)) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + eigen_assert(vecSize >= 0); + eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize); + checkSanity(); + } + + /** \internal Constructor for dynamically sized matrices */ + EIGEN_DEVICE_FUNC + inline MapBase(PointerType dataPtr, Index rows, Index cols) + : m_data(dataPtr), m_rows(rows), m_cols(cols) + { + eigen_assert( (dataPtr == 0) + || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); + checkSanity(); + } + + #ifdef EIGEN_MAPBASE_PLUGIN + #include EIGEN_MAPBASE_PLUGIN + #endif + + protected: + + template + EIGEN_DEVICE_FUNC + void checkSanity(typename internal::enable_if<(internal::traits::Alignment>0),void*>::type = 0) const + { +#if EIGEN_MAX_ALIGN_BYTES>0 + eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits::Alignment) == 0) + || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits::Alignment ) && "data is not aligned"); +#endif + } + + template + EIGEN_DEVICE_FUNC + void checkSanity(typename internal::enable_if::Alignment==0,void*>::type = 0) const + {} + + PointerType m_data; + const internal::variable_if_dynamic m_rows; + const internal::variable_if_dynamic m_cols; +}; + +/** \ingroup Core_Module + * + * \brief Base class for non-const dense Map and Block expression with direct access + * + * This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of + * dense Map and Block objects with direct access. + * It inherits MapBase which defines the const variant for reading specific entries. + * + * \sa class Map, class Block + */ +template class MapBase + : public MapBase +{ + typedef MapBase ReadOnlyMapBase; + public: + + typedef MapBase Base; + + typedef typename Base::Scalar Scalar; + typedef typename Base::PacketScalar PacketScalar; + typedef typename Base::StorageIndex StorageIndex; + typedef typename Base::PointerType PointerType; + + using Base::derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::colStride; + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + EIGEN_DEVICE_FUNC + inline const Scalar* data() const { return this->m_data; } + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error + + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) + { + return this->m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue& coeffRef(Index index) + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return this->m_data[index * innerStride()]; + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& val) + { + internal::pstoret + (this->m_data + (col * colStride() + row * rowStride()), val); + } + + template + inline void writePacket(Index index, const PacketScalar& val) + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + internal::pstoret + (this->m_data + index * innerStride(), val); + } + + EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {} + + EIGEN_DEVICE_FUNC + Derived& operator=(const MapBase& other) + { + ReadOnlyMapBase::Base::operator=(other); + return derived(); + } + + // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base, + // see bugs 821 and 920. + using ReadOnlyMapBase::Base::operator=; +}; + +#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS + +} // end namespace Eigen + +#endif // EIGEN_MAPBASE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MathFunctions.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MathFunctions.h new file mode 100644 index 0000000..a648aa0 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MathFunctions.h @@ -0,0 +1,1431 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATHFUNCTIONS_H +#define EIGEN_MATHFUNCTIONS_H + +// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html +// TODO this should better be moved to NumTraits +#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L + + +namespace Eigen { + +// On WINCE, std::abs is defined for int only, so let's defined our own overloads: +// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too. +#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500 +long abs(long x) { return (labs(x)); } +double abs(double x) { return (fabs(x)); } +float abs(float x) { return (fabsf(x)); } +long double abs(long double x) { return (fabsl(x)); } +#endif + +namespace internal { + +/** \internal \class global_math_functions_filtering_base + * + * What it does: + * Defines a typedef 'type' as follows: + * - if type T has a member typedef Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl, then + * global_math_functions_filtering_base::type is a typedef for it. + * - otherwise, global_math_functions_filtering_base::type is a typedef for T. + * + * How it's used: + * To allow to defined the global math functions (like sin...) in certain cases, like the Array expressions. + * When you do sin(array1+array2), the object array1+array2 has a complicated expression type, all what you want to know + * is that it inherits ArrayBase. So we implement a partial specialization of sin_impl for ArrayBase. + * So we must make sure to use sin_impl > and not sin_impl, otherwise our partial specialization + * won't be used. How does sin know that? That's exactly what global_math_functions_filtering_base tells it. + * + * How it's implemented: + * SFINAE in the style of enable_if. Highly susceptible of breaking compilers. With GCC, it sure does work, but if you replace + * the typename dummy by an integer template parameter, it doesn't work anymore! + */ + +template +struct global_math_functions_filtering_base +{ + typedef T type; +}; + +template struct always_void { typedef void type; }; + +template +struct global_math_functions_filtering_base + ::type + > +{ + typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type; +}; + +#define EIGEN_MATHFUNC_IMPL(func, scalar) Eigen::internal::func##_impl::type> +#define EIGEN_MATHFUNC_RETVAL(func, scalar) typename Eigen::internal::func##_retval::type>::type + +/**************************************************************************** +* Implementation of real * +****************************************************************************/ + +template::IsComplex> +struct real_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return x; + } +}; + +template +struct real_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + using std::real; + return real(x); + } +}; + +template struct real_impl : real_default_impl {}; + +#ifdef __CUDA_ARCH__ +template +struct real_impl > +{ + typedef T RealScalar; + EIGEN_DEVICE_FUNC + static inline T run(const std::complex& x) + { + return x.real(); + } +}; +#endif + +template +struct real_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of imag * +****************************************************************************/ + +template::IsComplex> +struct imag_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar&) + { + return RealScalar(0); + } +}; + +template +struct imag_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + using std::imag; + return imag(x); + } +}; + +template struct imag_impl : imag_default_impl {}; + +#ifdef __CUDA_ARCH__ +template +struct imag_impl > +{ + typedef T RealScalar; + EIGEN_DEVICE_FUNC + static inline T run(const std::complex& x) + { + return x.imag(); + } +}; +#endif + +template +struct imag_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of real_ref * +****************************************************************************/ + +template +struct real_ref_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar& run(Scalar& x) + { + return reinterpret_cast(&x)[0]; + } + EIGEN_DEVICE_FUNC + static inline const RealScalar& run(const Scalar& x) + { + return reinterpret_cast(&x)[0]; + } +}; + +template +struct real_ref_retval +{ + typedef typename NumTraits::Real & type; +}; + +/**************************************************************************** +* Implementation of imag_ref * +****************************************************************************/ + +template +struct imag_ref_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar& run(Scalar& x) + { + return reinterpret_cast(&x)[1]; + } + EIGEN_DEVICE_FUNC + static inline const RealScalar& run(const Scalar& x) + { + return reinterpret_cast(&x)[1]; + } +}; + +template +struct imag_ref_default_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(Scalar&) + { + return Scalar(0); + } + EIGEN_DEVICE_FUNC + static inline const Scalar run(const Scalar&) + { + return Scalar(0); + } +}; + +template +struct imag_ref_impl : imag_ref_default_impl::IsComplex> {}; + +template +struct imag_ref_retval +{ + typedef typename NumTraits::Real & type; +}; + +/**************************************************************************** +* Implementation of conj * +****************************************************************************/ + +template::IsComplex> +struct conj_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& x) + { + return x; + } +}; + +template +struct conj_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& x) + { + using std::conj; + return conj(x); + } +}; + +template +struct conj_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of abs2 * +****************************************************************************/ + +template +struct abs2_impl_default +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return x*x; + } +}; + +template +struct abs2_impl_default // IsComplex +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return real(x)*real(x) + imag(x)*imag(x); + } +}; + +template +struct abs2_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return abs2_impl_default::IsComplex>::run(x); + } +}; + +template +struct abs2_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of norm1 * +****************************************************************************/ + +template +struct norm1_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(abs); + return abs(real(x)) + abs(imag(x)); + } +}; + +template +struct norm1_default_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(abs); + return abs(x); + } +}; + +template +struct norm1_impl : norm1_default_impl::IsComplex> {}; + +template +struct norm1_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of hypot * +****************************************************************************/ + +template +struct hypot_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x, const Scalar& y) + { + EIGEN_USING_STD_MATH(abs); + EIGEN_USING_STD_MATH(sqrt); + RealScalar _x = abs(x); + RealScalar _y = abs(y); + Scalar p, qp; + if(_x>_y) + { + p = _x; + qp = _y / p; + } + else + { + p = _y; + qp = _x / p; + } + if(p==RealScalar(0)) return RealScalar(0); + return p * sqrt(RealScalar(1) + qp*qp); + } +}; + +template +struct hypot_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of cast * +****************************************************************************/ + +template +struct cast_impl +{ + EIGEN_DEVICE_FUNC + static inline NewType run(const OldType& x) + { + return static_cast(x); + } +}; + +// here, for once, we're plainly returning NewType: we don't want cast to do weird things. + +template +EIGEN_DEVICE_FUNC +inline NewType cast(const OldType& x) +{ + return cast_impl::run(x); +} + +/**************************************************************************** +* Implementation of round * +****************************************************************************/ + +#if EIGEN_HAS_CXX11_MATH + template + struct round_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + using std::round; + return round(x); + } + }; +#else + template + struct round_impl + { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + EIGEN_USING_STD_MATH(floor); + EIGEN_USING_STD_MATH(ceil); + return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5)); + } + }; +#endif + +template +struct round_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of arg * +****************************************************************************/ + +#if EIGEN_HAS_CXX11_MATH + template + struct arg_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(arg); + return arg(x); + } + }; +#else + template::IsComplex> + struct arg_default_impl + { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); } + }; + + template + struct arg_default_impl + { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(arg); + return arg(x); + } + }; + + template struct arg_impl : arg_default_impl {}; +#endif + +template +struct arg_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of log1p * +****************************************************************************/ + +namespace std_fallback { + // fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar, + // or that there is no suitable std::log1p function available + template + EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + typedef typename NumTraits::Real RealScalar; + EIGEN_USING_STD_MATH(log); + Scalar x1p = RealScalar(1) + x; + return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); + } +} + +template +struct log1p_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + #if EIGEN_HAS_CXX11_MATH + using std::log1p; + #endif + using std_fallback::log1p; + return log1p(x); + } +}; + + +template +struct log1p_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of pow * +****************************************************************************/ + +template::IsInteger&&NumTraits::IsInteger> +struct pow_impl +{ + //typedef Scalar retval; + typedef typename ScalarBinaryOpTraits >::ReturnType result_type; + static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) + { + EIGEN_USING_STD_MATH(pow); + return pow(x, y); + } +}; + +template +struct pow_impl +{ + typedef ScalarX result_type; + static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y) + { + ScalarX res(1); + eigen_assert(!NumTraits::IsSigned || y >= 0); + if(y & 1) res *= x; + y >>= 1; + while(y) + { + x *= x; + if(y&1) res *= x; + y >>= 1; + } + return res; + } +}; + +/**************************************************************************** +* Implementation of random * +****************************************************************************/ + +template +struct random_default_impl {}; + +template +struct random_impl : random_default_impl::IsComplex, NumTraits::IsInteger> {}; + +template +struct random_retval +{ + typedef Scalar type; +}; + +template inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y); +template inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(); + +template +struct random_default_impl +{ + static inline Scalar run(const Scalar& x, const Scalar& y) + { + return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX); + } + static inline Scalar run() + { + return run(Scalar(NumTraits::IsSigned ? -1 : 0), Scalar(1)); + } +}; + +enum { + meta_floor_log2_terminate, + meta_floor_log2_move_up, + meta_floor_log2_move_down, + meta_floor_log2_bogus +}; + +template struct meta_floor_log2_selector +{ + enum { middle = (lower + upper) / 2, + value = (upper <= lower + 1) ? int(meta_floor_log2_terminate) + : (n < (1 << middle)) ? int(meta_floor_log2_move_down) + : (n==0) ? int(meta_floor_log2_bogus) + : int(meta_floor_log2_move_up) + }; +}; + +template::value> +struct meta_floor_log2 {}; + +template +struct meta_floor_log2 +{ + enum { value = meta_floor_log2::middle>::value }; +}; + +template +struct meta_floor_log2 +{ + enum { value = meta_floor_log2::middle, upper>::value }; +}; + +template +struct meta_floor_log2 +{ + enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower }; +}; + +template +struct meta_floor_log2 +{ + // no value, error at compile time +}; + +template +struct random_default_impl +{ + static inline Scalar run(const Scalar& x, const Scalar& y) + { + typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; + if(y=x the result converted to an unsigned long is still correct. + std::size_t range = ScalarX(y)-ScalarX(x); + std::size_t offset = 0; + // rejection sampling + std::size_t divisor = 1; + std::size_t multiplier = 1; + if(range range); + return Scalar(ScalarX(x) + offset); + } + + static inline Scalar run() + { +#ifdef EIGEN_MAKING_DOCS + return run(Scalar(NumTraits::IsSigned ? -10 : 0), Scalar(10)); +#else + enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value, + scalar_bits = sizeof(Scalar) * CHAR_BIT, + shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)), + offset = NumTraits::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 + }; + return Scalar((std::rand() >> shift) - offset); +#endif + } +}; + +template +struct random_default_impl +{ + static inline Scalar run(const Scalar& x, const Scalar& y) + { + return Scalar(random(real(x), real(y)), + random(imag(x), imag(y))); + } + static inline Scalar run() + { + typedef typename NumTraits::Real RealScalar; + return Scalar(random(), random()); + } +}; + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) +{ + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y); +} + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() +{ + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); +} + +// Implementatin of is* functions + +// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. +#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) +#define EIGEN_USE_STD_FPCLASSIFY 1 +#else +#define EIGEN_USE_STD_FPCLASSIFY 0 +#endif + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isnan_impl(const T&) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isinf_impl(const T&) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isfinite_impl(const T&) { return true; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isfinite_impl(const T& x) +{ + #ifdef __CUDA_ARCH__ + return (::isfinite)(x); + #elif EIGEN_USE_STD_FPCLASSIFY + using std::isfinite; + return isfinite EIGEN_NOT_A_MACRO (x); + #else + return x<=NumTraits::highest() && x>=NumTraits::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isinf_impl(const T& x) +{ + #ifdef __CUDA_ARCH__ + return (::isinf)(x); + #elif EIGEN_USE_STD_FPCLASSIFY + using std::isinf; + return isinf EIGEN_NOT_A_MACRO (x); + #else + return x>NumTraits::highest() || x::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isnan_impl(const T& x) +{ + #ifdef __CUDA_ARCH__ + return (::isnan)(x); + #elif EIGEN_USE_STD_FPCLASSIFY + using std::isnan; + return isnan EIGEN_NOT_A_MACRO (x); + #else + return x != x; + #endif +} + +#if (!EIGEN_USE_STD_FPCLASSIFY) + +#if EIGEN_COMP_MSVC + +template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) +{ + return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; +} + +//MSVC defines a _isnan builtin function, but for double only +EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; } + +EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); } + +#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) + +#if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) +#else + // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), + // while the second prevent too aggressive optimizations in fast-math mode: + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only"))) +#endif + +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); } + +#undef EIGEN_TMP_NOOPT_ATTRIB + +#endif + +#endif + +// The following overload are defined at the end of this file +template EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex& x); +template EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x); +template EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x); + +template T generic_fast_tanh_float(const T& a_x); + +} // end namespace internal + +/**************************************************************************** +* Generic math functions * +****************************************************************************/ + +namespace numext { + +#ifndef __CUDA_ARCH__ +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(min); + return min EIGEN_NOT_A_MACRO (x,y); +} + +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(max); + return max EIGEN_NOT_A_MACRO (x,y); +} +#else +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) +{ + return y < x ? y : x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) +{ + return fminf(x, y); +} +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) +{ + return x < y ? y : x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) +{ + return fmaxf(x, y); +} +#endif + + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x) +{ + return internal::real_ref_impl::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x) +{ + return internal::imag_ref_impl::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y) +{ + return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float log1p(const float &x) { return ::log1pf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double log1p(const double &x) { return ::log1p(x); } +#endif + +template +EIGEN_DEVICE_FUNC +inline typename internal::pow_impl::result_type pow(const ScalarX& x, const ScalarY& y) +{ + return internal::pow_impl::run(x, y); +} + +template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } +template EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } +template EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +T (floor)(const T& x) +{ + EIGEN_USING_STD_MATH(floor); + return floor(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float floor(const float &x) { return ::floorf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double floor(const double &x) { return ::floor(x); } +#endif + +template +EIGEN_DEVICE_FUNC +T (ceil)(const T& x) +{ + EIGEN_USING_STD_MATH(ceil); + return ceil(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float ceil(const float &x) { return ::ceilf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double ceil(const double &x) { return ::ceil(x); } +#endif + + +/** Log base 2 for 32 bits positive integers. + * Conveniently returns 0 for x==0. */ +inline int log2(int x) +{ + eigen_assert(x>=0); + unsigned int v(x); + static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return table[(v * 0x07C4ACDDU) >> 27]; +} + +/** \returns the square root of \a x. + * + * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode, + * but slightly faster for float/double and some compilers (e.g., gcc), thanks to + * specializations when SSE is enabled. + * + * It's usage is justified in performance critical functions, like norm/normalize. + */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sqrt(const T &x) +{ + EIGEN_USING_STD_MATH(sqrt); + return sqrt(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T log(const T &x) { + EIGEN_USING_STD_MATH(log); + return log(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float log(const float &x) { return ::logf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double log(const double &x) { return ::log(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if::IsSigned || NumTraits::IsComplex,typename NumTraits::Real>::type +abs(const T &x) { + EIGEN_USING_STD_MATH(abs); + return abs(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if::IsSigned || NumTraits::IsComplex),typename NumTraits::Real>::type +abs(const T &x) { + return x; +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } +EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float abs(const float &x) { return ::fabsf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double abs(const double &x) { return ::fabs(x); } + +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float abs(const std::complex& x) { + return ::hypotf(x.real(), x.imag()); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double abs(const std::complex& x) { + return ::hypot(x.real(), x.imag()); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T exp(const T &x) { + EIGEN_USING_STD_MATH(exp); + return exp(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float exp(const float &x) { return ::expf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double exp(const double &x) { return ::exp(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T cos(const T &x) { + EIGEN_USING_STD_MATH(cos); + return cos(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float cos(const float &x) { return ::cosf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double cos(const double &x) { return ::cos(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sin(const T &x) { + EIGEN_USING_STD_MATH(sin); + return sin(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float sin(const float &x) { return ::sinf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double sin(const double &x) { return ::sin(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T tan(const T &x) { + EIGEN_USING_STD_MATH(tan); + return tan(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tan(const float &x) { return ::tanf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double tan(const double &x) { return ::tan(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T acos(const T &x) { + EIGEN_USING_STD_MATH(acos); + return acos(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float acos(const float &x) { return ::acosf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double acos(const double &x) { return ::acos(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T asin(const T &x) { + EIGEN_USING_STD_MATH(asin); + return asin(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float asin(const float &x) { return ::asinf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double asin(const double &x) { return ::asin(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T atan(const T &x) { + EIGEN_USING_STD_MATH(atan); + return atan(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float atan(const float &x) { return ::atanf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double atan(const double &x) { return ::atan(x); } +#endif + + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T cosh(const T &x) { + EIGEN_USING_STD_MATH(cosh); + return cosh(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float cosh(const float &x) { return ::coshf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double cosh(const double &x) { return ::cosh(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sinh(const T &x) { + EIGEN_USING_STD_MATH(sinh); + return sinh(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float sinh(const float &x) { return ::sinhf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double sinh(const double &x) { return ::sinh(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T tanh(const T &x) { + EIGEN_USING_STD_MATH(tanh); + return tanh(x); +} + +#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tanh(float x) { return internal::generic_fast_tanh_float(x); } +#endif + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tanh(const float &x) { return ::tanhf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double tanh(const double &x) { return ::tanh(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T fmod(const T& a, const T& b) { + EIGEN_USING_STD_MATH(fmod); + return fmod(a, b); +} + +#ifdef __CUDACC__ +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float fmod(const float& a, const float& b) { + return ::fmodf(a, b); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double fmod(const double& a, const double& b) { + return ::fmod(a, b); +} +#endif + +} // end namespace numext + +namespace internal { + +template +EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex& x) +{ + return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); +} + +template +EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x) +{ + return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); +} + +template +EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x) +{ + return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); +} + +/**************************************************************************** +* Implementation of fuzzy comparisons * +****************************************************************************/ + +template +struct scalar_fuzzy_default_impl {}; + +template +struct scalar_fuzzy_default_impl +{ + typedef typename NumTraits::Real RealScalar; + template EIGEN_DEVICE_FUNC + static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) + { + return numext::abs(x) <= numext::abs(y) * prec; + } + EIGEN_DEVICE_FUNC + static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) + { + return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec; + } + EIGEN_DEVICE_FUNC + static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) + { + return x <= y || isApprox(x, y, prec); + } +}; + +template +struct scalar_fuzzy_default_impl +{ + typedef typename NumTraits::Real RealScalar; + template EIGEN_DEVICE_FUNC + static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&) + { + return x == Scalar(0); + } + EIGEN_DEVICE_FUNC + static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&) + { + return x == y; + } + EIGEN_DEVICE_FUNC + static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&) + { + return x <= y; + } +}; + +template +struct scalar_fuzzy_default_impl +{ + typedef typename NumTraits::Real RealScalar; + template EIGEN_DEVICE_FUNC + static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) + { + return numext::abs2(x) <= numext::abs2(y) * prec * prec; + } + EIGEN_DEVICE_FUNC + static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) + { + return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; + } +}; + +template +struct scalar_fuzzy_impl : scalar_fuzzy_default_impl::IsComplex, NumTraits::IsInteger> {}; + +template EIGEN_DEVICE_FUNC +inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, + const typename NumTraits::Real &precision = NumTraits::dummy_precision()) +{ + return scalar_fuzzy_impl::template isMuchSmallerThan(x, y, precision); +} + +template EIGEN_DEVICE_FUNC +inline bool isApprox(const Scalar& x, const Scalar& y, + const typename NumTraits::Real &precision = NumTraits::dummy_precision()) +{ + return scalar_fuzzy_impl::isApprox(x, y, precision); +} + +template EIGEN_DEVICE_FUNC +inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, + const typename NumTraits::Real &precision = NumTraits::dummy_precision()) +{ + return scalar_fuzzy_impl::isApproxOrLessThan(x, y, precision); +} + +/****************************************** +*** The special case of the bool type *** +******************************************/ + +template<> struct random_impl +{ + static inline bool run() + { + return random(0,1)==0 ? false : true; + } +}; + +template<> struct scalar_fuzzy_impl +{ + typedef bool RealScalar; + + template EIGEN_DEVICE_FUNC + static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) + { + return !x; + } + + EIGEN_DEVICE_FUNC + static inline bool isApprox(bool x, bool y, bool) + { + return x == y; + } + + EIGEN_DEVICE_FUNC + static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&) + { + return (!x) || y; + } + +}; + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATHFUNCTIONS_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MathFunctionsImpl.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MathFunctionsImpl.h new file mode 100644 index 0000000..3c9ef22 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MathFunctionsImpl.h @@ -0,0 +1,78 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATHFUNCTIONSIMPL_H +#define EIGEN_MATHFUNCTIONSIMPL_H + +namespace Eigen { + +namespace internal { + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) + Doesn't do anything fancy, just a 13/6-degree rational interpolant which + is accurate up to a couple of ulp in the range [-9, 9], outside of which + the tanh(x) = +/-1. + + This implementation works on both scalars and packets. +*/ +template +T generic_fast_tanh_float(const T& a_x) +{ + // Clamp the inputs to the range [-9, 9] since anything outside + // this range is +/-1.0f in single-precision. + const T plus_9 = pset1(9.f); + const T minus_9 = pset1(-9.f); + // NOTE GCC prior to 6.3 might improperly optimize this max/min + // step such that if a_x is nan, x will be either 9 or -9, + // and tanh will return 1 or -1 instead of nan. + // This is supposed to be fixed in gcc6.3, + // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 + const T x = pmax(minus_9,pmin(plus_9,a_x)); + // The monomial coefficients of the numerator polynomial (odd). + const T alpha_1 = pset1(4.89352455891786e-03f); + const T alpha_3 = pset1(6.37261928875436e-04f); + const T alpha_5 = pset1(1.48572235717979e-05f); + const T alpha_7 = pset1(5.12229709037114e-08f); + const T alpha_9 = pset1(-8.60467152213735e-11f); + const T alpha_11 = pset1(2.00018790482477e-13f); + const T alpha_13 = pset1(-2.76076847742355e-16f); + + // The monomial coefficients of the denominator polynomial (even). + const T beta_0 = pset1(4.89352518554385e-03f); + const T beta_2 = pset1(2.26843463243900e-03f); + const T beta_4 = pset1(1.18534705686654e-04f); + const T beta_6 = pset1(1.19825839466702e-06f); + + // Since the polynomials are odd/even, we need x^2. + const T x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + T p = pmadd(x2, alpha_13, alpha_11); + p = pmadd(x2, p, alpha_9); + p = pmadd(x2, p, alpha_7); + p = pmadd(x2, p, alpha_5); + p = pmadd(x2, p, alpha_3); + p = pmadd(x2, p, alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial p. + T q = pmadd(x2, beta_6, beta_4); + q = pmadd(x2, q, beta_2); + q = pmadd(x2, q, beta_0); + + // Divide the numerator by the denominator. + return pdiv(p, q); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATHFUNCTIONSIMPL_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Matrix.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Matrix.h new file mode 100644 index 0000000..90c336d --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Matrix.h @@ -0,0 +1,461 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIX_H +#define EIGEN_MATRIX_H + +namespace Eigen { + +namespace internal { +template +struct traits > +{ +private: + enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret }; + typedef typename find_best_packet<_Scalar,size>::type PacketScalar; + enum { + row_major_bit = _Options&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, + max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, + default_alignment = compute_default_alignment<_Scalar,max_size>::value, + actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, + required_alignment = unpacket_traits::alignment, + packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0 + }; + +public: + typedef _Scalar Scalar; + typedef Dense StorageKind; + typedef Eigen::Index StorageIndex; + typedef MatrixXpr XprKind; + enum { + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _MaxRows, + MaxColsAtCompileTime = _MaxCols, + Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, + Options = _Options, + InnerStrideAtCompileTime = 1, + OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime, + + // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, + Alignment = actual_alignment + }; +}; +} + +/** \class Matrix + * \ingroup Core_Module + * + * \brief The matrix class, also used for vectors and row-vectors + * + * The %Matrix class is the work-horse for all \em dense (\ref dense "note") matrices and vectors within Eigen. + * Vectors are matrices with one column, and row-vectors are matrices with one row. + * + * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note"). + * + * The first three template parameters are required: + * \tparam _Scalar Numeric type, e.g. float, double, int or std::complex. + * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). + * \tparam _Rows Number of rows, or \b Dynamic + * \tparam _Cols Number of columns, or \b Dynamic + * + * The remaining template parameters are optional -- in most cases you don't have to worry about them. + * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either + * \b #AutoAlign or \b #DontAlign. + * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required + * for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size. + * \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note"). + * \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note"). + * + * Eigen provides a number of typedefs covering the usual cases. Here are some examples: + * + * \li \c Matrix2d is a 2x2 square matrix of doubles (\c Matrix) + * \li \c Vector4f is a vector of 4 floats (\c Matrix) + * \li \c RowVector3i is a row-vector of 3 ints (\c Matrix) + * + * \li \c MatrixXf is a dynamic-size matrix of floats (\c Matrix) + * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix) + * + * \li \c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\c Matrix) + * \li \c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\c Matrix) + * + * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs. + * + * You can access elements of vectors and matrices using normal subscripting: + * + * \code + * Eigen::VectorXd v(10); + * v[0] = 0.1; + * v[1] = 0.2; + * v(0) = 0.3; + * v(1) = 0.4; + * + * Eigen::MatrixXi m(10, 10); + * m(0, 1) = 1; + * m(0, 2) = 2; + * m(0, 3) = 3; + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN. + * + * Some notes: + * + *
+ *
\anchor dense Dense versus sparse:
+ *
This %Matrix class handles dense, not sparse matrices and vectors. For sparse matrices and vectors, see the Sparse module. + * + * Dense matrices and vectors are plain usual arrays of coefficients. All the coefficients are stored, in an ordinary contiguous array. + * This is unlike Sparse matrices and vectors where the coefficients are stored as a list of nonzero coefficients.
+ * + *
\anchor fixedsize Fixed-size versus dynamic-size:
+ *
Fixed-size means that the numbers of rows and columns are known are compile-time. In this case, Eigen allocates the array + * of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices, typically up to 4x4, sometimes up + * to 16x16. Larger matrices should be declared as dynamic-size even if one happens to know their size at compile-time. + * + * Dynamic-size means that the numbers of rows or columns are not necessarily known at compile-time. In this case they are runtime + * variables, and the array of coefficients is allocated dynamically on the heap. + * + * Note that \em dense matrices, be they Fixed-size or Dynamic-size, do not expand dynamically in the sense of a std::map. + * If you want this behavior, see the Sparse module.
+ * + *
\anchor maxrows _MaxRows and _MaxCols:
+ *
In most cases, one just leaves these parameters to the default values. + * These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases + * when the exact numbers of rows and columns are not known are compile-time, but it is known at compile-time that they cannot + * exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case _MaxRows and _MaxCols + * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.
+ *
+ * + * ABI and storage layout + * + * The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3. + * + * + * + * + * + * + *
Matrix typeEquivalent C structure
\code Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index rows, cols; + * }; + * \endcode
\code + * Matrix + * Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index size; + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[Rows*Cols]; // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0 + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[MaxRows*MaxCols]; // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0 + * Eigen::Index rows, cols; + * }; + * \endcode
+ * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two + * smaller to EIGEN_MAX_STATIC_ALIGN_BYTES. + * + * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, + * \ref TopicStorageOrders + */ + +template +class Matrix + : public PlainObjectBase > +{ + public: + + /** \brief Base class typedef. + * \sa PlainObjectBase + */ + typedef PlainObjectBase Base; + + enum { Options = _Options }; + + EIGEN_DENSE_PUBLIC_INTERFACE(Matrix) + + typedef typename Base::PlainObject PlainObject; + + using Base::base; + using Base::coeffRef; + + /** + * \brief Assigns matrices to each other. + * + * \note This is a special case of the templated operator=. Its purpose is + * to prevent a default operator= from hiding the templated operator=. + * + * \callgraph + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other) + { + return Base::_set(other); + } + + /** \internal + * \brief Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase& other) + { + return Base::_set(other); + } + + /* Here, doxygen failed to copy the brief information when using \copydoc */ + + /** + * \brief Copies the generic expression \a other into *this. + * \copydetails DenseBase::operator=(const EigenBase &other) + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase &other) + { + return Base::operator=(other); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue& func) + { + return Base::operator=(func); + } + + /** \brief Default constructor. + * + * For fixed-size matrices, does nothing. + * + * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix + * is called a null matrix. This constructor is the unique way to create null matrices: resizing + * a matrix to 0 is not supported. + * + * \sa resize(Index,Index) + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix() : Base() + { + Base::_check_template_params(); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + // FIXME is it still needed + EIGEN_DEVICE_FUNC + explicit Matrix(internal::constructor_without_unaligned_array_assert) + : Base(internal::constructor_without_unaligned_array_assert()) + { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } + +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) + : Base(std::move(other)) + { + Base::_check_template_params(); + if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) + Base::_set_noalias(other); + } + EIGEN_DEVICE_FUNC + Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) + { + other.swap(*this); + return *this; + } +#endif + + #ifndef EIGEN_PARSED_BY_DOXYGEN + + // This constructor is for both 1x1 matrices and dynamic vectors + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Matrix(const T& x) + { + Base::_check_template_params(); + Base::template _init1(x); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) + { + Base::_check_template_params(); + Base::template _init2(x, y); + } + #else + /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC + explicit Matrix(const Scalar *data); + + /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * This is useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance, + * calling Matrix(1) will call the initialization constructor: Matrix(const Scalar&). + * For fixed-size \c 1x1 matrices it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). + */ + EIGEN_STRONG_INLINE explicit Matrix(Index dim); + /** \brief Constructs an initialized 1x1 matrix with the given coefficient */ + Matrix(const Scalar& x); + /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns. + * + * This is useful for dynamic-size matrices. For fixed-size matrices, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance, + * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y). + * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). + */ + EIGEN_DEVICE_FUNC + Matrix(Index rows, Index cols); + + /** \brief Constructs an initialized 2D vector with given coefficients */ + Matrix(const Scalar& x, const Scalar& y); + #endif + + /** \brief Constructs an initialized 3D vector with given coefficients */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3) + m_storage.data()[0] = x; + m_storage.data()[1] = y; + m_storage.data()[2] = z; + } + /** \brief Constructs an initialized 4D vector with given coefficients */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4) + m_storage.data()[0] = x; + m_storage.data()[1] = y; + m_storage.data()[2] = z; + m_storage.data()[3] = w; + } + + + /** \brief Copy constructor */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other) + { } + + /** \brief Copy constructor for generic expressions. + * \sa MatrixBase::operator=(const EigenBase&) + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const EigenBase &other) + : Base(other.derived()) + { } + + EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } + + /////////// Geometry module /////////// + + template + EIGEN_DEVICE_FUNC + explicit Matrix(const RotationBase& r); + template + EIGEN_DEVICE_FUNC + Matrix& operator=(const RotationBase& r); + + // allow to extend Matrix outside Eigen + #ifdef EIGEN_MATRIX_PLUGIN + #include EIGEN_MATRIX_PLUGIN + #endif + + protected: + template + friend struct internal::conservative_resize_like_impl; + + using Base::m_storage; +}; + +/** \defgroup matrixtypedefs Global matrix typedefs + * + * \ingroup Core_Module + * + * Eigen defines several typedef shortcuts for most common matrix and vector types. + * + * The general patterns are the following: + * + * \c MatrixSizeType where \c Size can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size, + * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd + * for complex double. + * + * For example, \c Matrix3d is a fixed-size 3x3 matrix type of doubles, and \c MatrixXf is a dynamic-size matrix of floats. + * + * There are also \c VectorSizeType and \c RowVectorSizeType which are self-explanatory. For example, \c Vector4cf is + * a fixed-size vector of 4 complex floats. + * + * \sa class Matrix + */ + +#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##SizeSuffix##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Vector##SizeSuffix##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix RowVector##SizeSuffix##TypeSuffix; + +#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##Size##X##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##X##Size##TypeSuffix; + +#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4) + +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double, d) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex, cf) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex, cd) + +#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES +#undef EIGEN_MAKE_TYPEDEFS +#undef EIGEN_MAKE_FIXED_TYPEDEFS + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MatrixBase.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MatrixBase.h new file mode 100644 index 0000000..ce41218 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/MatrixBase.h @@ -0,0 +1,530 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2009 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIXBASE_H +#define EIGEN_MATRIXBASE_H + +namespace Eigen { + +/** \class MatrixBase + * \ingroup Core_Module + * + * \brief Base class for all dense matrices, vectors, and expressions + * + * This class is the base that is inherited by all matrix, vector, and related expression + * types. Most of the Eigen API is contained in this class, and its base classes. Other important + * classes for the Eigen API are Matrix, and VectorwiseOp. + * + * Note that some methods are defined in other modules such as the \ref LU_Module LU module + * for all functions related to matrix inversions. + * + * \tparam Derived is the derived type, e.g. a matrix type, or an expression, etc. + * + * When writing a function taking Eigen objects as argument, if you want your function + * to take as argument any matrix, vector, or expression, just let it take a + * MatrixBase argument. As an example, here is a function printFirstRow which, given + * a matrix, vector, or expression \a x, prints the first row of \a x. + * + * \code + template + void printFirstRow(const Eigen::MatrixBase& x) + { + cout << x.row(0) << endl; + } + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. + * + * \sa \blank \ref TopicClassHierarchy + */ +template class MatrixBase + : public DenseBase +{ + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef MatrixBase StorageBaseType; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + typedef DenseBase Base; + using Base::RowsAtCompileTime; + using Base::ColsAtCompileTime; + using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + + using Base::derived; + using Base::const_cast_derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + using Base::lazyAssign; + using Base::eval; + using Base::operator+=; + using Base::operator-=; + using Base::operator*=; + using Base::operator/=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType; + typedef typename Base::RowXpr RowXpr; + typedef typename Base::ColXpr ColXpr; +#endif // not EIGEN_PARSED_BY_DOXYGEN + + + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** type of the equivalent square matrix */ + typedef Matrix SquareMatrixType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + + /** \returns the size of the main diagonal, which is min(rows(),cols()). + * \sa rows(), cols(), SizeAtCompileTime. */ + EIGEN_DEVICE_FUNC + inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); } + + typedef typename Base::PlainObject PlainObject; + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; + /** \internal the return type of MatrixBase::adjoint() */ + typedef typename internal::conditional::IsComplex, + CwiseUnaryOp, ConstTransposeReturnType>, + ConstTransposeReturnType + >::type AdjointReturnType; + /** \internal Return type of eigenvalues() */ + typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; + /** \internal the return type of identity */ + typedef CwiseNullaryOp,PlainObject> IdentityReturnType; + /** \internal the return type of unit vectors */ + typedef Block, SquareMatrixType>, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime> BasisReturnType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase +#define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" +# include "../plugins/CommonCwiseBinaryOps.h" +# include "../plugins/MatrixCwiseUnaryOps.h" +# include "../plugins/MatrixCwiseBinaryOps.h" +# ifdef EIGEN_MATRIXBASE_PLUGIN +# include EIGEN_MATRIXBASE_PLUGIN +# endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const MatrixBase& other); + + // We cannot inherit here via Base::operator= since it is causing + // trouble with MSVC. + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const DenseBase& other); + + template + EIGEN_DEVICE_FUNC + Derived& operator=(const EigenBase& other); + + template + EIGEN_DEVICE_FUNC + Derived& operator=(const ReturnByValue& other); + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator+=(const MatrixBase& other); + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator-=(const MatrixBase& other); + +#ifdef __CUDACC__ + template + EIGEN_DEVICE_FUNC + const Product + operator*(const MatrixBase &other) const + { return this->lazyProduct(other); } +#else + + template + const Product + operator*(const MatrixBase &other) const; + +#endif + + template + EIGEN_DEVICE_FUNC + const Product + lazyProduct(const MatrixBase &other) const; + + template + Derived& operator*=(const EigenBase& other); + + template + void applyOnTheLeft(const EigenBase& other); + + template + void applyOnTheRight(const EigenBase& other); + + template + EIGEN_DEVICE_FUNC + const Product + operator*(const DiagonalBase &diagonal) const; + + template + EIGEN_DEVICE_FUNC + typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType + dot(const MatrixBase& other) const; + + EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; + EIGEN_DEVICE_FUNC RealScalar norm() const; + RealScalar stableNorm() const; + RealScalar blueNorm() const; + RealScalar hypotNorm() const; + EIGEN_DEVICE_FUNC const PlainObject normalized() const; + EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const; + EIGEN_DEVICE_FUNC void normalize(); + EIGEN_DEVICE_FUNC void stableNormalize(); + + EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const; + EIGEN_DEVICE_FUNC void adjointInPlace(); + + typedef Diagonal DiagonalReturnType; + EIGEN_DEVICE_FUNC + DiagonalReturnType diagonal(); + + typedef typename internal::add_const >::type ConstDiagonalReturnType; + EIGEN_DEVICE_FUNC + ConstDiagonalReturnType diagonal() const; + + template struct DiagonalIndexReturnType { typedef Diagonal Type; }; + template struct ConstDiagonalIndexReturnType { typedef const Diagonal Type; }; + + template + EIGEN_DEVICE_FUNC + typename DiagonalIndexReturnType::Type diagonal(); + + template + EIGEN_DEVICE_FUNC + typename ConstDiagonalIndexReturnType::Type diagonal() const; + + typedef Diagonal DiagonalDynamicIndexReturnType; + typedef typename internal::add_const >::type ConstDiagonalDynamicIndexReturnType; + + EIGEN_DEVICE_FUNC + DiagonalDynamicIndexReturnType diagonal(Index index); + EIGEN_DEVICE_FUNC + ConstDiagonalDynamicIndexReturnType diagonal(Index index) const; + + template struct TriangularViewReturnType { typedef TriangularView Type; }; + template struct ConstTriangularViewReturnType { typedef const TriangularView Type; }; + + template + EIGEN_DEVICE_FUNC + typename TriangularViewReturnType::Type triangularView(); + template + EIGEN_DEVICE_FUNC + typename ConstTriangularViewReturnType::Type triangularView() const; + + template struct SelfAdjointViewReturnType { typedef SelfAdjointView Type; }; + template struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView Type; }; + + template + EIGEN_DEVICE_FUNC + typename SelfAdjointViewReturnType::Type selfadjointView(); + template + EIGEN_DEVICE_FUNC + typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; + + const SparseView sparseView(const Scalar& m_reference = Scalar(0), + const typename NumTraits::Real& m_epsilon = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(); + EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitX(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitY(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitW(); + + EIGEN_DEVICE_FUNC + const DiagonalWrapper asDiagonal() const; + const PermutationWrapper asPermutation() const; + + EIGEN_DEVICE_FUNC + Derived& setIdentity(); + EIGEN_DEVICE_FUNC + Derived& setIdentity(Index rows, Index cols); + + bool isIdentity(const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isDiagonal(const RealScalar& prec = NumTraits::dummy_precision()) const; + + bool isUpperTriangular(const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isLowerTriangular(const RealScalar& prec = NumTraits::dummy_precision()) const; + + template + bool isOrthogonal(const MatrixBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isUnitary(const RealScalar& prec = NumTraits::dummy_precision()) const; + + /** \returns true if each coefficients of \c *this and \a other are all exactly equal. + * \warning When using floating point scalar values you probably should rather use a + * fuzzy comparison such as isApprox() + * \sa isApprox(), operator!= */ + template + EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase& other) const + { return cwiseEqual(other).all(); } + + /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other. + * \warning When using floating point scalar values you probably should rather use a + * fuzzy comparison such as isApprox() + * \sa isApprox(), operator== */ + template + EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase& other) const + { return cwiseNotEqual(other).any(); } + + NoAlias noalias(); + + // TODO forceAlignedAccess is temporarily disabled + // Need to find a nicer workaround. + inline const Derived& forceAlignedAccess() const { return derived(); } + inline Derived& forceAlignedAccess() { return derived(); } + template inline const Derived& forceAlignedAccessIf() const { return derived(); } + template inline Derived& forceAlignedAccessIf() { return derived(); } + + EIGEN_DEVICE_FUNC Scalar trace() const; + + template EIGEN_DEVICE_FUNC RealScalar lpNorm() const; + + EIGEN_DEVICE_FUNC MatrixBase& matrix() { return *this; } + EIGEN_DEVICE_FUNC const MatrixBase& matrix() const { return *this; } + + /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix + * \sa ArrayBase::matrix() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper array() { return ArrayWrapper(derived()); } + /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix + * \sa ArrayBase::matrix() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper array() const { return ArrayWrapper(derived()); } + +/////////// LU module /////////// + + inline const FullPivLU fullPivLu() const; + inline const PartialPivLU partialPivLu() const; + + inline const PartialPivLU lu() const; + + inline const Inverse inverse() const; + + template + inline void computeInverseAndDetWithCheck( + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible, + const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() + ) const; + template + inline void computeInverseWithCheck( + ResultType& inverse, + bool& invertible, + const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() + ) const; + Scalar determinant() const; + +/////////// Cholesky module /////////// + + inline const LLT llt() const; + inline const LDLT ldlt() const; + +/////////// QR module /////////// + + inline const HouseholderQR householderQr() const; + inline const ColPivHouseholderQR colPivHouseholderQr() const; + inline const FullPivHouseholderQR fullPivHouseholderQr() const; + inline const CompleteOrthogonalDecomposition completeOrthogonalDecomposition() const; + +/////////// Eigenvalues module /////////// + + inline EigenvaluesReturnType eigenvalues() const; + inline RealScalar operatorNorm() const; + +/////////// SVD module /////////// + + inline JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; + inline BDCSVD bdcSvd(unsigned int computationOptions = 0) const; + +/////////// Geometry module /////////// + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /// \internal helper struct to form the return type of the cross product + template struct cross_product_return_type { + typedef typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; + typedef Matrix type; + }; + #endif // EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC +#ifndef EIGEN_PARSED_BY_DOXYGEN + inline typename cross_product_return_type::type +#else + inline PlainObject +#endif + cross(const MatrixBase& other) const; + + template + EIGEN_DEVICE_FUNC + inline PlainObject cross3(const MatrixBase& other) const; + + EIGEN_DEVICE_FUNC + inline PlainObject unitOrthogonal(void) const; + + EIGEN_DEVICE_FUNC + inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; + + // put this as separate enum value to work around possible GCC 4.3 bug (?) + enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) + : ColsAtCompileTime==1 ? Vertical : Horizontal }; + typedef Homogeneous HomogeneousReturnType; + EIGEN_DEVICE_FUNC + inline HomogeneousReturnType homogeneous() const; + + enum { + SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1 + }; + typedef Block::ColsAtCompileTime==1 ? SizeMinusOne : 1, + internal::traits::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne; + typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType; + EIGEN_DEVICE_FUNC + inline const HNormalizedReturnType hnormalized() const; + +////////// Householder module /////////// + + void makeHouseholderInPlace(Scalar& tau, RealScalar& beta); + template + void makeHouseholder(EssentialPart& essential, + Scalar& tau, RealScalar& beta) const; + template + void applyHouseholderOnTheLeft(const EssentialPart& essential, + const Scalar& tau, + Scalar* workspace); + template + void applyHouseholderOnTheRight(const EssentialPart& essential, + const Scalar& tau, + Scalar* workspace); + +///////// Jacobi module ///////// + + template + void applyOnTheLeft(Index p, Index q, const JacobiRotation& j); + template + void applyOnTheRight(Index p, Index q, const JacobiRotation& j); + +///////// SparseCore module ///////// + + template + EIGEN_STRONG_INLINE const typename SparseMatrixBase::template CwiseProductDenseReturnType::Type + cwiseProduct(const SparseMatrixBase &other) const + { + return other.cwiseProduct(derived()); + } + +///////// MatrixFunctions module ///////// + + typedef typename internal::stem_function::type StemFunction; + const MatrixExponentialReturnValue exp() const; + const MatrixFunctionReturnValue matrixFunction(StemFunction f) const; + const MatrixFunctionReturnValue cosh() const; + const MatrixFunctionReturnValue sinh() const; + const MatrixFunctionReturnValue cos() const; + const MatrixFunctionReturnValue sin() const; + const MatrixSquareRootReturnValue sqrt() const; + const MatrixLogarithmReturnValue log() const; + const MatrixPowerReturnValue pow(const RealScalar& p) const; + const MatrixComplexPowerReturnValue pow(const std::complex& p) const; + + protected: + EIGEN_DEVICE_FUNC MatrixBase() : Base() {} + + private: + EIGEN_DEVICE_FUNC explicit MatrixBase(int); + EIGEN_DEVICE_FUNC MatrixBase(int,int); + template EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase&); + protected: + // mixing arrays and matrices is not legal + template Derived& operator+=(const ArrayBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} + // mixing arrays and matrices is not legal + template Derived& operator-=(const ArrayBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} +}; + + +/*************************************************************************** +* Implementation of matrix base methods +***************************************************************************/ + +/** replaces \c *this by \c *this * \a other. + * + * \returns a reference to \c *this + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline Derived& +MatrixBase::operator*=(const EigenBase &other) +{ + other.derived().applyThisOnTheRight(derived()); + return derived(); +} + +/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=(). + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline void MatrixBase::applyOnTheRight(const EigenBase &other) +{ + other.derived().applyThisOnTheRight(derived()); +} + +/** replaces \c *this by \a other * \c *this. + * + * Example: \include MatrixBase_applyOnTheLeft.cpp + * Output: \verbinclude MatrixBase_applyOnTheLeft.out + */ +template +template +inline void MatrixBase::applyOnTheLeft(const EigenBase &other) +{ + other.derived().applyThisOnTheLeft(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_MATRIXBASE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/NestByValue.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/NestByValue.h new file mode 100644 index 0000000..13adf07 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/NestByValue.h @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NESTBYVALUE_H +#define EIGEN_NESTBYVALUE_H + +namespace Eigen { + +namespace internal { +template +struct traits > : public traits +{}; +} + +/** \class NestByValue + * \ingroup Core_Module + * + * \brief Expression which must be nested by value + * + * \tparam ExpressionType the type of the object of which we are requiring nesting-by-value + * + * This class is the return type of MatrixBase::nestByValue() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::nestByValue() + */ +template class NestByValue + : public internal::dense_xpr_base< NestByValue >::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) + + EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const + { + return m_expression.coeff(row, col); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) + { + return m_expression.const_cast_derived().coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const + { + return m_expression.coeff(index); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) + { + return m_expression.const_cast_derived().coeffRef(index); + } + + template + inline const PacketScalar packet(Index row, Index col) const + { + return m_expression.template packet(row, col); + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(row, col, x); + } + + template + inline const PacketScalar packet(Index index) const + { + return m_expression.template packet(index); + } + + template + inline void writePacket(Index index, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(index, x); + } + + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } + + protected: + const ExpressionType m_expression; +}; + +/** \returns an expression of the temporary version of *this. + */ +template +inline const NestByValue +DenseBase::nestByValue() const +{ + return NestByValue(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_NESTBYVALUE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/NoAlias.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/NoAlias.h new file mode 100644 index 0000000..3390801 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/NoAlias.h @@ -0,0 +1,108 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NOALIAS_H +#define EIGEN_NOALIAS_H + +namespace Eigen { + +/** \class NoAlias + * \ingroup Core_Module + * + * \brief Pseudo expression providing an operator = assuming no aliasing + * + * \tparam ExpressionType the type of the object on which to do the lazy assignment + * + * This class represents an expression with special assignment operators + * assuming no aliasing between the target expression and the source expression. + * More precisely it alloas to bypass the EvalBeforeAssignBit flag of the source expression. + * It is the return type of MatrixBase::noalias() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::noalias() + */ +template class StorageBase> +class NoAlias +{ + public: + typedef typename ExpressionType::Scalar Scalar; + + explicit NoAlias(ExpressionType& expression) : m_expression(expression) {} + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) + { + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); + return m_expression; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) + { + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); + return m_expression; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) + { + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); + return m_expression; + } + + EIGEN_DEVICE_FUNC + ExpressionType& expression() const + { + return m_expression; + } + + protected: + ExpressionType& m_expression; +}; + +/** \returns a pseudo expression of \c *this with an operator= assuming + * no aliasing between \c *this and the source expression. + * + * More precisely, noalias() allows to bypass the EvalBeforeAssignBit flag. + * Currently, even though several expressions may alias, only product + * expressions have this flag. Therefore, noalias() is only usefull when + * the source expression contains a matrix product. + * + * Here are some examples where noalias is usefull: + * \code + * D.noalias() = A * B; + * D.noalias() += A.transpose() * B; + * D.noalias() -= 2 * A * B.adjoint(); + * \endcode + * + * On the other hand the following example will lead to a \b wrong result: + * \code + * A.noalias() = A * B; + * \endcode + * because the result matrix A is also an operand of the matrix product. Therefore, + * there is no alternative than evaluating A * B in a temporary, that is the default + * behavior when you write: + * \code + * A = A * B; + * \endcode + * + * \sa class NoAlias + */ +template +NoAlias MatrixBase::noalias() +{ + return NoAlias(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_NOALIAS_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/NumTraits.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/NumTraits.h new file mode 100644 index 0000000..daf4898 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/NumTraits.h @@ -0,0 +1,248 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NUMTRAITS_H +#define EIGEN_NUMTRAITS_H + +namespace Eigen { + +namespace internal { + +// default implementation of digits10(), based on numeric_limits if specialized, +// 0 for integer types, and log10(epsilon()) otherwise. +template< typename T, + bool use_numeric_limits = std::numeric_limits::is_specialized, + bool is_integer = NumTraits::IsInteger> +struct default_digits10_impl +{ + static int run() { return std::numeric_limits::digits10; } +}; + +template +struct default_digits10_impl // Floating point +{ + static int run() { + using std::log10; + using std::ceil; + typedef typename NumTraits::Real Real; + return int(ceil(-log10(NumTraits::epsilon()))); + } +}; + +template +struct default_digits10_impl // Integer +{ + static int run() { return 0; } +}; + +} // end namespace internal + +/** \class NumTraits + * \ingroup Core_Module + * + * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen. + * + * \tparam T the numeric type at hand + * + * This class stores enums, typedefs and static methods giving information about a numeric type. + * + * The provided data consists of: + * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real, + * then \c Real is just a typedef to \a T. If \a T is \c std::complex then \c Real + * is a typedef to \a U. + * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values, + * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives + * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to + * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is + * only intended as a helper for code that needs to explicitly promote types. + * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex, Literal is defined as \c U. + * Of course, this type must be fully compatible with \a T. In doubt, just use \a T here. + * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what + * this means, just use \a T here. + * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex + * type, and to 0 otherwise. + * \li An enum value \a IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int, + * and to \c 0 otherwise. + * \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed + * to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers. + * Stay vague here. No need to do architecture-specific stuff. + * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. + * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must + * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. + * \li An epsilon() function which, unlike std::numeric_limits::epsilon(), + * it returns a \a Real instead of a \a T. + * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default + * value by the fuzzy comparison operators. + * \li highest() and lowest() functions returning the highest and lowest possible values respectively. + * \li digits10() function returning the number of decimal digits that can be represented without change. This is + * the analogue of std::numeric_limits::digits10 + * which is used as the default implementation if specialized. + */ + +template struct GenericNumTraits +{ + enum { + IsInteger = std::numeric_limits::is_integer, + IsSigned = std::numeric_limits::is_signed, + IsComplex = 0, + RequireInitialization = internal::is_arithmetic::value ? 0 : 1, + ReadCost = 1, + AddCost = 1, + MulCost = 1 + }; + + typedef T Real; + typedef typename internal::conditional< + IsInteger, + typename internal::conditional::type, + T + >::type NonInteger; + typedef T Nested; + typedef T Literal; + + EIGEN_DEVICE_FUNC + static inline Real epsilon() + { + return numext::numeric_limits::epsilon(); + } + + EIGEN_DEVICE_FUNC + static inline int digits10() + { + return internal::default_digits10_impl::run(); + } + + EIGEN_DEVICE_FUNC + static inline Real dummy_precision() + { + // make sure to override this for floating-point types + return Real(0); + } + + + EIGEN_DEVICE_FUNC + static inline T highest() { + return (numext::numeric_limits::max)(); + } + + EIGEN_DEVICE_FUNC + static inline T lowest() { + return IsInteger ? (numext::numeric_limits::min)() : (-(numext::numeric_limits::max)()); + } + + EIGEN_DEVICE_FUNC + static inline T infinity() { + return numext::numeric_limits::infinity(); + } + + EIGEN_DEVICE_FUNC + static inline T quiet_NaN() { + return numext::numeric_limits::quiet_NaN(); + } +}; + +template struct NumTraits : GenericNumTraits +{}; + +template<> struct NumTraits + : GenericNumTraits +{ + EIGEN_DEVICE_FUNC + static inline float dummy_precision() { return 1e-5f; } +}; + +template<> struct NumTraits : GenericNumTraits +{ + EIGEN_DEVICE_FUNC + static inline double dummy_precision() { return 1e-12; } +}; + +template<> struct NumTraits + : GenericNumTraits +{ + static inline long double dummy_precision() { return 1e-15l; } +}; + +template struct NumTraits > + : GenericNumTraits > +{ + typedef _Real Real; + typedef typename NumTraits<_Real>::Literal Literal; + enum { + IsComplex = 1, + RequireInitialization = NumTraits<_Real>::RequireInitialization, + ReadCost = 2 * NumTraits<_Real>::ReadCost, + AddCost = 2 * NumTraits::AddCost, + MulCost = 4 * NumTraits::MulCost + 2 * NumTraits::AddCost + }; + + EIGEN_DEVICE_FUNC + static inline Real epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC + static inline Real dummy_precision() { return NumTraits::dummy_precision(); } + EIGEN_DEVICE_FUNC + static inline int digits10() { return NumTraits::digits10(); } +}; + +template +struct NumTraits > +{ + typedef Array ArrayType; + typedef typename NumTraits::Real RealScalar; + typedef Array Real; + typedef typename NumTraits::NonInteger NonIntegerScalar; + typedef Array NonInteger; + typedef ArrayType & Nested; + typedef typename NumTraits::Literal Literal; + + enum { + IsComplex = NumTraits::IsComplex, + IsInteger = NumTraits::IsInteger, + IsSigned = NumTraits::IsSigned, + RequireInitialization = 1, + ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, + AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::AddCost, + MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::MulCost + }; + + EIGEN_DEVICE_FUNC + static inline RealScalar epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC + static inline RealScalar dummy_precision() { return NumTraits::dummy_precision(); } + + static inline int digits10() { return NumTraits::digits10(); } +}; + +template<> struct NumTraits + : GenericNumTraits +{ + enum { + RequireInitialization = 1, + ReadCost = HugeCost, + AddCost = HugeCost, + MulCost = HugeCost + }; + + static inline int digits10() { return 0; } + +private: + static inline std::string epsilon(); + static inline std::string dummy_precision(); + static inline std::string lowest(); + static inline std::string highest(); + static inline std::string infinity(); + static inline std::string quiet_NaN(); +}; + +// Empty specialization for void to allow template specialization based on NumTraits::Real with T==void and SFINAE. +template<> struct NumTraits {}; + +} // end namespace Eigen + +#endif // EIGEN_NUMTRAITS_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/PermutationMatrix.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/PermutationMatrix.h new file mode 100644 index 0000000..b1fb455 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/PermutationMatrix.h @@ -0,0 +1,633 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2009-2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PERMUTATIONMATRIX_H +#define EIGEN_PERMUTATIONMATRIX_H + +namespace Eigen { + +namespace internal { + +enum PermPermProduct_t {PermPermProduct}; + +} // end namespace internal + +/** \class PermutationBase + * \ingroup Core_Module + * + * \brief Base class for permutations + * + * \tparam Derived the derived class + * + * This class is the base class for all expressions representing a permutation matrix, + * internally stored as a vector of integers. + * The convention followed here is that if \f$ \sigma \f$ is a permutation, the corresponding permutation matrix + * \f$ P_\sigma \f$ is such that if \f$ (e_1,\ldots,e_p) \f$ is the canonical basis, we have: + * \f[ P_\sigma(e_i) = e_{\sigma(i)}. \f] + * This convention ensures that for any two permutations \f$ \sigma, \tau \f$, we have: + * \f[ P_{\sigma\circ\tau} = P_\sigma P_\tau. \f] + * + * Permutation matrices are square and invertible. + * + * Notice that in addition to the member functions and operators listed here, there also are non-member + * operator* to multiply any kind of permutation object with any kind of matrix expression (MatrixBase) + * on either side. + * + * \sa class PermutationMatrix, class PermutationWrapper + */ +template +class PermutationBase : public EigenBase +{ + typedef internal::traits Traits; + typedef EigenBase Base; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + enum { + Flags = Traits::Flags, + RowsAtCompileTime = Traits::RowsAtCompileTime, + ColsAtCompileTime = Traits::ColsAtCompileTime, + MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + }; + typedef typename Traits::StorageIndex StorageIndex; + typedef Matrix + DenseMatrixType; + typedef PermutationMatrix + PlainPermutationType; + typedef PlainPermutationType PlainObject; + using Base::derived; + typedef Inverse InverseReturnType; + typedef void Scalar; + #endif + + /** Copies the other permutation into *this */ + template + Derived& operator=(const PermutationBase& other) + { + indices() = other.indices(); + return derived(); + } + + /** Assignment from the Transpositions \a tr */ + template + Derived& operator=(const TranspositionsBase& tr) + { + setIdentity(tr.size()); + for(Index k=size()-1; k>=0; --k) + applyTranspositionOnTheRight(k,tr.coeff(k)); + return derived(); + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + Derived& operator=(const PermutationBase& other) + { + indices() = other.indices(); + return derived(); + } + #endif + + /** \returns the number of rows */ + inline Index rows() const { return Index(indices().size()); } + + /** \returns the number of columns */ + inline Index cols() const { return Index(indices().size()); } + + /** \returns the size of a side of the respective square matrix, i.e., the number of indices */ + inline Index size() const { return Index(indices().size()); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + void evalTo(MatrixBase& other) const + { + other.setZero(); + for (Index i=0; i=0 && j>=0 && i=0 && j>=0 && i + void assignTranspose(const PermutationBase& other) + { + for (Index i=0; i + void assignProduct(const Lhs& lhs, const Rhs& rhs) + { + eigen_assert(lhs.cols() == rhs.rows()); + for (Index i=0; i + inline PlainPermutationType operator*(const PermutationBase& other) const + { return PlainPermutationType(internal::PermPermProduct, derived(), other.derived()); } + + /** \returns the product of a permutation with another inverse permutation. + * + * \note \blank \note_try_to_help_rvo + */ + template + inline PlainPermutationType operator*(const InverseImpl& other) const + { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); } + + /** \returns the product of an inverse permutation with another permutation. + * + * \note \blank \note_try_to_help_rvo + */ + template friend + inline PlainPermutationType operator*(const InverseImpl& other, const PermutationBase& perm) + { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); } + + /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation. + * + * This function is O(\c n) procedure allocating a buffer of \c n booleans. + */ + Index determinant() const + { + Index res = 1; + Index n = size(); + Matrix mask(n); + mask.fill(false); + Index r = 0; + while(r < n) + { + // search for the next seed + while(r=n) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + mask.coeffRef(k0) = true; + for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k)) + { + mask.coeffRef(k) = true; + res = -res; + } + } + return res; + } + + protected: + +}; + +namespace internal { +template +struct traits > + : traits > +{ + typedef PermutationStorage StorageKind; + typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; + typedef _StorageIndex StorageIndex; + typedef void Scalar; +}; +} + +/** \class PermutationMatrix + * \ingroup Core_Module + * + * \brief Permutation matrix + * + * \tparam SizeAtCompileTime the number of rows/cols, or Dynamic + * \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. + * \tparam _StorageIndex the integer type of the indices + * + * This class represents a permutation matrix, internally stored as a vector of integers. + * + * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix + */ +template +class PermutationMatrix : public PermutationBase > +{ + typedef PermutationBase Base; + typedef internal::traits Traits; + public: + + typedef const PermutationMatrix& Nested; + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + typedef typename Traits::StorageIndex StorageIndex; + #endif + + inline PermutationMatrix() + {} + + /** Constructs an uninitialized permutation matrix of given size. + */ + explicit inline PermutationMatrix(Index size) : m_indices(size) + { + eigen_internal_assert(size <= NumTraits::highest()); + } + + /** Copy constructor. */ + template + inline PermutationMatrix(const PermutationBase& other) + : m_indices(other.indices()) {} + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** Standard copy constructor. Defined only to prevent a default copy constructor + * from hiding the other templated constructor */ + inline PermutationMatrix(const PermutationMatrix& other) : m_indices(other.indices()) {} + #endif + + /** Generic constructor from expression of the indices. The indices + * array has the meaning that the permutations sends each integer i to indices[i]. + * + * \warning It is your responsibility to check that the indices array that you passes actually + * describes a permutation, i.e., each value between 0 and n-1 occurs exactly once, where n is the + * array's size. + */ + template + explicit inline PermutationMatrix(const MatrixBase& indices) : m_indices(indices) + {} + + /** Convert the Transpositions \a tr to a permutation matrix */ + template + explicit PermutationMatrix(const TranspositionsBase& tr) + : m_indices(tr.size()) + { + *this = tr; + } + + /** Copies the other permutation into *this */ + template + PermutationMatrix& operator=(const PermutationBase& other) + { + m_indices = other.indices(); + return *this; + } + + /** Assignment from the Transpositions \a tr */ + template + PermutationMatrix& operator=(const TranspositionsBase& tr) + { + return Base::operator=(tr.derived()); + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + PermutationMatrix& operator=(const PermutationMatrix& other) + { + m_indices = other.m_indices; + return *this; + } + #endif + + /** const version of indices(). */ + const IndicesType& indices() const { return m_indices; } + /** \returns a reference to the stored array representing the permutation. */ + IndicesType& indices() { return m_indices; } + + + /**** multiplication helpers to hopefully get RVO ****/ + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + PermutationMatrix(const InverseImpl& other) + : m_indices(other.derived().nestedExpression().size()) + { + eigen_internal_assert(m_indices.size() <= NumTraits::highest()); + StorageIndex end = StorageIndex(m_indices.size()); + for (StorageIndex i=0; i + PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) + : m_indices(lhs.indices().size()) + { + Base::assignProduct(lhs,rhs); + } +#endif + + protected: + + IndicesType m_indices; +}; + + +namespace internal { +template +struct traits,_PacketAccess> > + : traits > +{ + typedef PermutationStorage StorageKind; + typedef Map, _PacketAccess> IndicesType; + typedef _StorageIndex StorageIndex; + typedef void Scalar; +}; +} + +template +class Map,_PacketAccess> + : public PermutationBase,_PacketAccess> > +{ + typedef PermutationBase Base; + typedef internal::traits Traits; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + typedef typename IndicesType::Scalar StorageIndex; + #endif + + inline Map(const StorageIndex* indicesPtr) + : m_indices(indicesPtr) + {} + + inline Map(const StorageIndex* indicesPtr, Index size) + : m_indices(indicesPtr,size) + {} + + /** Copies the other permutation into *this */ + template + Map& operator=(const PermutationBase& other) + { return Base::operator=(other.derived()); } + + /** Assignment from the Transpositions \a tr */ + template + Map& operator=(const TranspositionsBase& tr) + { return Base::operator=(tr.derived()); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + Map& operator=(const Map& other) + { + m_indices = other.m_indices; + return *this; + } + #endif + + /** const version of indices(). */ + const IndicesType& indices() const { return m_indices; } + /** \returns a reference to the stored array representing the permutation. */ + IndicesType& indices() { return m_indices; } + + protected: + + IndicesType m_indices; +}; + +template class TranspositionsWrapper; +namespace internal { +template +struct traits > +{ + typedef PermutationStorage StorageKind; + typedef void Scalar; + typedef typename _IndicesType::Scalar StorageIndex; + typedef _IndicesType IndicesType; + enum { + RowsAtCompileTime = _IndicesType::SizeAtCompileTime, + ColsAtCompileTime = _IndicesType::SizeAtCompileTime, + MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + Flags = 0 + }; +}; +} + +/** \class PermutationWrapper + * \ingroup Core_Module + * + * \brief Class to view a vector of integers as a permutation matrix + * + * \tparam _IndicesType the type of the vector of integer (can be any compatible expression) + * + * This class allows to view any vector expression of integers as a permutation matrix. + * + * \sa class PermutationBase, class PermutationMatrix + */ +template +class PermutationWrapper : public PermutationBase > +{ + typedef PermutationBase Base; + typedef internal::traits Traits; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + #endif + + inline PermutationWrapper(const IndicesType& indices) + : m_indices(indices) + {} + + /** const version of indices(). */ + const typename internal::remove_all::type& + indices() const { return m_indices; } + + protected: + + typename IndicesType::Nested m_indices; +}; + + +/** \returns the matrix with the permutation applied to the columns. + */ +template +EIGEN_DEVICE_FUNC +const Product +operator*(const MatrixBase &matrix, + const PermutationBase& permutation) +{ + return Product + (matrix.derived(), permutation.derived()); +} + +/** \returns the matrix with the permutation applied to the rows. + */ +template +EIGEN_DEVICE_FUNC +const Product +operator*(const PermutationBase &permutation, + const MatrixBase& matrix) +{ + return Product + (permutation.derived(), matrix.derived()); +} + + +template +class InverseImpl + : public EigenBase > +{ + typedef typename PermutationType::PlainPermutationType PlainPermutationType; + typedef internal::traits PermTraits; + protected: + InverseImpl() {} + public: + typedef Inverse InverseType; + using EigenBase >::derived; + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename PermutationType::DenseMatrixType DenseMatrixType; + enum { + RowsAtCompileTime = PermTraits::RowsAtCompileTime, + ColsAtCompileTime = PermTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime + }; + #endif + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + void evalTo(MatrixBase& other) const + { + other.setZero(); + for (Index i=0; i friend + const Product + operator*(const MatrixBase& matrix, const InverseType& trPerm) + { + return Product(matrix.derived(), trPerm.derived()); + } + + /** \returns the matrix with the inverse permutation applied to the rows. + */ + template + const Product + operator*(const MatrixBase& matrix) const + { + return Product(derived(), matrix.derived()); + } +}; + +template +const PermutationWrapper MatrixBase::asPermutation() const +{ + return derived(); +} + +namespace internal { + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/PlainObjectBase.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/PlainObjectBase.h new file mode 100644 index 0000000..77f4f60 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/PlainObjectBase.h @@ -0,0 +1,1031 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSESTORAGEBASE_H +#define EIGEN_DENSESTORAGEBASE_H + +#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO) +# define EIGEN_INITIALIZE_COEFFS +# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i::quiet_NaN(); +#else +# undef EIGEN_INITIALIZE_COEFFS +# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED +#endif + +namespace Eigen { + +namespace internal { + +template struct check_rows_cols_for_overflow { + template + EIGEN_DEVICE_FUNC + static EIGEN_ALWAYS_INLINE void run(Index, Index) + { + } +}; + +template<> struct check_rows_cols_for_overflow { + template + EIGEN_DEVICE_FUNC + static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols) + { + // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242 + // we assume Index is signed + Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed + bool error = (rows == 0 || cols == 0) ? false + : (rows > max_index / cols); + if (error) + throw_std_bad_alloc(); + } +}; + +template +struct conservative_resize_like_impl; + +template struct matrix_swap_impl; + +} // end namespace internal + +#ifdef EIGEN_PARSED_BY_DOXYGEN +namespace doxygen { + +// This is a workaround to doxygen not being able to understand the inheritance logic +// when it is hidden by the dense_xpr_base helper struct. +// Moreover, doxygen fails to include members that are not documented in the declaration body of +// MatrixBase if we inherits MatrixBase >, +// this is why we simply inherits MatrixBase, though this does not make sense. + +/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template struct dense_xpr_base_dispatcher; +/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template +struct dense_xpr_base_dispatcher > + : public MatrixBase {}; +/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template +struct dense_xpr_base_dispatcher > + : public ArrayBase {}; + +} // namespace doxygen + +/** \class PlainObjectBase + * \ingroup Core_Module + * \brief %Dense storage base class for matrices and arrays. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN. + * + * \tparam Derived is the derived type, e.g., a Matrix or Array + * + * \sa \ref TopicClassHierarchy + */ +template +class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher +#else +template +class PlainObjectBase : public internal::dense_xpr_base::type +#endif +{ + public: + enum { Options = internal::traits::Options }; + typedef typename internal::dense_xpr_base::type Base; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + typedef Derived DenseType; + + using Base::RowsAtCompileTime; + using Base::ColsAtCompileTime; + using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + + template friend class Eigen::Map; + friend class Eigen::Map; + typedef Eigen::Map MapType; + friend class Eigen::Map; + typedef const Eigen::Map ConstMapType; +#if EIGEN_MAX_ALIGN_BYTES>0 + // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice. + friend class Eigen::Map; + friend class Eigen::Map; +#endif + typedef Eigen::Map AlignedMapType; + typedef const Eigen::Map ConstAlignedMapType; + template struct StridedMapType { typedef Eigen::Map type; }; + template struct StridedConstMapType { typedef Eigen::Map type; }; + template struct StridedAlignedMapType { typedef Eigen::Map type; }; + template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; + + protected: + DenseStorage m_storage; + + public: + enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits::Alignment>0) }; + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) + + EIGEN_DEVICE_FUNC + Base& base() { return *static_cast(this); } + EIGEN_DEVICE_FUNC + const Base& base() const { return *static_cast(this); } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); } + + /** This is an overloaded version of DenseCoeffsBase::coeff(Index,Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeff(Index) const for details. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const + { + if(Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + /** This is an overloaded version of DenseCoeffsBase::coeff(Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeff(Index) const for details. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const + { + return m_storage.data()[index]; + } + + /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index,Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeffRef(Index,Index) const for details. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId) + { + if(Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeffRef(Index) const for details. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + return m_storage.data()[index]; + } + + /** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index). + * It is provided for convenience. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const + { + if(Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + /** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index). + * It is provided for convenience. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const + { + return m_storage.data()[index]; + } + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const + { + return internal::ploadt + (m_storage.data() + (Flags & RowMajorBit + ? colId + rowId * m_storage.cols() + : rowId + colId * m_storage.rows())); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketScalar packet(Index index) const + { + return internal::ploadt(m_storage.data() + index); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val) + { + internal::pstoret + (m_storage.data() + (Flags & RowMajorBit + ? colId + rowId * m_storage.cols() + : rowId + colId * m_storage.rows()), val); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val) + { + internal::pstoret(m_storage.data() + index, val); + } + + /** \returns a const pointer to the data array of this matrix */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const + { return m_storage.data(); } + + /** \returns a pointer to the data array of this matrix */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() + { return m_storage.data(); } + + /** Resizes \c *this to a \a rows x \a cols matrix. + * + * This method is intended for dynamic-size matrices, although it is legal to call it on any + * matrix as long as fixed dimensions are left unchanged. If you only want to change the number + * of rows and/or of columns, you can use resize(NoChange_t, Index), resize(Index, NoChange_t). + * + * If the current number of coefficients of \c *this exactly matches the + * product \a rows * \a cols, then no memory allocation is performed and + * the current values are left unchanged. In all other cases, including + * shrinking, the data is reallocated and all previous values are lost. + * + * Example: \include Matrix_resize_int_int.cpp + * Output: \verbinclude Matrix_resize_int_int.out + * + * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t) + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void resize(Index rows, Index cols) + { + eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime) + && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime) + && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array."); + internal::check_rows_cols_for_overflow::run(rows, cols); + #ifdef EIGEN_INITIALIZE_COEFFS + Index size = rows*cols; + bool size_changed = size != this->size(); + m_storage.resize(size, rows, cols); + if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + #else + m_storage.resize(rows*cols, rows, cols); + #endif + } + + /** Resizes \c *this to a vector of length \a size + * + * \only_for_vectors. This method does not work for + * partially dynamic matrices when the static dimension is anything other + * than 1. For example it will not work with Matrix. + * + * Example: \include Matrix_resize_int.cpp + * Output: \verbinclude Matrix_resize_int.out + * + * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t) + */ + EIGEN_DEVICE_FUNC + inline void resize(Index size) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase) + eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0); + #ifdef EIGEN_INITIALIZE_COEFFS + bool size_changed = size != this->size(); + #endif + if(RowsAtCompileTime == 1) + m_storage.resize(size, 1, size); + else + m_storage.resize(size, size, 1); + #ifdef EIGEN_INITIALIZE_COEFFS + if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + #endif + } + + /** Resizes the matrix, changing only the number of columns. For the parameter of type NoChange_t, just pass the special value \c NoChange + * as in the example below. + * + * Example: \include Matrix_resize_NoChange_int.cpp + * Output: \verbinclude Matrix_resize_NoChange_int.out + * + * \sa resize(Index,Index) + */ + EIGEN_DEVICE_FUNC + inline void resize(NoChange_t, Index cols) + { + resize(rows(), cols); + } + + /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange + * as in the example below. + * + * Example: \include Matrix_resize_int_NoChange.cpp + * Output: \verbinclude Matrix_resize_int_NoChange.out + * + * \sa resize(Index,Index) + */ + EIGEN_DEVICE_FUNC + inline void resize(Index rows, NoChange_t) + { + resize(rows, cols()); + } + + /** Resizes \c *this to have the same dimensions as \a other. + * Takes care of doing all the checking that's needed. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void resizeLike(const EigenBase& _other) + { + const OtherDerived& other = _other.derived(); + internal::check_rows_cols_for_overflow::run(other.rows(), other.cols()); + const Index othersize = other.rows()*other.cols(); + if(RowsAtCompileTime == 1) + { + eigen_assert(other.rows() == 1 || other.cols() == 1); + resize(1, othersize); + } + else if(ColsAtCompileTime == 1) + { + eigen_assert(other.rows() == 1 || other.cols() == 1); + resize(othersize, 1); + } + else resize(other.rows(), other.cols()); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * The method is intended for matrices of dynamic size. If you only want to change the number + * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or + * conservativeResize(Index, NoChange_t). + * + * Matrices are resized relative to the top-left element. In case values need to be + * appended to the matrix they will be uninitialized. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols) + { + internal::conservative_resize_like_impl::run(*this, rows, cols); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * As opposed to conservativeResize(Index rows, Index cols), this version leaves + * the number of columns unchanged. + * + * In case the matrix is growing, new rows will be uninitialized. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t) + { + // Note: see the comment in conservativeResize(Index,Index) + conservativeResize(rows, cols()); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * As opposed to conservativeResize(Index rows, Index cols), this version leaves + * the number of rows unchanged. + * + * In case the matrix is growing, new columns will be uninitialized. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols) + { + // Note: see the comment in conservativeResize(Index,Index) + conservativeResize(rows(), cols); + } + + /** Resizes the vector to \a size while retaining old values. + * + * \only_for_vectors. This method does not work for + * partially dynamic matrices when the static dimension is anything other + * than 1. For example it will not work with Matrix. + * + * When values are appended, they will be uninitialized. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index size) + { + internal::conservative_resize_like_impl::run(*this, size); + } + + /** Resizes the matrix to \a rows x \a cols of \c other, while leaving old values untouched. + * + * The method is intended for matrices of dynamic size. If you only want to change the number + * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or + * conservativeResize(Index, NoChange_t). + * + * Matrices are resized relative to the top-left element. In case values need to be + * appended to the matrix they will copied from \c other. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase& other) + { + internal::conservative_resize_like_impl::run(*this, other); + } + + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other) + { + return _set(other); + } + + /** \sa MatrixBase::lazyAssign() */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase& other) + { + _resize_to_match(other); + return Base::lazyAssign(other.derived()); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue& func) + { + resize(func.rows(), func.cols()); + return Base::operator=(func); + } + + // Prevent user from trying to instantiate PlainObjectBase objects + // by making all its constructor protected. See bug 1074. + protected: + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() + { +// _check_template_params(); +// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + // FIXME is it still needed ? + /** \internal */ + EIGEN_DEVICE_FUNC + explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) + : m_storage(internal::constructor_without_unaligned_array_assert()) + { +// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } +#endif + +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT + : m_storage( std::move(other.m_storage) ) + { + } + + EIGEN_DEVICE_FUNC + PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT + { + using std::swap; + swap(m_storage, other.m_storage); + return *this; + } +#endif + + /** Copy constructor */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other) + : Base(), m_storage(other.m_storage) { } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols) + : m_storage(size, rows, cols) + { +// _check_template_params(); +// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + /** \sa PlainObjectBase::operator=(const EigenBase&) */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase &other) + : m_storage() + { + _check_template_params(); + resizeLike(other); + _set_noalias(other); + } + + /** \sa PlainObjectBase::operator=(const EigenBase&) */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase &other) + : m_storage() + { + _check_template_params(); + resizeLike(other); + *this = other.derived(); + } + /** \brief Copy constructor with in-place evaluation */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue& other) + { + _check_template_params(); + // FIXME this does not automatically transpose vectors if necessary + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + + public: + + /** \brief Copies the generic expression \a other into *this. + * \copydetails DenseBase::operator=(const EigenBase &other) + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other) + { + _resize_to_match(other); + Base::operator=(other.derived()); + return this->derived(); + } + + /** \name Map + * These are convenience functions returning Map objects. The Map() static functions return unaligned Map objects, + * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned + * \a data pointers. + * + * \see class Map + */ + //@{ + static inline ConstMapType Map(const Scalar* data) + { return ConstMapType(data); } + static inline MapType Map(Scalar* data) + { return MapType(data); } + static inline ConstMapType Map(const Scalar* data, Index size) + { return ConstMapType(data, size); } + static inline MapType Map(Scalar* data, Index size) + { return MapType(data, size); } + static inline ConstMapType Map(const Scalar* data, Index rows, Index cols) + { return ConstMapType(data, rows, cols); } + static inline MapType Map(Scalar* data, Index rows, Index cols) + { return MapType(data, rows, cols); } + + static inline ConstAlignedMapType MapAligned(const Scalar* data) + { return ConstAlignedMapType(data); } + static inline AlignedMapType MapAligned(Scalar* data) + { return AlignedMapType(data); } + static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size) + { return ConstAlignedMapType(data, size); } + static inline AlignedMapType MapAligned(Scalar* data, Index size) + { return AlignedMapType(data, size); } + static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols) + { return ConstAlignedMapType(data, rows, cols); } + static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols) + { return AlignedMapType(data, rows, cols); } + + template + static inline typename StridedConstMapType >::type Map(const Scalar* data, const Stride& stride) + { return typename StridedConstMapType >::type(data, stride); } + template + static inline typename StridedMapType >::type Map(Scalar* data, const Stride& stride) + { return typename StridedMapType >::type(data, stride); } + template + static inline typename StridedConstMapType >::type Map(const Scalar* data, Index size, const Stride& stride) + { return typename StridedConstMapType >::type(data, size, stride); } + template + static inline typename StridedMapType >::type Map(Scalar* data, Index size, const Stride& stride) + { return typename StridedMapType >::type(data, size, stride); } + template + static inline typename StridedConstMapType >::type Map(const Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedConstMapType >::type(data, rows, cols, stride); } + template + static inline typename StridedMapType >::type Map(Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedMapType >::type(data, rows, cols, stride); } + + template + static inline typename StridedConstAlignedMapType >::type MapAligned(const Scalar* data, const Stride& stride) + { return typename StridedConstAlignedMapType >::type(data, stride); } + template + static inline typename StridedAlignedMapType >::type MapAligned(Scalar* data, const Stride& stride) + { return typename StridedAlignedMapType >::type(data, stride); } + template + static inline typename StridedConstAlignedMapType >::type MapAligned(const Scalar* data, Index size, const Stride& stride) + { return typename StridedConstAlignedMapType >::type(data, size, stride); } + template + static inline typename StridedAlignedMapType >::type MapAligned(Scalar* data, Index size, const Stride& stride) + { return typename StridedAlignedMapType >::type(data, size, stride); } + template + static inline typename StridedConstAlignedMapType >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedConstAlignedMapType >::type(data, rows, cols, stride); } + template + static inline typename StridedAlignedMapType >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedAlignedMapType >::type(data, rows, cols, stride); } + //@} + + using Base::setConstant; + EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val); + EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val); + + using Base::setZero; + EIGEN_DEVICE_FUNC Derived& setZero(Index size); + EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols); + + using Base::setOnes; + EIGEN_DEVICE_FUNC Derived& setOnes(Index size); + EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols); + + using Base::setRandom; + Derived& setRandom(Index size); + Derived& setRandom(Index rows, Index cols); + + #ifdef EIGEN_PLAINOBJECTBASE_PLUGIN + #include EIGEN_PLAINOBJECTBASE_PLUGIN + #endif + + protected: + /** \internal Resizes *this in preparation for assigning \a other to it. + * Takes care of doing all the checking that's needed. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase& other) + { + #ifdef EIGEN_NO_AUTOMATIC_RESIZING + eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size()) + : (rows() == other.rows() && cols() == other.cols()))) + && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + EIGEN_ONLY_USED_FOR_DEBUG(other); + #else + resizeLike(other); + #endif + } + + /** + * \brief Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + * + * \sa operator=(const MatrixBase&), _set_noalias() + * + * \internal + */ + // aliasing is dealt once in internall::call_assignment + // so at this stage we have to assume aliasing... and resising has to be done later. + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& _set(const DenseBase& other) + { + internal::call_assignment(this->derived(), other.derived()); + return this->derived(); + } + + /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which + * is the case when creating a new matrix) so one can enforce lazy evaluation. + * + * \sa operator=(const MatrixBase&), _set() + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase& other) + { + // I don't think we need this resize call since the lazyAssign will anyways resize + // and lazyAssign will be called by the assign selector. + //_resize_to_match(other); + // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because + // it wouldn't allow to copy a row-vector into a column-vector. + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); + return this->derived(); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if::type* = 0) + { + EIGEN_STATIC_ASSERT(bool(NumTraits::IsInteger) && + bool(NumTraits::IsInteger), + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) + resize(rows,cols); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==2,T1>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); + } + + // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array, + // then the argument is meant to be the size of the object. + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible::value) + && ((!internal::is_same::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0) + { + // NOTE MSVC 2008 complains if we directly put bool(NumTraits::IsInteger) as the EIGEN_STATIC_ASSERT argument. + const bool is_integer = NumTraits::IsInteger; + EIGEN_UNUSED_VARIABLE(is_integer); + EIGEN_STATIC_ASSERT(is_integer, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) + resize(size); + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted) + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if::value,T>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = val0; + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type) + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==1 + && internal::is_convertible::value,T*>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = Scalar(val0); + } + + // Initialize a fixed size matrix from a pointer to raw data + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar* data){ + this->_set_noalias(ConstMapType(data)); + } + + // Initialize an arbitrary matrix from a dense expression + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const DenseBase& other){ + this->_set_noalias(other); + } + + // Initialize an arbitrary matrix from an object convertible to the Derived type. + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Derived& other){ + this->_set_noalias(other); + } + + // Initialize an arbitrary matrix from a generic Eigen expression + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const EigenBase& other){ + this->derived() = other; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const ReturnByValue& other) + { + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const RotationBase& r) + { + this->derived() = r; + } + + // For fixed-size Array + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, + typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T>::type* = 0) + { + Base::setConstant(val0); + } + + // For fixed-size Array + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T*>::type* = 0) + { + Base::setConstant(val0); + } + + template + friend struct internal::matrix_swap_impl; + + public: + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal + * \brief Override DenseBase::swap() since for dynamic-sized matrices + * of same type it is enough to swap the data pointers. + */ + template + EIGEN_DEVICE_FUNC + void swap(DenseBase & other) + { + enum { SwapPointers = internal::is_same::value && Base::SizeAtCompileTime==Dynamic }; + internal::matrix_swap_impl::run(this->derived(), other.derived()); + } + + /** \internal + * \brief const version forwarded to DenseBase::swap + */ + template + EIGEN_DEVICE_FUNC + void swap(DenseBase const & other) + { Base::swap(other.derived()); } + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void _check_template_params() + { + EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor) + && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0) + && ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0)) + && ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0)) + && ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0)) + && ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0)) + && (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic) + && (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic) + && (Options & (DontAlign|RowMajor)) == Options), + INVALID_MATRIX_TEMPLATE_PARAMETERS) + } + + enum { IsPlainObjectBase = 1 }; +#endif +}; + +namespace internal { + +template +struct conservative_resize_like_impl +{ + static void run(DenseBase& _this, Index rows, Index cols) + { + if (_this.rows() == rows && _this.cols() == cols) return; + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) + + if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows + (!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns + { + internal::check_rows_cols_for_overflow::run(rows, cols); + _this.derived().m_storage.conservativeResize(rows*cols,rows,cols); + } + else + { + // The storage order does not allow us to use reallocation. + typename Derived::PlainObject tmp(rows,cols); + const Index common_rows = numext::mini(rows, _this.rows()); + const Index common_cols = numext::mini(cols, _this.cols()); + tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); + _this.derived().swap(tmp); + } + } + + static void run(DenseBase& _this, const DenseBase& other) + { + if (_this.rows() == other.rows() && _this.cols() == other.cols()) return; + + // Note: Here is space for improvement. Basically, for conservativeResize(Index,Index), + // neither RowsAtCompileTime or ColsAtCompileTime must be Dynamic. If only one of the + // dimensions is dynamic, one could use either conservativeResize(Index rows, NoChange_t) or + // conservativeResize(NoChange_t, Index cols). For these methods new static asserts like + // EIGEN_STATIC_ASSERT_DYNAMIC_ROWS and EIGEN_STATIC_ASSERT_DYNAMIC_COLS would be good. + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived) + + if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) || // row-major and we change only the number of rows + (!Derived::IsRowMajor && _this.rows() == other.rows()) ) // column-major and we change only the number of columns + { + const Index new_rows = other.rows() - _this.rows(); + const Index new_cols = other.cols() - _this.cols(); + _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols()); + if (new_rows>0) + _this.bottomRightCorner(new_rows, other.cols()) = other.bottomRows(new_rows); + else if (new_cols>0) + _this.bottomRightCorner(other.rows(), new_cols) = other.rightCols(new_cols); + } + else + { + // The storage order does not allow us to use reallocation. + typename Derived::PlainObject tmp(other); + const Index common_rows = numext::mini(tmp.rows(), _this.rows()); + const Index common_cols = numext::mini(tmp.cols(), _this.cols()); + tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); + _this.derived().swap(tmp); + } + } +}; + +// Here, the specialization for vectors inherits from the general matrix case +// to allow calling .conservativeResize(rows,cols) on vectors. +template +struct conservative_resize_like_impl + : conservative_resize_like_impl +{ + using conservative_resize_like_impl::run; + + static void run(DenseBase& _this, Index size) + { + const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size; + const Index new_cols = Derived::RowsAtCompileTime==1 ? size : 1; + _this.derived().m_storage.conservativeResize(size,new_rows,new_cols); + } + + static void run(DenseBase& _this, const DenseBase& other) + { + if (_this.rows() == other.rows() && _this.cols() == other.cols()) return; + + const Index num_new_elements = other.size() - _this.size(); + + const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows(); + const Index new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1; + _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols); + + if (num_new_elements > 0) + _this.tail(num_new_elements) = other.tail(num_new_elements); + } +}; + +template +struct matrix_swap_impl +{ + EIGEN_DEVICE_FUNC + static inline void run(MatrixTypeA& a, MatrixTypeB& b) + { + a.base().swap(b); + } +}; + +template +struct matrix_swap_impl +{ + EIGEN_DEVICE_FUNC + static inline void run(MatrixTypeA& a, MatrixTypeB& b) + { + static_cast(a).m_storage.swap(static_cast(b).m_storage); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DENSESTORAGEBASE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Product.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Product.h new file mode 100644 index 0000000..ae0c94b --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Product.h @@ -0,0 +1,186 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PRODUCT_H +#define EIGEN_PRODUCT_H + +namespace Eigen { + +template class ProductImpl; + +namespace internal { + +template +struct traits > +{ + typedef typename remove_all::type LhsCleaned; + typedef typename remove_all::type RhsCleaned; + typedef traits LhsTraits; + typedef traits RhsTraits; + + typedef MatrixXpr XprKind; + + typedef typename ScalarBinaryOpTraits::Scalar, typename traits::Scalar>::ReturnType Scalar; + typedef typename product_promote_storage_type::ret>::ret StorageKind; + typedef typename promote_index_type::type StorageIndex; + + enum { + RowsAtCompileTime = LhsTraits::RowsAtCompileTime, + ColsAtCompileTime = RhsTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime, + + // FIXME: only needed by GeneralMatrixMatrixTriangular + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime), + + // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. + Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit)) + || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit + : NoPreferredStorageOrderBit + }; +}; + +} // end namespace internal + +/** \class Product + * \ingroup Core_Module + * + * \brief Expression of the product of two arbitrary matrices or vectors + * + * \tparam _Lhs the type of the left-hand side expression + * \tparam _Rhs the type of the right-hand side expression + * + * This class represents an expression of the product of two arbitrary matrices. + * + * The other template parameters are: + * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct + * + */ +template +class Product : public ProductImpl<_Lhs,_Rhs,Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits<_Rhs>::StorageKind, + internal::product_type<_Lhs,_Rhs>::ret>::ret> +{ + public: + + typedef _Lhs Lhs; + typedef _Rhs Rhs; + + typedef typename ProductImpl< + Lhs, Rhs, Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + internal::product_type::ret>::ret>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Product) + + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) + { + eigen_assert(lhs.cols() == rhs.rows() + && "invalid matrix product" + && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); + } + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } + + EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } + + protected: + + LhsNested m_lhs; + RhsNested m_rhs; +}; + +namespace internal { + +template::ret> +class dense_product_base + : public internal::dense_xpr_base >::type +{}; + +/** Convertion to scalar for inner-products */ +template +class dense_product_base + : public internal::dense_xpr_base >::type +{ + typedef Product ProductXpr; + typedef typename internal::dense_xpr_base::type Base; +public: + using Base::derived; + typedef typename Base::Scalar Scalar; + + operator const Scalar() const + { + return internal::evaluator(derived()).coeff(0,0); + } +}; + +} // namespace internal + +// Generic API dispatcher +template +class ProductImpl : public internal::generic_xpr_base, MatrixXpr, StorageKind>::type +{ + public: + typedef typename internal::generic_xpr_base, MatrixXpr, StorageKind>::type Base; +}; + +template +class ProductImpl + : public internal::dense_product_base +{ + typedef Product Derived; + + public: + + typedef typename internal::dense_product_base Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + protected: + enum { + IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && + (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic), + EnableCoeff = IsOneByOne || Option==LazyProduct + }; + + public: + + EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return internal::evaluator(derived()).coeff(row,col); + } + + EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return internal::evaluator(derived()).coeff(i); + } + + +}; + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ProductEvaluators.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ProductEvaluators.h new file mode 100644 index 0000000..c42725d --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ProductEvaluators.h @@ -0,0 +1,1105 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2011 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_PRODUCTEVALUATORS_H +#define EIGEN_PRODUCTEVALUATORS_H + +namespace Eigen { + +namespace internal { + +/** \internal + * Evaluator of a product expression. + * Since products require special treatments to handle all possible cases, + * we simply deffer the evaluation logic to a product_evaluator class + * which offers more partial specialization possibilities. + * + * \sa class product_evaluator + */ +template +struct evaluator > + : public product_evaluator > +{ + typedef Product XprType; + typedef product_evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" +// TODO we should apply that rule only if that's really helpful +template +struct evaluator_assume_aliasing, + const CwiseNullaryOp, Plain1>, + const Product > > +{ + static const bool value = true; +}; +template +struct evaluator, + const CwiseNullaryOp, Plain1>, + const Product > > + : public evaluator > +{ + typedef CwiseBinaryOp, + const CwiseNullaryOp, Plain1>, + const Product > XprType; + typedef evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) + {} +}; + + +template +struct evaluator, DiagIndex> > + : public evaluator, DiagIndex> > +{ + typedef Diagonal, DiagIndex> XprType; + typedef evaluator, DiagIndex> > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(Diagonal, DiagIndex>( + Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), + xpr.index() )) + {} +}; + + +// Helper class to perform a matrix product with the destination at hand. +// Depending on the sizes of the factors, there are different evaluation strategies +// as controlled by internal::product_type. +template< typename Lhs, typename Rhs, + typename LhsShape = typename evaluator_traits::Shape, + typename RhsShape = typename evaluator_traits::Shape, + int ProductType = internal::product_type::value> +struct generic_product_impl; + +template +struct evaluator_assume_aliasing > { + static const bool value = true; +}; + +// This is the default evaluator implementation for products: +// It creates a temporary and call generic_product_impl +template +struct product_evaluator, ProductTag, LhsShape, RhsShape> + : public evaluator::PlainObject> +{ + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + +// FIXME shall we handle nested_eval here?, +// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.) +// typedef typename internal::nested_eval::type LhsNested; +// typedef typename internal::nested_eval::type RhsNested; +// typedef typename internal::remove_all::type LhsNestedCleaned; +// typedef typename internal::remove_all::type RhsNestedCleaned; +// +// const LhsNested lhs(xpr.lhs()); +// const RhsNested rhs(xpr.rhs()); +// +// generic_product_impl::evalTo(m_result, lhs, rhs); + + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; +}; + +// The following three shortcuts are enabled only if the scalar types match excatly. +// TODO: we could enable them for different scalar types when the product is not vectorized. + +// Dense = Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> +{ + typedef Product SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + // FIXME shall we handle nested_eval here? + generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense += Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::add_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> +{ + typedef Product SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + // FIXME shall we handle nested_eval here? + generic_product_impl::addTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense -= Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::sub_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> +{ + typedef Product SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + // FIXME shall we handle nested_eval here? + generic_product_impl::subTo(dst, src.lhs(), src.rhs()); + } +}; + + +// Dense ?= scalar * Product +// TODO we should apply that rule if that's really helpful +// for instance, this is not good for inner products +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain> +struct Assignment, const CwiseNullaryOp,Plain>, + const Product >, AssignFunc, Dense2Dense> +{ + typedef CwiseBinaryOp, + const CwiseNullaryOp,Plain>, + const Product > SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) + { + call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func); + } +}; + +//---------------------------------------- +// Catch "Dense ?= xpr + Product<>" expression to save one temporary +// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct + +template +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, + const Product >, DenseShape > { + static const bool value = true; +}; + +template +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, + const Product >, DenseShape > { + static const bool value = true; +}; + +template +struct assignment_from_xpr_op_product +{ + template + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/) + { + call_assignment_no_alias(dst, src.lhs(), Func1()); + call_assignment_no_alias(dst, src.rhs(), Func2()); + } +}; + +#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \ + template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \ + struct Assignment, const OtherXpr, \ + const Product >, internal::ASSIGN_OP, Dense2Dense> \ + : assignment_from_xpr_op_product, internal::ASSIGN_OP, internal::ASSIGN_OP2 > \ + {} + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op); + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op); + +//---------------------------------------- + +template +struct generic_product_impl +{ + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } +}; + + +/*********************************************************************** +* Implementation of outer dense * dense vector product +***********************************************************************/ + +// Column major result +template +void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) +{ + evaluator rhsEval(rhs); + typename nested_eval::type actual_lhs(lhs); + // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored + // FIXME not very good if rhs is real and lhs complex while alpha is real too + const Index cols = dst.cols(); + for (Index j=0; j +void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) +{ + evaluator lhsEval(lhs); + typename nested_eval::type actual_rhs(rhs); + // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored + // FIXME not very good if lhs is real and rhs complex while alpha is real too + const Index rows = dst.rows(); + for (Index i=0; i +struct generic_product_impl +{ + template struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; + typedef typename Product::Scalar Scalar; + + // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose + struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; + struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; + struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; + struct adds { + Scalar m_scale; + explicit adds(const Scalar& s) : m_scale(s) {} + template void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() += m_scale * src; + } + }; + + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); + } + + template + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); + } + + template + static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); + } + +}; + + +// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo +template +struct generic_product_impl_base +{ + typedef typename Product::Scalar Scalar; + + template + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } + + template + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } + + template + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } + + template + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } + +}; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; + typedef typename Product::Scalar Scalar; + enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; + typedef typename internal::remove_all::type>::type MatrixType; + + template + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + LhsNested actual_lhs(lhs); + RhsNested actual_rhs(rhs); + internal::gemv_dense_selector::HasUsableDirectAccess) + >::run(actual_lhs, actual_rhs, dst, alpha); + } +}; + +template +struct generic_product_impl +{ + typedef typename Product::Scalar Scalar; + + template + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // Same as: dst.noalias() = lhs.lazyProduct(rhs); + // but easier on the compiler side + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); + } + + template + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst.noalias() += lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); + } + + template + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst.noalias() -= lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); + } + +// template +// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) +// { dst.noalias() += alpha * lhs.lazyProduct(rhs); } +}; + +// This specialization enforces the use of a coefficient-based evaluation strategy +template +struct generic_product_impl + : generic_product_impl {}; + +// Case 2: Evaluate coeff by coeff +// +// This is mostly taken from CoeffBasedProduct.h +// The main difference is that we add an extra argument to the etor_product_*_impl::run() function +// for the inner dimension of the product, because evaluator object do not know their size. + +template +struct etor_product_coeff_impl; + +template +struct etor_product_packet_impl; + +template +struct product_evaluator, ProductTag, DenseShape, DenseShape> + : evaluator_base > +{ + typedef Product XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit product_evaluator(const XprType& xpr) + : m_lhs(xpr.lhs()), + m_rhs(xpr.rhs()), + m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! + m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, + // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. + m_innerDim(xpr.lhs().cols()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); +#if 0 + std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n"; + std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n"; + std::cerr << "LhsAlignment= " << LhsAlignment << "\n"; + std::cerr << "RhsAlignment= " << RhsAlignment << "\n"; + std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n"; + std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n"; + std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n"; + std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n"; + std::cerr << "Alignment= " << Alignment << "\n"; + std::cerr << "Flags= " << Flags << "\n"; +#endif + } + + // Everything below here is taken from CoeffBasedProduct.h + + typedef typename internal::nested_eval::type LhsNested; + typedef typename internal::nested_eval::type RhsNested; + + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + typedef evaluator LhsEtorType; + typedef evaluator RhsEtorType; + + enum { + RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime + }; + + typedef typename find_best_packet::type LhsVecPacketType; + typedef typename find_best_packet::type RhsVecPacketType; + + enum { + + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, + RhsCoeffReadCost = RhsEtorType::CoeffReadCost, + CoeffReadCost = InnerSize==0 ? NumTraits::ReadCost + : InnerSize == Dynamic ? HugeCost + : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits::AddCost, + + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + + LhsFlags = LhsEtorType::Flags, + RhsFlags = RhsEtorType::Flags, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + LhsVecPacketSize = unpacket_traits::size, + RhsVecPacketSize = unpacket_traits::size, + + // Here, we don't care about alignment larger than the usable packet size. + LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))), + RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))), + + SameType = is_same::value, + + CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1), + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1), + + EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : (bool(RhsRowMajor) && !CanVectorizeLhs), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) + | (EvalToRowMajor ? RowMajorBit : 0) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) + | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), + + LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), + RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), + + Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) + : 0, + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = SameType + && LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (InnerSize % packet_traits::size == 0) + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const + { + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + * TODO: this seems possible when the result is a vector + */ + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const + { + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0; + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); + } + + template + const PacketType packet(Index row, Index col) const + { + PacketType res; + typedef etor_product_packet_impl PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + template + const PacketType packet(Index index) const + { + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0; + return packet(row,col); + } + +protected: + typename internal::add_const_on_value_type::type m_lhs; + typename internal::add_const_on_value_type::type m_rhs; + + LhsEtorType m_lhsImpl; + RhsEtorType m_rhsImpl; + + // TODO: Get rid of m_innerDim if known at compile time + Index m_innerDim; +}; + +template +struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape> + : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape> +{ + typedef Product XprType; + typedef Product BaseProduct; + typedef product_evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(BaseProduct(xpr.lhs(),xpr.rhs())) + {} +}; + +/**************************************** +*** Coeff based product, Packet path *** +****************************************/ + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(pset1(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet(Index(UnrollingIndex-1), col), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(lhs.template packet(row, Index(UnrollingIndex-1)), pset1(rhs.coeff(Index(UnrollingIndex-1), col)), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(pset1(lhs.coeff(row, Index(0))),rhs.template packet(Index(0), col)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(lhs.template packet(row, Index(0)), pset1(rhs.coeff(Index(0), col))); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(typename unpacket_traits::type(0)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(typename unpacket_traits::type(0)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + res = pset1(typename unpacket_traits::type(0)); + for(Index i = 0; i < innerDim; ++i) + res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + res = pset1(typename unpacket_traits::type(0)); + for(Index i = 0; i < innerDim; ++i) + res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); + } +}; + + +/*************************************************************************** +* Triangular products +***************************************************************************/ +template +struct triangular_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl + ::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + +/*************************************************************************** +* SelfAdjoint products +***************************************************************************/ +template +struct selfadjoint_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + +/*************************************************************************** +* Diagonal products +***************************************************************************/ + +template +struct diagonal_product_evaluator_base + : evaluator_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; +public: + enum { + CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, + + MatrixFlags = evaluator::Flags, + DiagFlags = evaluator::Flags, + _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor, + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), + _SameTypes = is_same::value, + // FIXME currently we need same types, but in the future the next rule should be the one + //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), + _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), + Alignment = evaluator::Alignment + }; + + diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) + : m_diagImpl(diag), m_matImpl(mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const + { + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + } + +protected: + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const + { + return internal::pmul(m_matImpl.template packet(row, col), + internal::pset1(m_diagImpl.coeff(id))); + } + + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const + { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! + }; + return internal::pmul(m_matImpl.template packet(row, col), + m_diagImpl.template packet(id)); + } + + evaluator m_diagImpl; + evaluator m_matImpl; +}; + +// diagonal * dense +template +struct product_evaluator, ProductTag, DiagonalShape, DenseShape> + : diagonal_product_evaluator_base, OnTheLeft> +{ + typedef diagonal_product_evaluator_base, OnTheLeft> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { + StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor + }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.rhs(), xpr.lhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const + { + // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. + // See also similar calls below. + return this->template packet_impl(row,col, row, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +// dense * diagonal +template +struct product_evaluator, ProductTag, DenseShape, DiagonalShape> + : diagonal_product_evaluator_base, OnTheRight> +{ + typedef diagonal_product_evaluator_base, OnTheRight> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.lhs(), xpr.rhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const + { + return this->template packet_impl(row,col, col, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +/*************************************************************************** +* Products with permutation matrices +***************************************************************************/ + +/** \internal + * \class permutation_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h + */ +template +struct permutation_matrix_product; + +template +struct permutation_matrix_product +{ + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + template + static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) + { + MatrixType mat(xpr); + const Index n = Side==OnTheLeft ? mat.rows() : mat.cols(); + // FIXME we need an is_same for expression that is not sensitive to constness. For instance + // is_same_xpr, Block >::value should be true. + //if(is_same::value && extract_data(dst) == extract_data(mat)) + if(is_same_dense(dst, mat)) + { + // apply the permutation inplace + Matrix mask(perm.size()); + mask.fill(false); + Index r = 0; + while(r < perm.size()) + { + // search for the next seed + while(r=perm.size()) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + Index kPrev = k0; + mask.coeffRef(k0) = true; + for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k)) + { + Block(dst, k) + .swap(Block + (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); + + mask.coeffRef(k) = true; + kPrev = k; + } + } + } + else + { + for(Index i = 0; i < n; ++i) + { + Block + (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i) + + = + + Block + (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i); + } + } + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, rhs, lhs); + } +}; + +template +struct generic_product_impl, Rhs, PermutationShape, MatrixShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Inverse& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Inverse& rhs) + { + permutation_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + + +/*************************************************************************** +* Products with transpositions matrices +***************************************************************************/ + +// FIXME could we unify Transpositions and Permutation into a single "shape"?? + +/** \internal + * \class transposition_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + */ +template +struct transposition_matrix_product +{ + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + template + static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) + { + MatrixType mat(xpr); + typedef typename TranspositionType::StorageIndex StorageIndex; + const Index size = tr.size(); + StorageIndex j = 0; + + if(!is_same_dense(dst,mat)) + dst = mat; + + for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, rhs, lhs); + } +}; + + +template +struct generic_product_impl, Rhs, TranspositionsShape, MatrixShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, TranspositionsShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) + { + transposition_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_EVALUATORS_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Random.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Random.h new file mode 100644 index 0000000..6faf789 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Random.h @@ -0,0 +1,182 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RANDOM_H +#define EIGEN_RANDOM_H + +namespace Eigen { + +namespace internal { + +template struct scalar_random_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op) + inline const Scalar operator() () const { return random(); } +}; + +template +struct functor_traits > +{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false, IsRepeatable = false }; }; + +} // end namespace internal + +/** \returns a random matrix expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * \not_reentrant + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used + * instead. + * + * + * Example: \include MatrixBase_random_int_int.cpp + * Output: \verbinclude MatrixBase_random_int_int.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators. + * + * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random() + */ +template +inline const typename DenseBase::RandomReturnType +DenseBase::Random(Index rows, Index cols) +{ + return NullaryExpr(rows, cols, internal::scalar_random_op()); +} + +/** \returns a random vector expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * \not_reentrant + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Random() should be used + * instead. + * + * Example: \include MatrixBase_random_int.cpp + * Output: \verbinclude MatrixBase_random_int.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary vector whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random() + */ +template +inline const typename DenseBase::RandomReturnType +DenseBase::Random(Index size) +{ + return NullaryExpr(size, internal::scalar_random_op()); +} + +/** \returns a fixed-size random matrix or vector expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * Example: \include MatrixBase_random.cpp + * Output: \verbinclude MatrixBase_random.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * \not_reentrant + * + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index) + */ +template +inline const typename DenseBase::RandomReturnType +DenseBase::Random() +{ + return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op()); +} + +/** Sets all coefficients in this expression to random values. + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * + * Example: \include MatrixBase_setRandom.cpp + * Output: \verbinclude MatrixBase_setRandom.out + * + * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index) + */ +template +inline Derived& DenseBase::setRandom() +{ + return *this = Random(rows(), cols()); +} + +/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values. + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \only_for_vectors + * \not_reentrant + * + * Example: \include Matrix_setRandom_int.cpp + * Output: \verbinclude Matrix_setRandom_int.out + * + * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setRandom(Index newSize) +{ + resize(newSize); + return setRandom(); +} + +/** Resizes to the given size, and sets all coefficients in this expression to random values. + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setRandom_int_int.cpp + * Output: \verbinclude Matrix_setRandom_int_int.out + * + * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setRandom(Index rows, Index cols) +{ + resize(rows, cols); + return setRandom(); +} + +} // end namespace Eigen + +#endif // EIGEN_RANDOM_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Redux.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Redux.h new file mode 100644 index 0000000..b6e8f88 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Redux.h @@ -0,0 +1,505 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REDUX_H +#define EIGEN_REDUX_H + +namespace Eigen { + +namespace internal { + +// TODO +// * implement other kind of vectorization +// * factorize code + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for vectorization and unrolling +***************************************************************************/ + +template +struct redux_traits +{ +public: + typedef typename find_best_packet::type PacketType; + enum { + PacketSize = unpacket_traits::size, + InnerMaxSize = int(Derived::IsRowMajor) + ? Derived::MaxColsAtCompileTime + : Derived::MaxRowsAtCompileTime + }; + + enum { + MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) + && (functor_traits::PacketAccess), + MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit), + MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize + }; + +public: + enum { + Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(DefaultTraversal) + }; + +public: + enum { + Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost + : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) + }; + +public: + enum { + Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling + }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + EIGEN_DEBUG_VAR(Derived::Flags) + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + EIGEN_DEBUG_VAR(Traversal) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(Unrolling) + std::cerr << std::endl; + } +#endif +}; + +/*************************************************************************** +* Part 2 : unrollers +***************************************************************************/ + +/*** no vectorization ***/ + +template +struct redux_novec_unroller +{ + enum { + HalfLength = Length/2 + }; + + typedef typename Derived::Scalar Scalar; + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) + { + return func(redux_novec_unroller::run(mat,func), + redux_novec_unroller::run(mat,func)); + } +}; + +template +struct redux_novec_unroller +{ + enum { + outer = Start / Derived::InnerSizeAtCompileTime, + inner = Start % Derived::InnerSizeAtCompileTime + }; + + typedef typename Derived::Scalar Scalar; + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) + { + return mat.coeffByOuterInner(outer, inner); + } +}; + +// This is actually dead code and will never be called. It is required +// to prevent false warnings regarding failed inlining though +// for 0 length run() will never be called at all. +template +struct redux_novec_unroller +{ + typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } +}; + +/*** vectorization ***/ + +template +struct redux_vec_unroller +{ + enum { + PacketSize = redux_traits::PacketSize, + HalfLength = Length/2 + }; + + typedef typename Derived::Scalar Scalar; + typedef typename redux_traits::PacketType PacketScalar; + + static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func) + { + return func.packetOp( + redux_vec_unroller::run(mat,func), + redux_vec_unroller::run(mat,func) ); + } +}; + +template +struct redux_vec_unroller +{ + enum { + index = Start * redux_traits::PacketSize, + outer = index / int(Derived::InnerSizeAtCompileTime), + inner = index % int(Derived::InnerSizeAtCompileTime), + alignment = Derived::Alignment + }; + + typedef typename Derived::Scalar Scalar; + typedef typename redux_traits::PacketType PacketScalar; + + static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) + { + return mat.template packetByOuterInner(outer, inner); + } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + +template::Traversal, + int Unrolling = redux_traits::Unrolling +> +struct redux_impl; + +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) + { + eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); + Scalar res; + res = mat.coeffByOuterInner(0, 0); + for(Index i = 1; i < mat.innerSize(); ++i) + res = func(res, mat.coeffByOuterInner(0, i)); + for(Index i = 1; i < mat.outerSize(); ++i) + for(Index j = 0; j < mat.innerSize(); ++j) + res = func(res, mat.coeffByOuterInner(i, j)); + return res; + } +}; + +template +struct redux_impl + : public redux_novec_unroller +{}; + +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename redux_traits::PacketType PacketScalar; + + static Scalar run(const Derived &mat, const Func& func) + { + const Index size = mat.size(); + + const Index packetSize = redux_traits::PacketSize; + const int packetAlignment = unpacket_traits::alignment; + enum { + alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned), + alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment) + }; + const Index alignedStart = internal::first_default_aligned(mat.nestedExpression()); + const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); + const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); + const Index alignedEnd2 = alignedStart + alignedSize2; + const Index alignedEnd = alignedStart + alignedSize; + Scalar res; + if(alignedSize) + { + PacketScalar packet_res0 = mat.template packet(alignedStart); + if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop + { + PacketScalar packet_res1 = mat.template packet(alignedStart+packetSize); + for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) + { + packet_res0 = func.packetOp(packet_res0, mat.template packet(index)); + packet_res1 = func.packetOp(packet_res1, mat.template packet(index+packetSize)); + } + + packet_res0 = func.packetOp(packet_res0,packet_res1); + if(alignedEnd>alignedEnd2) + packet_res0 = func.packetOp(packet_res0, mat.template packet(alignedEnd2)); + } + res = func.predux(packet_res0); + + for(Index index = 0; index < alignedStart; ++index) + res = func(res,mat.coeff(index)); + + for(Index index = alignedEnd; index < size; ++index) + res = func(res,mat.coeff(index)); + } + else // too small to vectorize anything. + // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. + { + res = mat.coeff(0); + for(Index index = 1; index < size; ++index) + res = func(res,mat.coeff(index)); + } + + return res; + } +}; + +// NOTE: for SliceVectorizedTraversal we simply bypass unrolling +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename redux_traits::PacketType PacketType; + + EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func) + { + eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); + const Index innerSize = mat.innerSize(); + const Index outerSize = mat.outerSize(); + enum { + packetSize = redux_traits::PacketSize + }; + const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; + Scalar res; + if(packetedInnerSize) + { + PacketType packet_res = mat.template packet(0,0); + for(Index j=0; j(j,i)); + + res = func.predux(packet_res); + for(Index j=0; j::run(mat, func); + } + + return res; + } +}; + +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + + typedef typename redux_traits::PacketType PacketScalar; + enum { + PacketSize = redux_traits::PacketSize, + Size = Derived::SizeAtCompileTime, + VectorizedSize = (Size / PacketSize) * PacketSize + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) + { + eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); + if (VectorizedSize > 0) { + Scalar res = func.predux(redux_vec_unroller::run(mat,func)); + if (VectorizedSize != Size) + res = func(res,redux_novec_unroller::run(mat,func)); + return res; + } + else { + return redux_novec_unroller::run(mat,func); + } + } +}; + +// evaluator adaptor +template +class redux_evaluator +{ +public: + typedef _XprType XprType; + EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + enum { + MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, + // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator + Flags = evaluator::Flags & ~DirectAccessBit, + IsRowMajor = XprType::IsRowMajor, + SizeAtCompileTime = XprType::SizeAtCompileTime, + InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, + CoeffReadCost = evaluator::CoeffReadCost, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index row, Index col) const + { return m_evaluator.coeff(row, col); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index index) const + { return m_evaluator.coeff(index); } + + template + PacketType packet(Index row, Index col) const + { return m_evaluator.template packet(row, col); } + + template + PacketType packet(Index index) const + { return m_evaluator.template packet(index); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeffByOuterInner(Index outer, Index inner) const + { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + template + PacketType packetByOuterInner(Index outer, Index inner) const + { return m_evaluator.template packet(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + const XprType & nestedExpression() const { return m_xpr; } + +protected: + internal::evaluator m_evaluator; + const XprType &m_xpr; +}; + +} // end namespace internal + +/*************************************************************************** +* Part 4 : public API +***************************************************************************/ + + +/** \returns the result of a full redux operation on the whole matrix or vector using \a func + * + * The template parameter \a BinaryOp is the type of the functor \a func which must be + * an associative operator. Both current C++98 and C++11 functor styles are handled. + * + * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() + */ +template +template +typename internal::traits::Scalar +DenseBase::redux(const Func& func) const +{ + eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); + + typedef typename internal::redux_evaluator ThisEvaluator; + ThisEvaluator thisEval(derived()); + + return internal::redux_impl::run(thisEval, func); +} + +/** \returns the minimum of all coefficients of \c *this. + * \warning the result is undefined if \c *this contains NaN. + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::minCoeff() const +{ + return derived().redux(Eigen::internal::scalar_min_op()); +} + +/** \returns the maximum of all coefficients of \c *this. + * \warning the result is undefined if \c *this contains NaN. + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::maxCoeff() const +{ + return derived().redux(Eigen::internal::scalar_max_op()); +} + +/** \returns the sum of all coefficients of \c *this + * + * If \c *this is empty, then the value 0 is returned. + * + * \sa trace(), prod(), mean() + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::sum() const +{ + if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) + return Scalar(0); + return derived().redux(Eigen::internal::scalar_sum_op()); +} + +/** \returns the mean of all coefficients of *this +* +* \sa trace(), prod(), sum() +*/ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::mean() const +{ +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif +} + +/** \returns the product of all coefficients of *this + * + * Example: \include MatrixBase_prod.cpp + * Output: \verbinclude MatrixBase_prod.out + * + * \sa sum(), mean(), trace() + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::prod() const +{ + if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) + return Scalar(1); + return derived().redux(Eigen::internal::scalar_product_op()); +} + +/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. + * + * \c *this can be any matrix, not necessarily square. + * + * \sa diagonal(), sum() + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +MatrixBase::trace() const +{ + return derived().diagonal().sum(); +} + +} // end namespace Eigen + +#endif // EIGEN_REDUX_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Ref.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Ref.h new file mode 100644 index 0000000..bdf24f5 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Ref.h @@ -0,0 +1,281 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REF_H +#define EIGEN_REF_H + +namespace Eigen { + +namespace internal { + +template +struct traits > + : public traits > +{ + typedef _PlainObjectType PlainObjectType; + typedef _StrideType StrideType; + enum { + Options = _Options, + Flags = traits >::Flags | NestByRefBit, + Alignment = traits >::Alignment + }; + + template struct match { + enum { + HasDirectAccess = internal::has_direct_access::ret, + StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)), + InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic) + || int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime) + || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), + OuterStrideMatch = Derived::IsVectorAtCompileTime + || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), + // NOTE, this indirection of evaluator::Alignment is needed + // to workaround a very strange bug in MSVC related to the instantiation + // of has_*ary_operator in evaluator. + // This line is surprisingly very sensitive. For instance, simply adding parenthesis + // as "DerivedAlignment = (int(evaluator::Alignment))," will make MSVC fail... + DerivedAlignment = int(evaluator::Alignment), + AlignmentMatch = (int(traits::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment + ScalarTypeMatch = internal::is_same::value, + MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch + }; + typedef typename internal::conditional::type type; + }; + +}; + +template +struct traits > : public traits {}; + +} + +template class RefBase + : public MapBase +{ + typedef typename internal::traits::PlainObjectType PlainObjectType; + typedef typename internal::traits::StrideType StrideType; + +public: + + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) + + EIGEN_DEVICE_FUNC inline Index innerStride() const + { + return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; + } + + EIGEN_DEVICE_FUNC inline Index outerStride() const + { + return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() + : IsVectorAtCompileTime ? this->size() + : int(Flags)&RowMajorBit ? this->cols() + : this->rows(); + } + + EIGEN_DEVICE_FUNC RefBase() + : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime), + // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values: + m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime, + StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime) + {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase) + +protected: + + typedef Stride StrideBase; + + template + EIGEN_DEVICE_FUNC void construct(Expression& expr) + { + if(PlainObjectType::RowsAtCompileTime==1) + { + eigen_assert(expr.rows()==1 || expr.cols()==1); + ::new (static_cast(this)) Base(expr.data(), 1, expr.size()); + } + else if(PlainObjectType::ColsAtCompileTime==1) + { + eigen_assert(expr.rows()==1 || expr.cols()==1); + ::new (static_cast(this)) Base(expr.data(), expr.size(), 1); + } + else + ::new (static_cast(this)) Base(expr.data(), expr.rows(), expr.cols()); + + if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit))) + ::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1); + else + ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(), + StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride()); + } + + StrideBase m_stride; +}; + +/** \class Ref + * \ingroup Core_Module + * + * \brief A matrix or vector expression mapping an existing expression + * + * \tparam PlainObjectType the equivalent matrix type of the mapped data + * \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. + * The default is \c #Unaligned. + * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), + * but accepts a variable outer stride (leading dimension). + * This can be overridden by specifying strides. + * The type passed here must be a specialization of the Stride template, see examples below. + * + * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies. + * A Ref<> object can represent either a const expression or a l-value: + * \code + * // in-out argument: + * void foo1(Ref x); + * + * // read-only const argument: + * void foo2(const Ref& x); + * \endcode + * + * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. + * By default, a Ref can reference any dense vector expression of float having a contiguous memory layout. + * Likewise, a Ref can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with + * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension) + * can be greater than the number of rows. + * + * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function. + * Here are some examples: + * \code + * MatrixXf A; + * VectorXf a; + * foo1(a.head()); // OK + * foo1(A.col()); // OK + * foo1(A.row()); // Compilation error because here innerstride!=1 + * foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object + * foo2(A.row().transpose()); // The row is copied into a contiguous temporary + * foo2(2*a); // The expression is evaluated into a temporary + * foo2(A.col().segment(2,4)); // No temporary + * \endcode + * + * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters. + * Here is an example accepting an innerstride!=1: + * \code + * // in-out argument: + * void foo3(Ref > x); + * foo3(A.row()); // OK + * \endcode + * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more + * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a + * template function, e.g.: + * \code + * // in the .h: + * void foo(const Ref& A); + * void foo(const Ref >& A); + * + * // in the .cpp: + * template void foo_impl(const TypeOfA& A) { + * ... // crazy code goes here + * } + * void foo(const Ref& A) { foo_impl(A); } + * void foo(const Ref >& A) { foo_impl(A); } + * \endcode + * + * + * \sa PlainObjectBase::Map(), \ref TopicStorageOrders + */ +template class Ref + : public RefBase > +{ + private: + typedef internal::traits Traits; + template + EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0); + public: + + typedef RefBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Ref) + + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + { + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.derived()); + } + template + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + #else + /** Implicit constructor from any dense expression */ + template + inline Ref(DenseBase& expr) + #endif + { + EIGEN_STATIC_ASSERT(bool(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + Base::construct(expr.const_cast_derived()); + } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref) + +}; + +// this is the const ref version +template class Ref + : public RefBase > +{ + typedef internal::traits Traits; + public: + + typedef RefBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Ref) + + template + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::ScalarTypeMatch),Derived>::type* = 0) + { +// std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," << match_helper::InnerStrideMatch << "\n"; +// std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; +// std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n"; + construct(expr.derived(), typename Traits::template match::type()); + } + + EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) { + // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy + } + + template + EIGEN_DEVICE_FUNC inline Ref(const RefBase& other) { + construct(other.derived(), typename Traits::template match::type()); + } + + protected: + + template + EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type) + { + Base::construct(expr); + } + + template + EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) + { + internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); + Base::construct(m_object); + } + + protected: + TPlainObjectType m_object; +}; + +} // end namespace Eigen + +#endif // EIGEN_REF_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Replicate.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Replicate.h new file mode 100644 index 0000000..9960ef8 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Replicate.h @@ -0,0 +1,142 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REPLICATE_H +#define EIGEN_REPLICATE_H + +namespace Eigen { + +namespace internal { +template +struct traits > + : traits +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ref_selector::type MatrixTypeNested; + typedef typename remove_reference::type _MatrixTypeNested; + enum { + RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic + ? Dynamic + : RowFactor * MatrixType::RowsAtCompileTime, + ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic + ? Dynamic + : ColFactor * MatrixType::ColsAtCompileTime, + //FIXME we don't propagate the max sizes !!! + MaxRowsAtCompileTime = RowsAtCompileTime, + MaxColsAtCompileTime = ColsAtCompileTime, + IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 + : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 + : (MatrixType::Flags & RowMajorBit) ? 1 : 0, + + // FIXME enable DirectAccess with negative strides? + Flags = IsRowMajor ? RowMajorBit : 0 + }; +}; +} + +/** + * \class Replicate + * \ingroup Core_Module + * + * \brief Expression of the multiple replication of a matrix or vector + * + * \tparam MatrixType the type of the object we are replicating + * \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic. + * \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic. + * + * This class represents an expression of the multiple replication of a matrix or vector. + * It is the return type of DenseBase::replicate() and most of the time + * this is the only way it is used. + * + * \sa DenseBase::replicate() + */ +template class Replicate + : public internal::dense_xpr_base< Replicate >::type +{ + typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; + typedef typename internal::traits::_MatrixTypeNested _MatrixTypeNested; + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) + typedef typename internal::remove_all::type NestedExpression; + + template + EIGEN_DEVICE_FUNC + inline explicit Replicate(const OriginalMatrixType& matrix) + : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) + { + EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) + eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic); + } + + template + EIGEN_DEVICE_FUNC + inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) + : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) + { + EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) + } + + EIGEN_DEVICE_FUNC + inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); } + EIGEN_DEVICE_FUNC + inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); } + + EIGEN_DEVICE_FUNC + const _MatrixTypeNested& nestedExpression() const + { + return m_matrix; + } + + protected: + MatrixTypeNested m_matrix; + const internal::variable_if_dynamic m_rowFactor; + const internal::variable_if_dynamic m_colFactor; +}; + +/** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate.cpp + * Output: \verbinclude MatrixBase_replicate.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(Index,Index), class Replicate + */ +template +template +const Replicate +DenseBase::replicate() const +{ + return Replicate(derived()); +} + +/** + * \return an expression of the replication of each column (or row) of \c *this + * + * Example: \include DirectionWise_replicate_int.cpp + * Output: \verbinclude DirectionWise_replicate_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ +template +const typename VectorwiseOp::ReplicateReturnType +VectorwiseOp::replicate(Index factor) const +{ + return typename VectorwiseOp::ReplicateReturnType + (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1); +} + +} // end namespace Eigen + +#endif // EIGEN_REPLICATE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ReturnByValue.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ReturnByValue.h new file mode 100644 index 0000000..c44b767 --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/ReturnByValue.h @@ -0,0 +1,117 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// Copyright (C) 2009-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RETURNBYVALUE_H +#define EIGEN_RETURNBYVALUE_H + +namespace Eigen { + +namespace internal { + +template +struct traits > + : public traits::ReturnType> +{ + enum { + // We're disabling the DirectAccess because e.g. the constructor of + // the Block-with-DirectAccess expression requires to have a coeffRef method. + // Also, we don't want to have to implement the stride stuff. + Flags = (traits::ReturnType>::Flags + | EvalBeforeNestingBit) & ~DirectAccessBit + }; +}; + +/* The ReturnByValue object doesn't even have a coeff() method. + * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix. + * So internal::nested always gives the plain return matrix type. + * + * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ?? + * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators + */ +template +struct nested_eval, n, PlainObject> +{ + typedef typename traits::ReturnType type; +}; + +} // end namespace internal + +/** \class ReturnByValue + * \ingroup Core_Module + * + */ +template class ReturnByValue + : public internal::dense_xpr_base< ReturnByValue >::type, internal::no_assignment_operator +{ + public: + typedef typename internal::traits::ReturnType ReturnType; + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue) + + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& dst) const + { static_cast(this)->evalTo(dst); } + EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast(this)->rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast(this)->cols(); } + +#ifndef EIGEN_PARSED_BY_DOXYGEN +#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT + class Unusable{ + Unusable(const Unusable&) {} + Unusable& operator=(const Unusable&) {return *this;} + }; + const Unusable& coeff(Index) const { return *reinterpret_cast(this); } + const Unusable& coeff(Index,Index) const { return *reinterpret_cast(this); } + Unusable& coeffRef(Index) { return *reinterpret_cast(this); } + Unusable& coeffRef(Index,Index) { return *reinterpret_cast(this); } +#undef Unusable +#endif +}; + +template +template +Derived& DenseBase::operator=(const ReturnByValue& other) +{ + other.evalTo(derived()); + return derived(); +} + +namespace internal { + +// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that +// when a ReturnByValue expression is assigned, the evaluator is not constructed. +// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world + +template +struct evaluator > + : public evaluator::ReturnType> +{ + typedef ReturnByValue XprType; + typedef typename internal::traits::ReturnType PlainObject; + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + xpr.evalTo(m_result); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_RETURNBYVALUE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Reverse.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Reverse.h new file mode 100644 index 0000000..0640cda --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Reverse.h @@ -0,0 +1,211 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2009 Ricard Marxer +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REVERSE_H +#define EIGEN_REVERSE_H + +namespace Eigen { + +namespace internal { + +template +struct traits > + : traits +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ref_selector::type MatrixTypeNested; + typedef typename remove_reference::type _MatrixTypeNested; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit) + }; +}; + +template struct reverse_packet_cond +{ + static inline PacketType run(const PacketType& x) { return preverse(x); } +}; + +template struct reverse_packet_cond +{ + static inline PacketType run(const PacketType& x) { return x; } +}; + +} // end namespace internal + +/** \class Reverse + * \ingroup Core_Module + * + * \brief Expression of the reverse of a vector or matrix + * + * \tparam MatrixType the type of the object of which we are taking the reverse + * \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections + * + * This class represents an expression of the reverse of a vector. + * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::reverse(), VectorwiseOp::reverse() + */ +template class Reverse + : public internal::dense_xpr_base< Reverse >::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) + typedef typename internal::remove_all::type NestedExpression; + using Base::IsRowMajor; + + protected: + enum { + PacketSize = internal::packet_traits::size, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1, + ReversePacket = (Direction == BothDirections) + || ((Direction == Vertical) && IsColMajor) + || ((Direction == Horizontal) && IsRowMajor) + }; + typedef internal::reverse_packet_cond reverse_packet; + public: + + EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } + + EIGEN_DEVICE_FUNC inline Index innerStride() const + { + return -m_matrix.innerStride(); + } + + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& + nestedExpression() const + { + return m_matrix; + } + + protected: + typename MatrixType::Nested m_matrix; +}; + +/** \returns an expression of the reverse of *this. + * + * Example: \include MatrixBase_reverse.cpp + * Output: \verbinclude MatrixBase_reverse.out + * + */ +template +inline typename DenseBase::ReverseReturnType +DenseBase::reverse() +{ + return ReverseReturnType(derived()); +} + + +//reverse const overload moved DenseBase.h due to a CUDA compiler bug + +/** This is the "in place" version of reverse: it reverses \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * - it allows future optimizations (cache friendliness, etc.) + * + * \sa VectorwiseOp::reverseInPlace(), reverse() */ +template +inline void DenseBase::reverseInPlace() +{ + if(cols()>rows()) + { + Index half = cols()/2; + leftCols(half).swap(rightCols(half).reverse()); + if((cols()%2)==1) + { + Index half2 = rows()/2; + col(half).head(half2).swap(col(half).tail(half2).reverse()); + } + } + else + { + Index half = rows()/2; + topRows(half).swap(bottomRows(half).reverse()); + if((rows()%2)==1) + { + Index half2 = cols()/2; + row(half).head(half2).swap(row(half).tail(half2).reverse()); + } + } +} + +namespace internal { + +template +struct vectorwise_reverse_inplace_impl; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.rows()/2; + xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse()); + } +}; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.cols()/2; + xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse()); + } +}; + +} // end namespace internal + +/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * + * \sa DenseBase::reverseInPlace(), reverse() */ +template +void VectorwiseOp::reverseInPlace() +{ + internal::vectorwise_reverse_inplace_impl::run(_expression().const_cast_derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_REVERSE_H diff --git a/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Select.h b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Select.h new file mode 100644 index 0000000..79eec1b --- /dev/null +++ b/src/runtime/ref/nn/depend/external/eigen/Eigen/src/Core/Select.h @@ -0,0 +1,162 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELECT_H +#define EIGEN_SELECT_H + +namespace Eigen { + +/** \class Select + * \ingroup Core_Module + * + * \brief Expression of a coefficient wise version of the C++ ternary operator ?: + * + * \param ConditionMatrixType the type of the \em condition expression which must be a boolean matrix + * \param ThenMatrixType the type of the \em then expression + * \param ElseMatrixType the type of the \em else expression + * + * This class represents an expression of a coefficient wise version of the C++ ternary operator ?:. + * It is the return type of DenseBase::select() and most of the time this is the only way it is used. + * + * \sa DenseBase::select(const DenseBase&, const DenseBase&) const + */ + +namespace internal { +template +struct traits > + : traits +{ + typedef typename traits::Scalar Scalar; + typedef Dense StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ConditionMatrixType::Nested ConditionMatrixNested; + typedef typename ThenMatrixType::Nested ThenMatrixNested; + typedef typename ElseMatrixType::Nested ElseMatrixNested; + enum { + RowsAtCompileTime = ConditionMatrixType::RowsAtCompileTime, + ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit + }; +}; +} + +template +class Select : public internal::dense_xpr_base< Select >::type, + internal::no_assignment_operator +{ + public: + + typedef typename internal::dense_xpr_base