Commit 51e3029d by tatsukiishikawa

Changing name

parent d01e5c92
Showing with 0 additions and 4926 deletions

Too many changes to show.

To preserve performance only 1000 of 1000+ files are displayed.

/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
// For per-channel functions, since it is defined in quantization spec that
// weights are symmetric
// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric),
// zero_point (params.weights_offset) is always 0.
// However, for per-tensor functions, params.weights_offset is still applied for
// backward compatibility.
// Integer fully-connected kernel with a per-output-channel requantization
// pair (output_multiplier[ch], output_shift[ch]). Per the TFLite quantization
// spec, per-channel weights are symmetric, so no weights offset is applied.
// bias_data may be null, in which case no bias is added.
template <typename InputType, typename WeightType, typename OutputType,
          typename BiasType>
void FullyConnectedPerChannel(
    const FullyConnectedParams& params, const int32_t* output_multiplier,
    const int* output_shift, const RuntimeShape& input_shape,
    const InputType* input_data, const RuntimeShape& filter_shape,
    const WeightType* filter_data, const RuntimeShape& bias_shape,
    const BiasType* bias_data, const RuntimeShape& output_shape,
    OutputType* output_data) {
  const int32_t in_offset = params.input_offset;
  const int32_t out_offset = params.output_offset;
  const int32_t act_min = params.quantized_activation_min;
  const int32_t act_max = params.quantized_activation_max;
  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_LE(act_min, act_max);
  const int filter_dims = filter_shape.DimensionsCount();
  const int num_batches = output_shape.Dims(0);
  const int num_out_channels = output_shape.Dims(1);
  TFLITE_DCHECK_LE(num_out_channels, filter_shape.Dims(filter_dims - 2));
  const int depth = filter_shape.Dims(filter_dims - 1);
  for (int batch = 0; batch < num_batches; ++batch) {
    const InputType* in_row = input_data + batch * depth;
    for (int ch = 0; ch < num_out_channels; ++ch) {
      const WeightType* w_row = filter_data + ch * depth;
      // Accumulate in the wider bias type. Starting from the bias is
      // equivalent to adding it after the dot product.
      BiasType acc = bias_data ? bias_data[ch] : static_cast<BiasType>(0);
      for (int d = 0; d < depth; ++d) {
        const int32_t x = in_row[d];
        const int32_t w = w_row[d];
        acc += w * (x + in_offset);
      }
      // Requantize with this channel's multiplier/shift, re-center on the
      // output zero point, and clamp to the fused activation range.
      int32_t scaled = MultiplyByQuantizedMultiplier(acc, output_multiplier[ch],
                                                     output_shift[ch]);
      scaled += out_offset;
      scaled = std::min(std::max(scaled, act_min), act_max);
      output_data[batch * num_out_channels + ch] =
          static_cast<OutputType>(scaled);
    }
  }
}
// Integer fully-connected kernel with a single per-tensor requantization
// multiplier. Unlike the per-channel variant, params.weights_offset is still
// applied to every weight, for backward compatibility. bias_data may be null.
template <typename InputType, typename WeightType, typename OutputType,
          typename BiasType>
void FullyConnected(const FullyConnectedParams& params,
                    const RuntimeShape& input_shape,
                    const InputType* input_data,
                    const RuntimeShape& filter_shape,
                    const WeightType* filter_data,
                    const RuntimeShape& bias_shape, const BiasType* bias_data,
                    const RuntimeShape& output_shape, OutputType* output_data) {
  const int32_t in_offset = params.input_offset;
  const int32_t w_offset = params.weights_offset;
  const int32_t out_offset = params.output_offset;
  const int32_t out_mult = params.output_multiplier;
  const int out_shift = params.output_shift;
  const int32_t act_min = params.quantized_activation_min;
  const int32_t act_max = params.quantized_activation_max;
  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
  TFLITE_DCHECK_LE(act_min, act_max);
  const int filter_dims = filter_shape.DimensionsCount();
  const int output_dims = output_shape.DimensionsCount();
  // All leading output dimensions are folded into a single batch count.
  const int num_batches = FlatSizeSkipDim(output_shape, output_dims - 1);
  const int num_out_channels = output_shape.Dims(output_dims - 1);
  TFLITE_DCHECK_LE(num_out_channels, filter_shape.Dims(filter_dims - 2));
  const int depth = filter_shape.Dims(filter_dims - 1);
  for (int batch = 0; batch < num_batches; ++batch) {
    const InputType* in_row = input_data + batch * depth;
    for (int ch = 0; ch < num_out_channels; ++ch) {
      const WeightType* w_row = filter_data + ch * depth;
      // Accumulate in the wider bias type; starting from the bias is
      // equivalent to adding it after the dot product.
      BiasType acc = bias_data ? bias_data[ch] : static_cast<BiasType>(0);
      for (int d = 0; d < depth; ++d) {
        const int32_t x = in_row[d];
        const int32_t w = w_row[d];
        acc += (w + w_offset) * (x + in_offset);
      }
      // Requantize, re-center on the output zero point, and clamp to the
      // fused activation range.
      int32_t scaled = MultiplyByQuantizedMultiplier(acc, out_mult, out_shift);
      scaled += out_offset;
      if (scaled < act_min) scaled = act_min;
      if (scaled > act_max) scaled = act_max;
      output_data[batch * num_out_channels + ch] =
          static_cast<OutputType>(scaled);
    }
  }
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
// Quantized int8 L2 normalization: each row of `depth` elements is divided by
// its L2 norm using an inverse-sqrt quantized multiplier.
inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
                            int32_t depth, const int8_t* input_data,
                            int8_t* output_data) {
  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
  // Output scale is fixed at 1/128 and must be in sync with Prepare(): the
  // actual output range is nudged from [-1, 1] to [-1, 127/128].
  static constexpr int32_t kOutputScale = 7;
  for (int row = 0; row < outer_size; ++row) {
    const int8_t* row_in = input_data + depth * row;
    int8_t* row_out = output_data + depth * row;
    // Sum of squares of zero-point-corrected inputs. Each term is at most
    // (2^8 - 1)^2, so the int32 accumulator is safe for at least 2^16 steps.
    int32_t sum_sq = 0;
    for (int c = 0; c < depth; ++c) {
      const int32_t centered = row_in[c] - input_zero_point;
      sum_sq += centered * centered;
    }
    // Quantized multiplier/shift approximating 1/sqrt(sum_sq).
    int32_t inv_norm_mult;
    int inv_norm_shift;
    GetInvSqrtQuantizedMultiplierExp(sum_sq, kReverseShift, &inv_norm_mult,
                                     &inv_norm_shift);
    for (int c = 0; c < depth; ++c) {
      const int32_t centered = row_in[c] - input_zero_point;
      // The division by the norm and the rescale to 1/128 are folded into a
      // single quantized multiply, then clamped into int8 range.
      int32_t val = MultiplyByQuantizedMultiplier(
          centered, inv_norm_mult, inv_norm_shift + kOutputScale);
      if (val > kMaxInt8) val = kMaxInt8;
      if (val < kMinInt8) val = kMinInt8;
      row_out[c] = static_cast<int8_t>(val);
    }
  }
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
// Quantized int8 logistic using gemmlowp fixed-point arithmetic. Inputs
// beyond +/- input_range_radius saturate directly to the int8 extremes.
inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
                     int32_t input_multiplier, int32_t input_left_shift,
                     int32_t input_size, const int8_t* input_data,
                     int8_t* output_data) {
  // Fixed-point formats; must stay in sync with the op's Prepare() function.
  static constexpr int32_t kInputIntegerBits = 4;
  static constexpr int32_t kOutputIntegerBits = 8;
  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
  static constexpr int32_t kOutputZeroPoint = -128;
  using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
  using gemmlowp::RoundingDivideByPOT;
  for (int i = 0; i < input_size; ++i) {
    const int32_t centered =
        static_cast<int32_t>(input_data[i]) - input_zero_point;
    if (centered <= -input_range_radius) {
      // Saturated low tail of the sigmoid.
      output_data[i] = kMinInt8;
      continue;
    }
    if (centered >= input_range_radius) {
      // Saturated high tail of the sigmoid.
      output_data[i] = kMaxInt8;
      continue;
    }
    // Rescale into the 4-integer-bit fixed-point domain and evaluate the
    // logistic, which returns a raw value with 31 fractional-ish bits.
    const int32_t in_q4 = MultiplyByQuantizedMultiplier(
        centered, input_multiplier, input_left_shift);
    const int32_t out_q0 =
        gemmlowp::logistic(FixedPoint4::FromRaw(in_q4)).raw();
    // Rescale down to kOutputIntegerBits, re-center on the output zero
    // point, and clamp into int8 range.
    int32_t v =
        RoundingDivideByPOT(out_q0, 31 - kOutputIntegerBits) + kOutputZeroPoint;
    if (v < kMinInt8) v = kMinInt8;
    if (v > kMaxInt8) v = kMaxInt8;
    output_data[i] = static_cast<int8_t>(v);
  }
}
// 16-bit logistic computed via a uint16 sigmoid lookup table with linear
// interpolation between adjacent entries.
inline void Logistic(int32_t input_multiplier, int32_t input_left_shift,
                     int32_t input_size, const int16_t* ptr_input_data,
                     int16_t* ptr_output_data) {
  // We use the LUT for sigmoid and take into account, that
  // tanh(x) = 2*sigmoid(2*x) - 1
  // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
  // In case of general parameter scale, multiplier 3 is taken into account
  // in TanhPrepare function and it is included in
  // input_multiplier already.
  TFLITE_DCHECK_GE(input_left_shift, 0);
  if (input_multiplier == 0) {  // power of two case
    input_multiplier = 3 << input_left_shift;
    input_left_shift = 0;
  }
  // Rounding bias for the arithmetic right shift below (half of the shifted-
  // away magnitude; zero when no shift is applied).
  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
  for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
    // Rescale the input into the table's domain with a rounded fixed-point
    // multiply.
    int32_t input_data =
        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
    // We do interpolation on unsigned values.
    uint32_t abs_input_data = abs(input_data);
    // We divide by 2 power of 9, because
    // we need to divide by 2 in power of 7 for
    // the input conversion + 1/4 from the scale above.
    // Define uh as uint32_t type not to make this function overflow.
    uint32_t uh = abs_input_data >> 9;
    uint32_t result;
    if (uh >= 255) {
      // Saturate to maximum.
      result = 0x7FFF << 10;
    } else {
      // Linear interpolation between the two adjacent table entries, using
      // the low 9 bits of the rescaled input as the fraction.
      uint32_t ua = sigmoid_table_uint16[uh];
      uint32_t ub = sigmoid_table_uint16[uh + 1];
      uint32_t ut = abs_input_data & 0x1ff;
      // Interpolation is done using the fractional bit.
      result = (ua << 9) + ut * (ub - ua);
    }
    // For negative inputs, exploit sigmoid(-x) = 1 - sigmoid(x): subtract
    // from 1.0 in this fixed-point scale (1 << (16 + 9)). The +/- (1 << 9)
    // terms implement rounding before the final shift.
    result = (input_data >= 0) ? (result + (1 << 9))
                               : ((1 << (16 + 9)) - result + (1 << 9) - 1);
    // Back to 16-bit.
    result >>= 10;
    *ptr_output_data = result;
  }
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
#include <algorithm>
#include <limits>
#include "third_party/gemmlowp/fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
// Quantized int8 tanh using gemmlowp fixed-point arithmetic. Inputs beyond
// +/- input_range_radius saturate directly to the int8 extremes.
inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
                 int32_t input_multiplier, int32_t input_shift,
                 const RuntimeShape& input_shape, const int8_t* input_data,
                 const RuntimeShape& output_shape, int8_t* output_data) {
  // Fixed-point formats; must stay in sync with the op's Prepare() function.
  static constexpr int32_t kInputIntegerBits = 4;
  static constexpr int32_t kOutputScale = 7;
  static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
  static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
  using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
  using gemmlowp::RoundingDivideByPOT;
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
    const int32_t centered =
        static_cast<int32_t>(input_data[i]) - input_zero_point;
    if (centered <= -input_range_radius) {
      output_data[i] = kMinInt8;
    } else if (centered >= input_range_radius) {
      output_data[i] = kMaxInt8;
    } else {
      // Rescale into the 4-integer-bit fixed-point domain, evaluate tanh,
      // then rescale down to the 1/2^7 output scale and clamp to int8.
      const int32_t in_q4 = MultiplyByQuantizedMultiplier(
          centered, input_multiplier, input_shift);
      const int32_t out_q0 = gemmlowp::tanh(F4::FromRaw(in_q4)).raw();
      int32_t v = RoundingDivideByPOT(out_q0, 31 - kOutputScale);
      if (v < kMinInt8) v = kMinInt8;
      if (v > kMaxInt8) v = kMaxInt8;
      output_data[i] = static_cast<int8_t>(v);
    }
  }
}
// 16-bit tanh computed from the uint16 sigmoid lookup table via
// tanh(x) = 2*sigmoid(2*x) - 1, with linear interpolation between entries.
inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
                 const RuntimeShape& input_shape, const int16_t* ptr_input_data,
                 const RuntimeShape& output_shape, int16_t* ptr_output_data) {
  // We use the LUT for sigmoid and take into account, that
  // tanh(x) = 2*sigmoid(2*x) - 1
  // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
  // In case of general parameter scale, multiplier 3 is taken into account
  // in TanhPrepare function and it is included in
  // input_multiplier already.
  // NOTE(review): unlike the int16 Logistic, there is no
  // TFLITE_DCHECK_GE(input_left_shift, 0) here -- confirm callers guarantee
  // a non-negative shift, since a negative shift count below would be UB.
  if (input_multiplier == 0) {  // power of two case
    input_multiplier = 3 << input_left_shift;
    input_left_shift = 0;
  }
  // Rounding bias for the arithmetic right shift below.
  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
  int flat_size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++) {
    // Rescale the input into the sigmoid table's domain with a rounded
    // fixed-point multiply.
    int32_t input_data =
        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
    // Interpolation is done on unsigned values; tanh's odd symmetry restores
    // the sign in the final step.
    uint32_t abs_input_data = abs(input_data);
    // Table index is the high bits; the low 8 bits are the interpolation
    // fraction.
    uint32_t uh = abs_input_data >> 8;
    int32_t result;
    if (uh >= 255) {
      // Saturate to maximum.
      result = 0xFFFF << 8;
    } else {
      // Linear interpolation between the two adjacent table entries.
      uint32_t ua = sigmoid_table_uint16[uh];
      uint32_t ub = sigmoid_table_uint16[uh + 1];
      uint8_t ut = abs_input_data & 0xFF;
      result = (ua << 8) + ut * (ub - ua);
    }
    // Map sigmoid to tanh: re-center by the fixed-point constant
    // (1 << (14 + 9)) and add a rounding bias (1 << (9 - 2)) before the final
    // shift; the negative branch mirrors the result for odd symmetry.
    result = (input_data >= 0)
                 ? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
                 : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
    // Convert back to 16-bit.
    result >>= (9 - 1);
    *ptr_output_data = result;
  }
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
#include <algorithm>
#include <cmath>
#include "tensorflow/lite/core/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Float L2 normalization over the trailing dimension: each row of `depth`
// elements is divided by max(its L2 norm, epsilon). op_params is unused in
// the float path.
inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
                            const RuntimeShape& input_shape,
                            const float* input_data,
                            const RuntimeShape& output_shape,
                            float* output_data, float epsilon = 1e-6) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
  for (int row = 0; row < outer_size; ++row) {
    const float* src = input_data + depth * row;
    float* dst = output_data + depth * row;
    float sum_sq = 0;
    for (int c = 0; c < depth; ++c) {
      sum_sq += src[c] * src[c];
    }
    // epsilon guards against division by zero for all-zero rows.
    const float norm = std::max(std::sqrt(sum_sq), epsilon);
    for (int c = 0; c < depth; ++c) {
      dst[c] = src[c] / norm;
    }
  }
}
// Quantized uint8 L2 normalization: rows are normalized via an inverse-sqrt
// quantized multiplier; the output is fixed at scale 1/128 with zero point
// 128.
inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
                            const RuntimeShape& input_shape,
                            const uint8_t* input_data,
                            const RuntimeShape& output_shape,
                            uint8_t* output_data) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int32_t zero_point = op_params.input_zero_point;
  for (int row = 0; row < outer_size; ++row) {
    const uint8_t* src = input_data + depth * row;
    uint8_t* dst = output_data + depth * row;
    // Sum of squares of zero-point-corrected inputs.
    int32_t sum_sq = 0;
    for (int c = 0; c < depth; ++c) {
      const int32_t centered = src[c] - zero_point;
      sum_sq += centered * centered;
    }
    // Quantized multiplier/shift approximating 1/sqrt(sum_sq).
    int32_t inv_norm_mult;
    int inv_norm_shift;
    GetInvSqrtQuantizedMultiplierExp(sum_sq, kReverseShift, &inv_norm_mult,
                                     &inv_norm_shift);
    for (int c = 0; c < depth; ++c) {
      const int32_t centered = src[c] - zero_point;
      // The factor 128 applies the output scale; 128 below is the output
      // zero point.
      const int32_t rescaled = MultiplyByQuantizedMultiplierSmallerThanOneExp(
          128 * centered, inv_norm_mult, inv_norm_shift);
      int32_t out = 128 + rescaled;
      if (out < 0) out = 0;
      if (out > 255) out = 255;
      dst[c] = static_cast<uint8_t>(out);
    }
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
// Float LeakyRelu: y = x for x > 0, y = alpha * x otherwise.
inline void LeakyRelu(const tflite::LeakyReluParams& params,
                      const RuntimeShape& input_shape, const float* input_data,
                      const RuntimeShape& output_shape, float* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  const float alpha = params.alpha;
  // alpha may be > 1 or negative, so a std::max formulation would be wrong.
  for (int i = 0; i < flat_size; ++i) {
    const float x = input_data[i];
    output_data[i] = (x > 0) ? x : x * alpha;
  }
}
// Quantized LeakyRelu for int8/uint8/int16. Positive inputs are requantized
// with the identity multiplier/shift; negative inputs with the alpha
// multiplier/shift. Results are clamped to T's full range.
template <typename T>
inline void QuantizeLeakyRelu(const LeakyReluParams& params,
                              const RuntimeShape& input_shape,
                              const T* input_data,
                              const RuntimeShape& output_shape,
                              T* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  constexpr int32_t kQMin = std::numeric_limits<T>::min();
  constexpr int32_t kQMax = std::numeric_limits<T>::max();
  for (int i = 0; i < flat_size; ++i) {
    const int32_t x = input_data[i] - params.input_offset;
    // Select the identity or alpha requantization pair by sign.
    const bool non_negative = (x >= 0);
    const int32_t multiplier = non_negative ? params.output_multiplier_identity
                                            : params.output_multiplier_alpha;
    const int32_t shift = non_negative ? params.output_shift_identity
                                       : params.output_shift_alpha;
    int32_t y = params.output_offset +
                MultiplyByQuantizedMultiplier(x, multiplier, shift);
    if (y < kQMin) y = kQMin;
    if (y > kQMax) y = kQMax;
    output_data[i] = static_cast<T>(y);
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
#include <cmath>
#include "third_party/gemmlowp/fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace reference_ops {
// Float logistic (sigmoid) with a piecewise evaluation.
inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
                     const RuntimeShape& output_shape, float* output_data) {
  // Rationale for the piecewise approximation in the reference kernel:
  // 0. It gives enough precision for float.
  // 1. It works around an issue on an embedded chipset where exp() does not
  //    return inf when overflown as expected (it returned 1.701417; IEEE 754
  //    defines a representation for inf).
  // 2. It speeds up the calculation and matches the behavior of the
  //    optimized kernels (check the definition of scalar_logistic_op<float>).
  constexpr float kCutoffUpper = 16.619047164916992188f;
  constexpr float kCutoffLower = -9.f;
  const int size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < size; ++i) {
    const float x = input_data[i];
    float y;
    if (x > kCutoffUpper) {
      // Saturated high end.
      y = 1.0f;
    } else if (x < kCutoffLower) {
      // For x far below zero, exp(x) approximates 1/(1+exp(-x)).
      y = std::exp(x);
    } else {
      y = 1.f / (1.f + std::exp(-x));
    }
    output_data[i] = y;
  }
}
// Convenience overload taking a LogisticParams argument so that, for example,
// generated-code calls can be uniform between data types. The float path
// reads nothing from the params, so the argument is accepted unnamed and
// ignored.
inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
                     const float* input_data, const RuntimeShape& output_shape,
                     float* output_data) {
  // Drop params: not needed.
  Logistic(input_shape, input_data, output_shape, output_data);
}
inline void Logistic(const LogisticParams& params,
const RuntimeShape& input_shape, const int16_t* input_data,
const RuntimeShape& output_shape, int16_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
// F0 uses 0 integer bits, range [-1, 1].
// This is the return type of math functions such as tanh, logistic,
// whose range is in [-1, 1].
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
// F3 uses 3 integer bits, range [-8, 8], the input range expected here.
using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
const F3 input = F3::FromRaw(input_data[i]);
F0 output = gemmlowp::logistic(input);
output_data[i] = output.raw();
}
}
// Quantized int8_t logistic activation. Cheats by dequantizing and
// requantizing around the floating point logistic method. This implementation
// is slow on platforms without a floating point unit.
// TODO(b/141211002): Delete this int8_t implementation once we can reuse the
// approach used in TFLite for int8_t Logistic.
inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
                     float input_scale, int input_zero_point,
                     const RuntimeShape& output_shape, int8_t* output_data,
                     float output_scale, int output_zero_point) {
  const float cutoff_upper = 16.619047164916992188f;
  const float cutoff_lower = -9.f;
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  // Rational for using approximation in reference kernel.
  // 0. This approximation gives enough precision for float.
  // 1. This works around an issue on an embedded chipset where exp() does not
  //    return correctly as expected - exp(x) should return inf when overflown
  //    not 1.701417 IEEE 754 defines representation for inf.
  // 2. This will speed up calculation and is matching the behavior in the
  //    optimized kernels. (check the definition of scalar_logistic_op<float>)
  for (int i = 0; i < flat_size; i++) {
    // Dequantize.
    float val =
        static_cast<float>((input_data[i] - input_zero_point) * input_scale);
    float result;
    if (val > cutoff_upper) {
      result = 1.0f;
    } else if (val < cutoff_lower) {
      result = std::exp(val);
    } else {
      result = 1.f / (1.f + std::exp(-val));
    }
    // Requantize, saturating into the int8_t range. Without the clamp,
    // e.g. result == 1.0 with output_scale == 1/256 and
    // output_zero_point == -128 yields 128, which wraps around to -128 when
    // cast to int8_t instead of saturating at 127.
    float requantized = result / output_scale + output_zero_point;
    if (requantized > 127.f) requantized = 127.f;
    if (requantized < -128.f) requantized = -128.f;
    output_data[i] = static_cast<int8_t>(requantized);
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Applies the binary functor `op` (e.g. a max or min lambda) elementwise to
// input1 and input2, broadcasting either operand to the output shape when the
// input shapes differ. N is the maximum supported rank (default 5).
template <typename T, typename Op, int N = 5>
void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape,
                                 const T* input1_data,
                                 const RuntimeShape& unextended_input2_shape,
                                 const T* input2_data,
                                 const RuntimeShape& unextended_output_shape,
                                 T* output_data, Op op) {
  // Uses element-wise calculation if broadcast is not required.
  if (unextended_input1_shape == unextended_input2_shape) {
    const int flat_size =
        MatchingElementsSize(unextended_input1_shape, unextended_input2_shape,
                             unextended_output_shape);
    for (int i = 0; i < flat_size; ++i) {
      output_data[i] = op(input1_data[i], input2_data[i]);
    }
  } else {
    TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
    TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
    TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
    // Build per-input index descriptors whose strides implement the
    // broadcast, plus a descriptor for the N-extended output shape.
    NdArrayDesc<N> desc1;
    NdArrayDesc<N> desc2;
    NdArrayDesc<N> output_desc;
    NdArrayDescsForElementwiseBroadcast(
        unextended_input1_shape, unextended_input2_shape, &desc1, &desc2);
    CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
                   &output_desc);
    // For every output coordinate, combine the (possibly broadcast) operand
    // elements mapped to that coordinate.
    auto maxmin_func = [&](int indexes[N]) {
      output_data[SubscriptToIndex(output_desc, indexes)] =
          op(input1_data[SubscriptToIndex(desc1, indexes)],
             input2_data[SubscriptToIndex(desc2, indexes)]);
    };
    NDOpsHelper<N>(output_desc, maxmin_func);
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
#include <vector>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// TFLite Pad supports activation tensors with up to 5 dimensions.
constexpr int PadKernelMaxDimensionCount() {
  // Batch, plane, height, width, depth.
  return 5;
}
// There are two versions of pad: Pad and PadV2. In PadV2 there is a second
// scalar input that provides the padding value. Therefore pad_value_ptr can be
// equivalent to a simple input1_data. For Pad, it should point to a zero
// value.
//
// Note that two typenames are required, so that T=P=int32_t is considered a
// specialization distinct from P=int32_t.
// Core padding implementation shared by Pad and PadV2: copies `input_data`
// into the interior of `output_data` and fills the border with
// `*pad_value_ptr`. Shapes and padding arrays are left-extended to exactly
// PadKernelMaxDimensionCount() (5) dimensions.
template <typename T, typename P>
inline void PadImpl(const tflite::PadParams& op_params,
                    const RuntimeShape& input_shape, const T* input_data,
                    const P* pad_value_ptr, const RuntimeShape& output_shape,
                    T* output_data) {
  // NOTE(review): ext_input_shape is computed but not referenced below.
  const RuntimeShape ext_input_shape =
      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape);
  const RuntimeShape ext_output_shape =
      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape);
  TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
  TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());
  // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
  // pad them to 5 dims (yes, we are "padding the padding"). The caller's
  // padding counts are right-aligned into 5-wide arrays, with the leading
  // (extended) dimensions zero-filled.
  int left_padding_copy[PadKernelMaxDimensionCount()];
  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
    left_padding_copy[i] = 0;
  }
  for (int i = 0; i < op_params.left_padding_count; ++i) {
    left_padding_copy[i + PadKernelMaxDimensionCount() -
                      op_params.left_padding_count] = op_params.left_padding[i];
  }
  int right_padding_copy[PadKernelMaxDimensionCount()];
  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
    right_padding_copy[i] = 0;
  }
  for (int i = 0; i < op_params.right_padding_count; ++i) {
    right_padding_copy[i + PadKernelMaxDimensionCount() -
                      op_params.right_padding_count] =
        op_params.right_padding[i];
  }
  // Extended output extents: batch, plane, height, width, depth.
  const int output_batch = ext_output_shape.Dims(0);
  const int output_plane = ext_output_shape.Dims(1);
  const int output_height = ext_output_shape.Dims(2);
  const int output_width = ext_output_shape.Dims(3);
  const int output_depth = ext_output_shape.Dims(4);
  const int left_b_padding = left_padding_copy[0];
  const int left_p_padding = left_padding_copy[1];
  const int left_h_padding = left_padding_copy[2];
  const int left_w_padding = left_padding_copy[3];
  const int left_d_padding = left_padding_copy[4];
  const int right_b_padding = right_padding_copy[0];
  const int right_p_padding = right_padding_copy[1];
  const int right_h_padding = right_padding_copy[2];
  const int right_w_padding = right_padding_copy[3];
  const int right_d_padding = right_padding_copy[4];
  const T pad_value = *pad_value_ptr;
  // Both pointers advance strictly sequentially: the output is written in
  // row-major order, and the input pointer advances only on interior
  // (non-padded) elements.
  const T* in_ptr = input_data;
  T* out_ptr = output_data;
  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_p = 0; out_p < output_plane; ++out_p) {
      for (int out_h = 0; out_h < output_height; ++out_h) {
        for (int out_w = 0; out_w < output_width; ++out_w) {
          for (int out_d = 0; out_d < output_depth; ++out_d) {
            // Inside any dimension's left/right border -> emit the pad
            // value; otherwise copy the next input element.
            if (out_b < left_b_padding ||
                out_b >= output_batch - right_b_padding ||
                out_p < left_p_padding ||
                out_p >= output_plane - right_p_padding ||
                out_h < left_h_padding ||
                out_h >= output_height - right_h_padding ||
                out_w < left_w_padding ||
                out_w >= output_width - right_w_padding ||
                out_d < left_d_padding ||
                out_d >= output_depth - right_d_padding) {
              *out_ptr++ = pad_value;
            } else {
              *out_ptr++ = *in_ptr++;
            }
          }
        }
      }
    }
  }
}
// Generic Pad/PadV2 entry point; forwards directly to PadImpl. The pad-value
// type P may differ from the tensor element type T (see note above PadImpl).
template <typename T, typename P>
inline void Pad(const tflite::PadParams& op_params,
                const RuntimeShape& input_shape, const T* input_data,
                const P* pad_value_ptr, const RuntimeShape& output_shape,
                T* output_data) {
  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
          output_data);
}
// The second (pad-value) input can be int32_t when, say, the first is uint8_t.
// The scalar pad value is converted to the tensor's element type up front so
// PadImpl only ever deals with a T-typed fill value.
template <typename T>
inline void Pad(const tflite::PadParams& op_params,
                const RuntimeShape& input_shape, const T* input_data,
                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
                T* output_data) {
  const T pad_value = static_cast<T>(*pad_value_ptr);
  PadImpl(op_params, input_shape, input_data, &pad_value, output_shape,
          output_data);
}
// This version avoids conflicting template matching.
// Full specialization for T == P == int32_t: no conversion is needed, so the
// pad value pointer is passed straight through to PadImpl.
template <>
inline void Pad(const tflite::PadParams& op_params,
                const RuntimeShape& input_shape, const int32_t* input_data,
                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
                int32_t* output_data) {
  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
          output_data);
}
// "Image style" pad is identical to plain Pad for generic element types; the
// separate name exists so float gets its own overload below.
template <typename T, typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape, const T* input_data,
                          const P* pad_value_ptr,
                          const RuntimeShape& output_shape, T* output_data) {
  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
      output_data);
}
// Float overload of PadImageStyle; currently also just forwards to Pad.
template <typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape,
                          const float* input_data, const P* pad_value_ptr,
                          const RuntimeShape& output_shape,
                          float* output_data) {
  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
      output_data);
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Broadcast prelu to output_shape for quantized uint8_t/int8_t data.
//
// Quantized PReLU: non-negative inputs are requantized with
// (output_multiplier_1, output_shift_1); negative inputs are first multiplied
// by the (broadcast) alpha value, then requantized with
// (output_multiplier_2, output_shift_2). Offsets follow the usual TFLite
// asymmetric-quantization convention.
template <typename T>
inline void BroadcastPrelu4DSlow(
    const PreluParams& params, const RuntimeShape& input_shape,
    const T* input_data, const RuntimeShape& alpha_shape, const T* alpha_data,
    const RuntimeShape& output_shape, T* output_data) {
  TFLITE_DCHECK_LE(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(alpha_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(4, output_shape);
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input_shape, alpha_shape, &desc1, &desc2);
  // Loop-invariant clamping bounds, hoisted out of the 4-deep loop nest for
  // consistency with Prelu() below.
  const int32_t quantized_min = std::numeric_limits<T>::min();
  const int32_t quantized_max = std::numeric_limits<T>::max();
  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
          int output_index = Offset(extended_output_shape, b, y, x, c);
          int input_index = SubscriptToIndex(desc1, b, y, x, c);
          const int32_t input_value =
              params.input_offset + input_data[input_index];
          int32_t output_value;
          if (input_value >= 0) {
            output_value = MultiplyByQuantizedMultiplier(
                input_value, params.output_multiplier_1, params.output_shift_1);
          } else {
            // Negative branch: alpha is broadcast via its own descriptor.
            auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
            const int32_t alpha_value =
                params.alpha_offset + alpha_data[alpha_index];
            output_value = MultiplyByQuantizedMultiplier(
                input_value * alpha_value, params.output_multiplier_2,
                params.output_shift_2);
          }
          output_value += params.output_offset;
          const int32_t clamped_output =
              std::min(quantized_max, std::max(quantized_min, output_value));
          output_data[output_index] = static_cast<T>(clamped_output);
        }
      }
    }
  }
}
// Element-wise quantized PReLU (no broadcasting: input, alpha and output
// shapes must all match). Non-negative inputs are requantized with multiplier
// pair 1; negative inputs are scaled by alpha and requantized with pair 2.
template <typename T>
inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape,
                  const T* input_data, const RuntimeShape& alpha_shape,
                  const T* alpha_data, const RuntimeShape& output_shape,
                  T* output_data) {
  const int32_t quantized_min = std::numeric_limits<T>::min();
  const int32_t quantized_max = std::numeric_limits<T>::max();
  const int flat_size =
      MatchingElementsSize(input_shape, alpha_shape, output_shape);
  for (int idx = 0; idx < flat_size; ++idx) {
    const int32_t input_value = params.input_offset + input_data[idx];
    int32_t rescaled;
    if (input_value < 0) {
      // Negative side: multiply by alpha, then requantize with pair 2.
      const int32_t alpha_value = params.alpha_offset + alpha_data[idx];
      rescaled = MultiplyByQuantizedMultiplier(input_value * alpha_value,
                                               params.output_multiplier_2,
                                               params.output_shift_2);
    } else {
      // Non-negative side: requantize with pair 1.
      rescaled = MultiplyByQuantizedMultiplier(
          input_value, params.output_multiplier_1, params.output_shift_1);
    }
    rescaled += params.output_offset;
    output_data[idx] = static_cast<T>(
        std::min(quantized_max, std::max(quantized_min, rescaled)));
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Consolidates dimensions in broadcast inputs, checks for five-fold pattern.
//
// For example, if sequence of dimensions of one input is
// ..., 1, 3, 1, 7, 9, 5,... and the other is ..., 2, 3, 1, 7, 1, 1, ...
// we can consolidate these as
// ..., 1, 3*7, 9*5, ... and 2, 3*7, 1.
//
// The category is updated in the less-frequent case of shapes that are
// not suited to a fivefold-loop broadcast.
//
// Falls back to generic pattern when it does not know how to process properly.
//
// Returns true iff there is some sort of broadcast, which includes five-fold
// patterns and falling back to generic broadcast.
inline bool ProcessBroadcastShapes(const RuntimeShape& shape0,
const RuntimeShape& shape1,
tflite::ArithmeticParams* params) {
const int dims_count =
std::max(shape0.DimensionsCount(), shape1.DimensionsCount());
params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
RuntimeShape scalar_shape(dims_count, 1);
auto extended_shape0 = RuntimeShape::ExtendedShape(dims_count, shape0);
auto extended_shape1 = RuntimeShape::ExtendedShape(dims_count, shape1);
// Check for "exact" match, implicitly accepting any scalar shapes.
if (extended_shape0 == extended_shape1) {
params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
return false;
}
for (int i = dims_count - 1; i >= 0; --i) {
if (extended_shape0.Dims(i) == extended_shape1.Dims(i)) {
continue;
} else if (extended_shape0.Dims(i) == 1) {
params->broadcast_category =
BroadcastableOpCategory::kFirstInputBroadcastsFast;
break;
} else if (extended_shape1.Dims(i) == 1) {
params->broadcast_category =
BroadcastableOpCategory::kSecondInputBroadcastsFast;
break;
} else {
// This case is erroneous: there is a dimension that does not match and
// is not a broadcast from one shape to the other.
params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
return true;
}
}
if (params->broadcast_category !=
BroadcastableOpCategory::kFirstInputBroadcastsFast &&
params->broadcast_category !=
BroadcastableOpCategory::kSecondInputBroadcastsFast) {
// This is unreachable because at least one else clause in the above loop
// must be reached.
TFLITE_DCHECK(false);
params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
return false;
}
// From this point it is assumed contractually that corresponding dimensions
// in shape0 and shape1 are either (a) equal or (b) one or other equals 1.
const bool swap_inputs = params->broadcast_category ==
BroadcastableOpCategory::kSecondInputBroadcastsFast;
const RuntimeShape* shape_a =
swap_inputs ? &extended_shape1 : &extended_shape0;
const RuntimeShape* shape_b =
swap_inputs ? &extended_shape0 : &extended_shape1;
int i = dims_count - 1;
params->broadcast_shape[0] = 1;
params->broadcast_shape[1] = 1;
params->broadcast_shape[2] = 1;
params->broadcast_shape[3] = 1;
params->broadcast_shape[4] = 1;
// y_0 is greedy: include dims if both or neither equal 1: in other words,
// test for equality rather than (shape_a->Dims(i) != 1).
while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
params->broadcast_shape[4] *= shape_b->Dims(i);
--i;
}
// Here either input_a or input_b has dim of 1 (if i >= 0). If it is input_b
// that has the unit dimension, the next two loops are not entered.
while (i >= 0 && shape_a->Dims(i) == 1) {
params->broadcast_shape[3] *= shape_b->Dims(i);
--i;
}
while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
params->broadcast_shape[2] *= shape_a->Dims(i);
--i;
}
// Here either input_a or input_b has dim of 1 (if i >= 0).
while (i >= 0 && shape_b->Dims(i) == 1) {
params->broadcast_shape[1] *= shape_a->Dims(i);
--i;
}
while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
params->broadcast_shape[0] *= shape_b->Dims(i);
--i;
}
// Rarer case is when the broadcast dimensions cannot be handled by a fivefold
// loop.
if (i >= 0) {
params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
}
return true;
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
#include <algorithm>
#include <limits>
#include <vector>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Quantizes `input_data` into OutputT with the affine mapping
// q = round(val / scale) + zero_point, clamped to OutputT's range.
template <typename InputT, typename OutputT>
inline void AffineQuantize(const tflite::QuantizationParams& op_params,
                           const RuntimeShape& input_shape,
                           const InputT* input_data,
                           const RuntimeShape& output_shape,
                           OutputT* output_data) {
  const int32_t zero_point = op_params.zero_point;
  const double scale = op_params.scale;
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
  static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
  for (int i = 0; i < flat_size; i++) {
    const InputT val = input_data[i];
    // Rounding is done in float; the double `scale` is narrowed to float,
    // matching the arithmetic of PerChannelQuantize below.
    int32_t unclamped =
        static_cast<int32_t>(TfLiteRound(val / static_cast<float>(scale))) +
        zero_point;
    int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
    // Fix: make the int32_t -> OutputT narrowing explicit (was an implicit
    // conversion), consistent with PerChannelQuantize.
    output_data[i] = static_cast<OutputT>(clamped);
  }
}
// Quantizes per-channel.
// Each index along `quantized_dimension` has its own scale and zero point;
// all other axes share them. Elements are visited via NextIndex /
// ReducedOutputOffset over the full multi-index space.
template <typename InputT, typename OutputT>
inline void PerChannelQuantize(
    const tflite::PerChannelQuantizationParams& op_params,
    const RuntimeShape& input_shape, const InputT* input_data,
    const RuntimeShape& output_shape, OutputT* output_data) {
  // Ensure flat size is same.
  MatchingFlatSize(input_shape, output_shape);
  const int32_t* zero_point = op_params.zero_point;
  const float* scale = op_params.scale;
  const int32_t quantized_dimension = op_params.quantized_dimension;
  const int32_t num_dims = input_shape.DimensionsCount();
  const int32_t* dims_data = input_shape.DimsData();
  std::vector<int> current_dim(num_dims, 0);
  static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
  static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
  do {
    // Flat offset of the current multi-index.
    size_t offset =
        ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
                            current_dim.data(), 0, nullptr);
    const InputT val = input_data[offset];
    // Channel index selects this element's quantization parameters.
    const int channel = current_dim[quantized_dimension];
    // q = round(val / scale[channel]) + zero_point[channel], clamped to the
    // output type's representable range.
    int32_t unclamped = static_cast<int32_t>(TfLiteRound(
                            val / static_cast<float>(scale[channel]))) +
                        zero_point[channel];
    int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
    output_data[offset] = static_cast<OutputT>(clamped);
  } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
                     current_dim.data()));
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
#include <algorithm>
#include "third_party/ruy/ruy/profiler/instrumentation.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Re-quantizes `input_data` from one (scale, zero_point) pair to another.
// `effective_scale_multiplier`/`effective_scale_shift` encode
// input_scale / output_scale as a fixed-point multiplier.
template <typename input_type, typename output_type>
inline void Requantize(const input_type* input_data, int32_t size,
                       int32_t effective_scale_multiplier,
                       int32_t effective_scale_shift, int32_t input_zeropoint,
                       int32_t output_zeropoint, output_type* output_data) {
  ruy::profiler::ScopeLabel label("Requantize");
  // A multiplier of 2^30 with shift 1 encodes an effective scale of 1.0.
  if (effective_scale_multiplier == 1 << 30 && effective_scale_shift == 1) {
    constexpr bool kInt8ToUint8 = std::is_same<input_type, int8_t>::value &&
                                  std::is_same<output_type, uint8_t>::value;
    constexpr bool kUint8ToInt8 = std::is_same<input_type, uint8_t>::value &&
                                  std::is_same<output_type, int8_t>::value;
    const int32_t zero_point_diff = input_zeropoint - output_zeropoint;
    // Fast path: when the conversion is exactly a +/-128 zero-point shift
    // between int8 and uint8, it reduces to flipping the sign bit.
    if ((kInt8ToUint8 && zero_point_diff == -128) ||
        (kUint8ToInt8 && zero_point_diff == 128)) {
      for (int i = 0; i < size; ++i) {
        output_data[i] = input_data[i] ^ 0x80;
      }
      return;
    }
  }
  static constexpr int32_t kMinOutput = std::numeric_limits<output_type>::min();
  static constexpr int32_t kMaxOutput = std::numeric_limits<output_type>::max();
  // General path: shift to zero, rescale, shift to the new zero point, clamp.
  for (int i = 0; i < size; ++i) {
    const int32_t centered = input_data[i] - input_zeropoint;
    int32_t rescaled = MultiplyByQuantizedMultiplier(
        centered, effective_scale_multiplier, effective_scale_shift);
    rescaled += output_zeropoint;
    output_data[i] = static_cast<output_type>(
        std::min(std::max(rescaled, kMinOutput), kMaxOutput));
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
#include <algorithm>
#include <cmath>
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Maps an output coordinate to its nearest input coordinate for
// nearest-neighbor resizing, honoring the align_corners and
// half_pixel_centers conventions.
inline int32_t GetNearestNeighbor(const int input_value,
                                  const int32_t input_size,
                                  const int32_t output_size,
                                  const bool align_corners,
                                  const bool half_pixel_centers) {
  // With align_corners the first and last samples of input and output
  // coincide, hence the (size - 1) ratio.
  const float scale =
      (align_corners && output_size > 1)
          ? (input_size - 1) / static_cast<float>(output_size - 1)
          : input_size / static_cast<float>(output_size);
  const float offset = half_pixel_centers ? 0.5f : 0.0f;
  const float mapped = (input_value + offset) * scale;
  // align_corners rounds to nearest; otherwise truncate toward -infinity.
  int32_t output_value = align_corners
                             ? static_cast<int32_t>(TfLiteRound(mapped))
                             : static_cast<int32_t>(std::floor(mapped));
  output_value = std::min(output_value, input_size - 1);
  if (half_pixel_centers) {
    // The +0.5 center offset can push the mapped index below zero; clamp.
    output_value = std::max(static_cast<int32_t>(0), output_value);
  }
  return output_value;
}
// Nearest-neighbor resize over the height/width axes of a (up to) 4-D NHWC
// tensor. Batch and depth must match between input and output.
template <typename T>
inline void ResizeNearestNeighbor(
    const tflite::ResizeNearestNeighborParams& op_params,
    const RuntimeShape& unextended_input_shape, const T* input_data,
    const RuntimeShape& output_size_shape, const int32_t* output_size_data,
    const RuntimeShape& unextended_output_shape, T* output_data) {
  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);
  int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
  int32_t input_height = input_shape.Dims(1);
  int32_t input_width = input_shape.Dims(2);
  int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
  // The Tensorflow version of this op allows resize on the width and height
  // axis only.
  TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2);
  int32_t output_height = output_size_data[0];
  int32_t output_width = output_size_data[1];
  // Element strides (in elements, not bytes) of the NHWC input.
  const int col_offset = input_shape.Dims(3);
  const int row_offset = input_shape.Dims(2) * col_offset;
  const int batch_offset = input_shape.Dims(1) * row_offset;
  const T* input_ptr = input_data;
  T* output_ptr = output_data;
  for (int b = 0; b < batches; ++b) {
    for (int y = 0; y < output_height; ++y) {
      int32_t in_y = GetNearestNeighbor(y, input_height, output_height,
                                        op_params.align_corners,
                                        op_params.half_pixel_centers);
      const T* y_input_ptr = input_ptr + in_y * row_offset;
      for (int x = 0; x < output_width; ++x) {
        int32_t in_x = GetNearestNeighbor(x, input_width, output_width,
                                          op_params.align_corners,
                                          op_params.half_pixel_centers);
        const T* x_input_ptr = y_input_ptr + in_x * col_offset;
        // Copy the whole depth vector of the selected source pixel.
        // NOTE(review): memcpy is used but <cstring> is not included by this
        // header — presumably pulled in transitively; confirm.
        memcpy(output_ptr, x_input_ptr, depth * sizeof(T));
        output_ptr += depth;
      }
    }
    input_ptr += batch_offset;
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
#include <cmath>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Rounds `value` to the nearest integral float, breaking ties toward the
// nearest even integer (banker's rounding), matching TensorFlow's tf.round.
// cfenv (for fesetround) is not yet supported universally on Android, hence
// this manual implementation.
inline float RoundToNearest(float value) {
  const float floor_val = std::floor(value);
  const float diff = value - floor_val;
  if ((diff < 0.5f) ||
      ((diff == 0.5f) && (static_cast<int>(floor_val) % 2 == 0))) {
    return floor_val;
  }
  // Fix: return the incremented value directly; the original
  // `return floor_val = floor_val + 1.0f;` assigned to the local for no
  // effect (and prevented marking it const).
  return floor_val + 1.0f;
}
// Applies banker's rounding element-wise over matching-shaped float tensors.
// Note that this implementation matches that of tensorFlow tf.round and
// corresponds to the bankers rounding method; see RoundToNearest for why
// cfenv/fesetround is not used.
inline void Round(const RuntimeShape& input_shape, const float* input_data,
                  const RuntimeShape& output_shape, float* output_data) {
  const int num_elements = MatchingFlatSize(input_shape, output_shape);
  for (int idx = 0; idx < num_elements; ++idx) {
    output_data[idx] = RoundToNearest(input_data[idx]);
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
#include <cmath>
#include "third_party/ruy/ruy/profiler/instrumentation.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Element-wise select: output[i] = condition[i] ? x[i] : y[i].
template <typename D, typename T>
void Select(const RuntimeShape& input_condition_shape,
            const D* input_condition_data, const RuntimeShape& input_x_shape,
            const T* input_x_data, const RuntimeShape& input_y_shape,
            const T* input_y_data, const RuntimeShape& output_shape,
            T* output_data) {
  ruy::profiler::ScopeLabel label("Select");
  // Allow select operator executions on mixed scalar tensors and one element
  // tensors.
  const bool all_single_element =
      input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
      input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1;
  const int64_t num_elements =
      all_single_element ? 1
                         : MatchingFlatSize(input_condition_shape,
                                            input_x_shape, input_y_shape,
                                            output_shape);
  for (int64_t i = 0; i < num_elements; ++i) {
    output_data[i] =
        input_condition_data[i] ? input_x_data[i] : input_y_data[i];
  }
}
// Select where the condition has (at most) rank one along the outermost axis:
// each outer "row" of the output is copied wholesale from x or y depending on
// the corresponding condition element.
template <typename D, typename T>
void RankOneSelect(const RuntimeShape& input_condition_shape,
                   const D* input_condition_data,
                   const RuntimeShape& input_x_shape, const T* input_x_data,
                   const RuntimeShape& input_y_shape, const T* input_y_data,
                   const RuntimeShape& output_shape, T* output_data) {
  ruy::profiler::ScopeLabel label("Select/RankOneSelect");
  const int64_t outer_size = input_condition_shape.FlatSize();
  int64_t inner_size;
  if (input_condition_shape.DimensionsCount() == 0) {
    // Scalar condition: the single condition value chooses the whole tensor.
    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
  } else {
    TFLITE_DCHECK_EQ(
        MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0),
        outer_size);
    inner_size =
        MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
  }
  int64_t offset = 0;
  for (int64_t i = 0; i < outer_size; i++) {
    // Whole-row copy from the selected source; T is assumed trivially
    // copyable here, as in the other reference kernels.
    const T* input_data = input_condition_data[i] ? input_x_data : input_y_data;
    memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
    offset += inner_size;
  }
}
// Select with full broadcasting of condition, x and y, supporting up to 5
// dimensions. Indices into each operand are advanced with per-operand strides
// so broadcast (size-1) axes contribute stride 0.
template <typename D, typename T>
void BroadcastSelect5DSlow(const RuntimeShape& input_condition_shape,
                           const D* input_condition_data,
                           const RuntimeShape& input_x_shape,
                           const T* input_x_data,
                           const RuntimeShape& input_y_shape,
                           const T* input_y_data,
                           const RuntimeShape& output_shape, T* output_data) {
  ruy::profiler::ScopeLabel label("Select/BroadcastSelectSlow");
  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 5);
  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 5);
  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 5);
  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 5);
  NdArrayDesc<5> desc_condition;
  NdArrayDesc<5> desc_x;
  NdArrayDesc<5> desc_y;
  NdArrayDesc<5> desc_output;
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(5, output_shape);
  CopyDimsToDesc(extended_output_shape, &desc_output);
  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape,
                                      input_y_shape, &desc_condition, &desc_x,
                                      &desc_y);
  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest
  // stride, typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for
  // the best cache behavior.
  for (int n = 0; n < desc_output.extents[0]; ++n) {
    // The output index is built positionally (Horner-style) from the loop
    // counters; each operand index accumulates that operand's own strides.
    int out_idx_n = desc_output.extents[1] * n;
    int cond_idx_n = desc_condition.strides[0] * n;
    int in_idx1_n = desc_x.strides[0] * n;
    int in_idx2_n = desc_y.strides[0] * n;
    for (int b = 0; b < desc_output.extents[1]; ++b) {
      int out_idx_b = (out_idx_n + b) * desc_output.extents[2];
      int cond_idx_b = cond_idx_n + desc_condition.strides[1] * b;
      int in_idx1_b = in_idx1_n + desc_x.strides[1] * b;
      int in_idx2_b = in_idx2_n + desc_y.strides[1] * b;
      for (int y = 0; y < desc_output.extents[2]; ++y) {
        int out_idx_y = (out_idx_b + y) * desc_output.extents[3];
        int cond_idx_y = cond_idx_b + desc_condition.strides[2] * y;
        int in_idx1_y = in_idx1_b + desc_x.strides[2] * y;
        int in_idx2_y = in_idx2_b + desc_y.strides[2] * y;
        for (int x = 0; x < desc_output.extents[3]; ++x) {
          int out_idx = (out_idx_y + x) * desc_output.extents[4];
          int cond_idx = cond_idx_y + desc_condition.strides[3] * x;
          int in_idx1 = in_idx1_y + desc_x.strides[3] * x;
          int in_idx2 = in_idx2_y + desc_y.strides[3] * x;
          for (int c = 0; c < desc_output.extents[4]; ++c) {
            output_data[out_idx] = input_condition_data[cond_idx]
                                       ? input_x_data[in_idx1]
                                       : input_y_data[in_idx2];
            // Innermost axis: advance each operand by its own stride (0 for
            // broadcast axes).
            out_idx++;
            cond_idx += desc_condition.strides[4];
            in_idx1 += desc_x.strides[4];
            in_idx2 += desc_y.strides[4];
          }
        }
      }
    }
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Extracts the sub-tensor described by op_params (a begin index and a size
// per axis, where size -1 means "to the end of the axis") and emits the
// selected elements in row-major order through `writer`. Shapes are
// front-extended to 5 dimensions; begin/size vectors shorter than 5 entries
// implicitly select the whole axis for the leading dimensions.
template <typename T>
inline void Slice(const tflite::SliceParams& op_params,
                  const RuntimeShape& input_shape,
                  const RuntimeShape& output_shape,
                  SequentialTensorWriter<T>* writer) {
  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
  TFLITE_DCHECK_LE(op_params.begin_count, 5);
  TFLITE_DCHECK_LE(op_params.size_count, 5);
  const int begin_count = op_params.begin_count;
  const int size_count = op_params.size_count;
  // We front-pad the begin and size vectors.
  int start[5];
  int stop[5];
  for (int i = 0; i < 5; ++i) {
    // padded_i counts axes from the back of the extended shape; axes not
    // covered by begin/size use the full extent [0, Dims(i)).
    int padded_i = 5 - i;
    start[i] =
        begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
    stop[i] =
        (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
            ? ext_shape.Dims(i)
            : start[i] + op_params.size[size_count - padded_i];
  }
  for (int i0 = start[0]; i0 < stop[0]; ++i0) {
    for (int i1 = start[1]; i1 < stop[1]; ++i1) {
      for (int i2 = start[2]; i2 < stop[2]; ++i2) {
        for (int i3 = start[3]; i3 < stop[3]; ++i3) {
          for (int i4 = start[4]; i4 < stop[4]; ++i4) {
            // The writer appends sequentially; Offset() selects the source
            // element in the (extended) input layout.
            writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
          }
        }
      }
    }
  }
}
// Convenience overload: slices raw input/output buffers by wrapping them in a
// SequentialTensorWriter and delegating to the writer-based Slice above.
template <typename T>
inline void Slice(const tflite::SliceParams& op_params,
                  const RuntimeShape& input_shape, const T* input_data,
                  const RuntimeShape& output_shape, T* output_data) {
  SequentialTensorWriter<T> writer(input_data, output_data);
  Slice(op_params, input_shape, output_shape, &writer);
}
// Convenience overload taking whole tensors instead of raw data pointers.
template <typename T>
inline void Slice(const tflite::SliceParams& op_params,
                  const RuntimeShape& input_shape, const TfLiteTensor* input,
                  const RuntimeShape& output_shape, TfLiteTensor* output) {
  SequentialTensorWriter<T> tensor_writer(input, output);
  Slice(op_params, input_shape, output_shape, &tensor_writer);
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_BATCH_ND_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_BATCH_ND_H_
#include <cmath>
#include "third_party/ruy/ruy/profiler/instrumentation.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// TODO(b/135760455): Move this method anonymous namespace in a cc file.
// Expands a 3-D NHC shape to the 4-D NH1C layout SpaceToBatchND works in;
// shapes that are already 4-D pass through untouched.
// TODO(b/135760455): Move this method anonymous namespace in a cc file.
inline RuntimeShape ExtendShapeSpaceToBatch(const RuntimeShape& shape) {
  if (shape.DimensionsCount() == 4) {
    return shape;
  }
  RuntimeShape extended(4, 1);  // Width dimension (index 2) stays 1.
  extended.SetDim(0, shape.Dims(0));
  extended.SetDim(1, shape.Dims(1));
  extended.SetDim(3, shape.Dims(2));
  return extended;
}
// SpaceToBatchND: moves spatial blocks of the (optionally padded) input into
// the batch dimension. Accepts 3-D (NHC) or 4-D (NHWC) tensors; 3-D inputs
// are handled as NH1C.
//
// - block_shape_data: block sizes, [height] for 3-D or [height, width] for
//   4-D inputs.
// - paddings_data: [before, after] padding pairs per spatial axis; only the
//   "before" entries are needed here, "after" is implied by the output shape.
// - params.output_offset: quantization zero point, used as the pad value so
//   padded regions dequantize to zero.
template <typename T>
inline void SpaceToBatchND(const SpaceToBatchParams& params,
                           const RuntimeShape& unextended_input1_shape,
                           const T* input1_data,
                           const RuntimeShape& unextended_input2_shape,
                           const int32_t* block_shape_data,
                           const RuntimeShape& unextended_input3_shape,
                           const int32_t* paddings_data,
                           const RuntimeShape& unextended_output_shape,
                           T* output_data) {
  ruy::profiler::ScopeLabel label("SpaceToBatchND");
  TFLITE_DCHECK_GE(unextended_input1_shape.DimensionsCount(), 3);
  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(unextended_input1_shape.DimensionsCount(),
                   unextended_output_shape.DimensionsCount());
  // Extends the input/output shape from 3D to 4D if needed, NHC -> NH1C.
  const RuntimeShape input1_shape =
      ExtendShapeSpaceToBatch(unextended_input1_shape);
  const RuntimeShape output_shape =
      ExtendShapeSpaceToBatch(unextended_output_shape);
  const int depth = input1_shape.Dims(3);
  const int input_width = input1_shape.Dims(2);
  const int input_height = input1_shape.Dims(1);
  const int input_batch_size = input1_shape.Dims(0);
  const int output_width = output_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_batch_size = output_shape.Dims(0);
  const int block_shape_height = block_shape_data[0];
  // 3-D inputs have no width axis: block width 1, no left padding.
  const int block_shape_width =
      unextended_input1_shape.DimensionsCount() == 4 ? block_shape_data[1] : 1;
  const int padding_top = paddings_data[0];
  const int padding_left =
      unextended_input1_shape.DimensionsCount() == 4 ? paddings_data[2] : 0;
  // For quantized types, the correct padding "zero value" is the output
  // zero point.
  const T pad_value = static_cast<T>(params.output_offset);
  for (int out_b = 0; out_b < output_batch_size; ++out_b) {
    // Each output batch corresponds to one input batch plus an intra-block
    // (shift_h, shift_w) position.
    int input_batch = out_b % input_batch_size;
    int shift_w = (out_b / input_batch_size) % block_shape_width;
    int shift_h = (out_b / input_batch_size) / block_shape_width;
    for (int out_h = 0; out_h < output_height; ++out_h) {
      for (int out_w = 0; out_w < output_width; ++out_w) {
        T* out = output_data + Offset(output_shape, out_b, out_h, out_w, 0);
        if (out_h * block_shape_height + shift_h < padding_top ||
            out_h * block_shape_height + shift_h >=
                padding_top + input_height ||
            out_w * block_shape_width + shift_w < padding_left ||
            out_w * block_shape_width + shift_w >= padding_left + input_width) {
          // Bug fix: the previous memset() stored only the low byte of the
          // padding value into every byte of the row, which is wrong whenever
          // sizeof(T) > 1 and the value is nonzero (e.g. int16 quantization
          // with a nonzero zero point). Use a typed element-wise fill.
          for (int d = 0; d < depth; ++d) {
            out[d] = pad_value;
          }
        } else {
          const T* in =
              input1_data +
              Offset(input1_shape, input_batch,
                     (out_h * block_shape_height + shift_h) - padding_top,
                     (out_w * block_shape_width + shift_w) - padding_left, 0);
          memcpy(out, in, depth * sizeof(T));
        }
      }
    }
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_BATCH_ND_H_
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_
#include <cstdint>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Rearranges spatial blocks of size block_size x block_size into the channel
// dimension: output depth grows by block_size^2 while height and width each
// shrink by block_size. Shapes of rank < 4 are front-padded to 4-D.
template <typename T>
inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params,
                         const RuntimeShape& unextended_input_shape,
                         const T* input_data,
                         const RuntimeShape& unextended_output_shape,
                         T* output_data) {
  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);
  const int32_t block_size = op_params.block_size;
  const int depth = input_shape.Dims(3);
  const int width = input_shape.Dims(2);
  const int height = input_shape.Dims(1);
  const int batches = input_shape.Dims(0);
  // The shapes must describe an exact block rearrangement.
  TFLITE_DCHECK_EQ(width, output_shape.Dims(2) * block_size);
  TFLITE_DCHECK_EQ(height, output_shape.Dims(1) * block_size);
  TFLITE_DCHECK_EQ(depth * block_size * block_size, output_shape.Dims(3));
  TFLITE_DCHECK_EQ(batches, output_shape.Dims(0));
  for (int b = 0; b < batches; ++b) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        for (int d = 0; d < depth; ++d) {
          // The pixel's position inside its block selects the output channel
          // group; the block's position selects the output pixel.
          const int block_offset =
              (h % block_size) * block_size + (w % block_size);
          const int src = Offset(input_shape, b, h, w, d);
          const int dst = Offset(output_shape, b, h / block_size,
                                 w / block_size, d + block_offset * depth);
          output_data[dst] = input_data[src];
        }
      }
    }
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
#include "third_party/ruy/ruy/profiler/instrumentation.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Generic 5-D strided slice: resolves a concrete [start, stop) range per axis
// (honoring stride sign and the begin/end/shrink masks, via the
// strided_slice helpers) and emits every visited input offset through
// `writer` in output order.
template <typename T>
inline void StridedSlice(const tflite::StridedSliceParams& op_params,
                         const RuntimeShape& unextended_input_shape,
                         const RuntimeShape& unextended_output_shape,
                         SequentialTensorWriter<T>* writer) {
  ruy::profiler::ScopeLabel label("StridedSlice");
  // Note that the output_shape is not used herein.
  tflite::StridedSliceParams params_copy = op_params;
  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 5);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 5);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(5, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(5, unextended_output_shape);
  // Reverse and pad to 5 dimensions because that is what the runtime code
  // requires (ie. all shapes must be 5D and are given backwards).
  strided_slice::StridedSlicePadIndices(&params_copy, 5);
  // Resolve per-axis begin/end indices. End resolution needs the resolved
  // start because of the shrink-axis semantics.
  const int start_0 =
      strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 0);
  const int stop_0 = strided_slice::StridedSliceEndForAxis(
      params_copy, input_shape, 0, start_0);
  const int start_1 =
      strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 1);
  const int stop_1 = strided_slice::StridedSliceEndForAxis(
      params_copy, input_shape, 1, start_1);
  const int start_2 =
      strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 2);
  const int stop_2 = strided_slice::StridedSliceEndForAxis(
      params_copy, input_shape, 2, start_2);
  const int start_3 =
      strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 3);
  const int stop_3 = strided_slice::StridedSliceEndForAxis(
      params_copy, input_shape, 3, start_3);
  const int start_4 =
      strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 4);
  const int stop_4 = strided_slice::StridedSliceEndForAxis(
      params_copy, input_shape, 4, start_4);
  // Loop-continuation predicate: the comparison direction depends on the
  // stride's sign (negative strides iterate downwards).
  auto lc = [&](int end, int stride, int index) {
    if (stride < 0) {
      return index > end;
    } else {
      return index < end;
    }
  };
  // With a static_cast it is not possible to initialize
  // a variable of type 'const int *'
  // with an rvalue of type 'const int32_t *' (aka 'const long *').
  // reinterpret_cast is required to handle this casting.
  const int* shape = reinterpret_cast<const int*>(input_shape.DimsData());
  const int* stride = reinterpret_cast<const int*>(params_copy.strides);
  const bool inner_stride_is_1 = params_copy.strides[4] == 1;
  for (int offset_0 = start_0; lc(stop_0, stride[0], offset_0);
       offset_0 += stride[0]) {
    for (int offset_1 = start_1; lc(stop_1, stride[1], offset_1);
         offset_1 += stride[1]) {
      for (int offset_2 = start_2; lc(stop_2, stride[2], offset_2);
           offset_2 += stride[2]) {
        for (int offset_3 = start_3; lc(stop_3, stride[3], offset_3);
             offset_3 += stride[3]) {
          // When the stride is 1, the inner loop is equivalent to the
          // optimized slice inner loop. Otherwise, it is identical to the
          // strided_slice reference implementation inner loop.
          if (inner_stride_is_1) {
            // Contiguous innermost run: write it in a single batched call.
            const int len = stop_4 - start_4;
            int index = start_4 + offset_3 * shape[4] +
                        offset_2 * shape[3] * shape[4] +
                        offset_1 * shape[2] * shape[3] * shape[4] +
                        offset_0 * shape[1] * shape[2] * shape[3] * shape[4];
            if (len > 0) {
              writer->WriteN(index, len);
            }
          } else {
            for (int offset_4 = start_4; lc(stop_4, stride[4], offset_4);
                 offset_4 += stride[4]) {
              int index = offset_4 + offset_3 * shape[4] +
                          offset_2 * shape[3] * shape[4] +
                          offset_1 * shape[2] * shape[3] * shape[4] +
                          offset_0 * shape[1] * shape[2] * shape[3] * shape[4];
              writer->Write(index);
            }
          }
        }
      }
    }
  }
}
// Raw-pointer convenience overload: slices `input_data` into `output_data`.
template <typename T>
inline void StridedSlice(const tflite::StridedSliceParams& op_params,
                         const RuntimeShape& unextended_input_shape,
                         const T* input_data,
                         const RuntimeShape& unextended_output_shape,
                         T* output_data) {
  SequentialTensorWriter<T> tensor_writer(input_data, output_data);
  StridedSlice<T>(op_params, unextended_input_shape, unextended_output_shape,
                  &tensor_writer);
}
// Tensor convenience overload: slices `input` into `output`.
template <typename T>
inline void StridedSlice(const tflite::StridedSliceParams& op_params,
                         const RuntimeShape& unextended_input_shape,
                         const TfLiteTensor* input,
                         const RuntimeShape& unextended_output_shape,
                         TfLiteTensor* output) {
  SequentialTensorWriter<T> tensor_writer(input, output);
  StridedSlice<T>(op_params, unextended_input_shape, unextended_output_shape,
                  &tensor_writer);
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_
#include <cmath>
#include "third_party/gemmlowp/fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace reference_ops {
// Element-wise hyperbolic tangent over the flattened float tensor.
inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
                 const RuntimeShape& output_shape, float* output_data) {
  const int size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < size; ++i) {
    output_data[i] = std::tanh(input_data[i]);
  }
}
// Convenience overload so generated code can pass a TanhParams uniformly
// across data types; the float path has no parameters to consume, so the
// argument is deliberately unnamed and ignored.
inline void Tanh(const TanhParams&, const RuntimeShape& input_shape,
                 const float* input_data, const RuntimeShape& output_shape,
                 float* output_data) {
  Tanh(input_shape, input_data, output_shape, output_data);
}
// Fixed-point int16 tanh: interprets the raw input as a gemmlowp F3 value
// (3 integer bits, range [-8, 8]), optionally pre-scaled by 2^input_left_shift,
// and produces a raw F0 result (0 integer bits, range [-1, 1]).
inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
                 const int16_t* input_data, const RuntimeShape& output_shape,
                 int16_t* output_data) {
  const int input_left_shift = params.input_left_shift;
  // Support for shifts is limited until we have a parameterized version of
  // SaturatingRoundingMultiplyByPOT().
  TFLITE_DCHECK_GE(input_left_shift, 0);
  TFLITE_DCHECK_LE(input_left_shift, 1);
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  // F0 uses 0 integer bits, range [-1, 1].
  // This is the return type of math functions such as tanh, logistic,
  // whose range is in [-1, 1].
  using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
  // F3 uses 3 integer bits, range [-8, 8], the input range expected here.
  using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
  if (input_left_shift == 0) {
    // Raw input already in F3 representation.
    for (int i = 0; i < flat_size; i++) {
      F3 input = F3::FromRaw(input_data[i]);
      F0 output = gemmlowp::tanh(input);
      output_data[i] = output.raw();
    }
  } else {
    // input_left_shift == 1: saturating doubling of the raw value before
    // interpreting it as F3.
    for (int i = 0; i < flat_size; i++) {
      F3 input = F3::FromRaw(
          gemmlowp::SaturatingRoundingMultiplyByPOT<1>(input_data[i]));
      F0 output = gemmlowp::tanh(input);
      output_data[i] = output.raw();
    }
  }
}
// Quantized uint8 tanh. Inputs far outside the representable range (beyond
// input_range_radius from the zero point) saturate straight to 0 or 255;
// everything else is rescaled to fixed point, passed through gemmlowp's
// tanh, and requantized with a fixed output zero point of 128.
inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
                 const uint8_t* input_data, const RuntimeShape& output_shape,
                 uint8_t* output_data) {
  const int32_t input_zero_point = params.input_zero_point;
  const int32_t input_range_radius = params.input_range_radius;
  const int32_t input_multiplier = params.input_multiplier;
  const int input_left_shift = params.input_left_shift;
  // Output scale is fixed for tanh: zero point 128 maps [-1, 1] onto [0, 255].
  const int32_t output_zero_point = 128;
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < flat_size; i++) {
    const uint8_t input_val_u8 = input_data[i];
    const int32_t input_val_centered =
        static_cast<int32_t>(input_val_u8) - input_zero_point;
    uint8_t output_val;
    if (input_val_centered <= -input_range_radius) {
      // tanh has saturated to -1.
      output_val = 0;
    } else if (input_val_centered >= input_range_radius) {
      // tanh has saturated to +1.
      output_val = 255;
    } else {
      // Rescale the centered input into the fixed-point domain expected by
      // gemmlowp::tanh (4 integer bits).
      const int32_t input_val_rescaled =
          MultiplyByQuantizedMultiplierGreaterThanOne(
              input_val_centered, input_multiplier, input_left_shift);
      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, 4>;
      using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
      const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled);
      const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4);
      // Convert from Q0.31 to Q24.7.
      using gemmlowp::RoundingDivideByPOT;
      int32_t output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 24);
      output_val_s32 += output_zero_point;
      // The rounding divide can land exactly one past the top of the range.
      if (output_val_s32 == 256) {
        output_val_s32 = 255;
      }
      // Reinterpret as Q0.7, encoded in uint8_t.
      TFLITE_DCHECK_GE(output_val_s32, 0);
      TFLITE_DCHECK_LE(output_val_s32, 255);
      output_val = static_cast<uint8_t>(output_val_s32);
    }
    output_data[i] = output_val;
  }
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_
#include <array>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
namespace transpose_internal {
// Recursively explores all the dimensions of the output tensor and writes the
// corresponding input tensor data.
//
// - depth: the current depth of the recursion.
// - dims: tensor dimension count, also `perm` size.
// - perm: permutation array.
// - input_data: Running input data pointer. If depth == num_dims-1, this points
// to the first element of the last dimension to traverse.
// - input_stride: Reverse partial product of input shapes.
// - output_data: Running output data pointer. If depth == num_dims-1, this
// points to the first element of the last dimension to traverse.
// - output_stride: Reverse partial product of output shapes.
// - output_shape: Shape of the output tensor.
//
// ## Algorithm explanation
//
// Assume a 3D tensor T with a shape of [I, J, K] stored in row major order.
// T[i, j, k] is at position `i*J*K + j*K + k` in the tensor buffer.
//
// If we want to go through the whole tensor iteratively, we can use loops.
//
// ```
// for(i = 0; i < I; ++i) {
// for(j = 0; j < J; ++j) {
// for(k = 0; k < K; ++k) {
// T.data[i*J*K + j*K + k] = ...
// }
// }
// }
// ```
//
// We can also compute the offset as we go through the loops.
//
// ```
// stride_i = K * J;
// stride_j = K;
// stride_k = 1;
// for(i = 0; i < I; ++i) {
// offset_i = i * stride_i;
// offset_j = 0;
// for(j = 0; j < J; ++j) {
// offset_j += stride_j;
// offset_k = 0;
// for(k = 0; k < K; ++k) {
// offset_k += stride_k;
// T.data[offset_i + offset_j + offset_k] = ...
// }
// }
// }
// ```
//
// This nicely extends to a recursive version which is the base of this
// algorithm and supports any number of dimensions.
//
// ```
// shape = [I, J, K]
// strides = [K*J, K, 1]
// void recurse(T* data, shape, strides, depth = 0) {
// if(depth == shape.size) {
// *data = ...
// } else {
// for(a = 0; a < shape[depth]; ++a) {
// recurse(data, shape, strides, depth+1);
// data += strides[depth];
// }
// }
// }
// ```
// Recursive core of Transpose: walks the output tensor dimension by
// dimension (see the algorithm description above), advancing the input
// pointer by the stride of the permuted source axis at each level.
template <typename T>
void TransposeImpl(const int depth, const int dims, const int32_t* perm,
                   const T* input_data, const int* input_stride, T* output_data,
                   const int* output_stride, const int32_t* output_shape) {
  const int dimension_size = output_shape[depth];
  if (depth == dims - 1) {
    // Innermost output dimension: copy elements one by one; consecutive
    // output elements come from input positions `loop_stride` apart.
    const int loop_stride = input_stride[perm[depth]];
    for (int i = 0; i < dimension_size; ++i) {
      output_data[i] = *input_data;
      input_data += loop_stride;
    }
  } else {
    // Recurse into the next dimension. The output advances by its own
    // stride, the input by the stride of the axis `perm` maps here.
    for (int i = 0; i < dimension_size; ++i) {
      TransposeImpl(depth + 1, dims, perm, input_data, input_stride,
                    output_data, output_stride, output_shape);
      input_data += input_stride[perm[depth]];
      output_data += output_stride[depth];
    }
  }
}
// Compile-time switch to get the storage type of the transposition.
// Maps a scalar size in bytes to an integer type of the same width, so a
// single TransposeImpl instantiation per width serves all element types of
// that size (transpose only moves bits, it never interprets them).
template <int Size>
struct TransposeStorageType;
// 1-byte scalars (e.g. int8/uint8/bool).
template <>
struct TransposeStorageType<1> {
  using type = int8_t;
};
// 2-byte scalars (e.g. int16/float16).
template <>
struct TransposeStorageType<2> {
  using type = int16_t;
};
// 4-byte scalars (e.g. int32/float).
template <>
struct TransposeStorageType<4> {
  using type = int32_t;
};
// 8-byte scalars (e.g. int64/double).
template <>
struct TransposeStorageType<8> {
  using type = int64_t;
};
// Sets up the stride arrays for the recursive transpose algorithm.
//
// Implementation notes:
//
// This is a reverse partial product. We could use standard algorithms to
// implement this but the result is not a readable and is tricky to get right
// because the first element must be set to 1, which leads to offset
// shenanigans:
//
// ```
// stride[dims - 1] = 1;
// std::partial_sum(std::make_reverse_iterator(shape + dims),
// std::make_reverse_iterator(shape + 1),
// stride.rend() - input_rank + 1, std::multiplies());
// ```
//
// Note that Abseil isn't used in kernels implementation. That would make the
// above solution more readable.
inline void SetupTransposeStrides(
    std::array<int, kTransposeMaxDimensions>& stride, const int32_t* shape,
    const int dims) {
  // Row-major strides: the innermost axis moves by 1 and each outer axis
  // moves by the product of all inner axis sizes, accumulated as we walk
  // from the innermost dimension outwards.
  int running_product = 1;
  for (int d = dims - 1; d >= 0; --d) {
    stride[d] = running_product;
    running_product *= shape[d];
  }
}
} // namespace transpose_internal
// Copies a tensor to an other buffer and permutes its dimensions.
//
// Note: template parameter N is not used anymore. It is kept for API
// compatibility with TFLite micro.
template <typename T, int N = kTransposeMaxDimensions>
void Transpose(const TransposeParams& params, const RuntimeShape& input_shape,
               const T* input_data, const RuntimeShape& output_shape,
               T* output_data) {
  using transpose_internal::SetupTransposeStrides;
  using transpose_internal::TransposeImpl;
  using transpose_internal::TransposeStorageType;
  // Transpose kernel only does rearranging values not numeric evaluations on
  // each cell. It's safe to implement per size of scalar type and this trick
  // keeps the total code size in a reasonable range.
  using StorageType = typename TransposeStorageType<sizeof(T)>::type;
  const StorageType* const input_data_storage =
      reinterpret_cast<const StorageType*>(input_data);
  StorageType* const output_data_storage =
      reinterpret_cast<StorageType*>(output_data);
  const int dims = input_shape.DimensionsCount();
  // Precompute row-major strides for both tensors, then recurse over the
  // output dimensions copying elements from the permuted input positions.
  std::array<int, kTransposeMaxDimensions> input_stride, output_stride;
  SetupTransposeStrides(input_stride, input_shape.DimsData(), dims);
  SetupTransposeStrides(output_stride, output_shape.DimsData(), dims);
  TransposeImpl(0, dims, &params.perm[0], input_data_storage,
                input_stride.data(), output_data_storage, output_stride.data(),
                output_shape.DimsData());
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/runtime_shape.h"
namespace tflite {
// Out-of-line definition of the static constexpr member. Prior to C++17
// (where static constexpr data members became implicitly inline), an
// ODR-used constexpr member still needs exactly one namespace-scope
// definition such as this one.
constexpr int tflite::RuntimeShape::kMaxSmallSize;
}  // namespace tflite
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_
#include <cstring>
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
template <int N>
struct Dims {
int sizes[N];
int strides[N];
};
// Lightweight fixed-capacity tensor shape (up to kMaxSmallSize dimensions)
// used throughout the reference kernels. Stores the rank and the per-axis
// sizes inline; no heap allocation.
class RuntimeShape {
 public:
  // Copy-assignment is disabled; shapes are built via constructors or
  // ReplaceWith().
  RuntimeShape& operator=(RuntimeShape const&) = delete;
  // RuntimeShape in TFLM supports up to 6 dimensions.
  // The name kMaxSmallSize comes from the same file of the upstream
  // tensorflow lite repo and need to be kept the same for max reuse.
  static constexpr int kMaxSmallSize = 6;
  // Rank-0 (empty) shape.
  RuntimeShape() : size_(0) {}
  // Shape with the given rank; dimension values are left uninitialized.
  explicit RuntimeShape(int dimensions_count) : size_(dimensions_count) {
    TFLITE_DCHECK_LE(dimensions_count, kMaxSmallSize);
  }
  // Shape with `shape_size` dimensions, every one set to `value`.
  RuntimeShape(int shape_size, int32_t value) : size_(shape_size) {
    TFLITE_DCHECK_LE(shape_size, kMaxSmallSize);
    for (int i = 0; i < shape_size; ++i) {
      SetDim(i, value);
    }
  }
  // Shape copied from a raw dimension array of length `dimensions_count`.
  RuntimeShape(int dimensions_count, const int32_t* dims_data)
      : size_(dimensions_count) {
    // check of dimensions_count handled by ReplaceWith()
    ReplaceWith(dimensions_count, dims_data);
  }
  // Shapes are equal when both the rank and every dimension match.
  bool operator==(const RuntimeShape& comp) const {
    return this->size_ == comp.size_ &&
           std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32_t)) ==
               0;
  }
  ~RuntimeShape() {}
  int32_t DimensionsCount() const { return size_; }
  // Returns the size of axis `i`; DCHECKs that `i` is within [0, rank).
  int32_t Dims(int i) const {
    TFLITE_DCHECK_GE(i, 0);
    TFLITE_DCHECK_LT(i, size_);
    return dims_[i];
  }
  // Sets the size of axis `i`; DCHECKs that `i` is within [0, rank).
  void SetDim(int i, int32_t val) {
    TFLITE_DCHECK_GE(i, 0);
    TFLITE_DCHECK_LT(i, size_);
    dims_[i] = val;
  }
  // Returns `shape` front-padded with 1s to rank `new_shape_size`.
  static RuntimeShape ExtendedShape(int new_shape_size,
                                    const RuntimeShape& shape) {
    TFLITE_DCHECK_LE(new_shape_size, kMaxSmallSize);
    return RuntimeShape(new_shape_size, shape, 1);
  }
  int32_t* DimsData() { return dims_; }
  const int32_t* DimsData() const { return dims_; }
  // Kept for upstream API compatibility; identical to DimsData() here.
  const int32_t* DimsDataUpTo5D() const { return dims_; }
  // Overwrites both rank and dimensions from a raw array.
  void ReplaceWith(int dimensions_count, const int32_t* dims_data) {
    TFLITE_DCHECK_LE(dimensions_count, kMaxSmallSize);
    size_ = dimensions_count;
    int32_t* dst_dims = DimsData();
    std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t));
  }
  // Returns the total count of elements, that is the size when flattened into a
  // vector. Note: a rank-0 shape yields 1; any zero-sized axis yields 0.
  int FlatSize() const {
    int buffer_size = 1;
    const int* dims_data = reinterpret_cast<const int*>(DimsData());
    for (int i = 0; i < size_; i++) {
      buffer_size *= dims_data[i];
    }
    return buffer_size;
  }
 private:
  // For use only by ExtendedShape(), written to guarantee (return-value) copy
  // elision in C++17.
  // This creates a shape padded to the desired size with the specified value.
  RuntimeShape(int new_shape_size, const RuntimeShape& shape, int pad_value)
      : size_(new_shape_size) {
    // If the following check fails, it is likely because a 4D-only kernel is
    // being used with an array of larger dimension count.
    TFLITE_CHECK_GE(new_shape_size, shape.DimensionsCount());
    const int size_increase = new_shape_size - shape.DimensionsCount();
    for (int i = 0; i < size_increase; ++i) {
      SetDim(i, pad_value);
    }
    std::memcpy(DimsData() + size_increase, shape.DimsData(),
                sizeof(int32_t) * shape.DimensionsCount());
  }
  int32_t size_;
  // NOTE(review): single-member union mirrors the upstream file's layout —
  // presumably kept for maximal-reuse/ABI parity with upstream; confirm
  // before simplifying.
  union {
    int32_t dims_[kMaxSmallSize];
  };
};
// Since tensors with '0' in their shape are valid in TF, these offset functions
// allow that as long as the corresponding index is also 0. It is upto the
// calling ops to ensure that they perform verification checks on tensor shapes
// if they don't support a particular behavior.
inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3) {
  // Row-major linearization of a 4-D index. A zero-sized dimension is
  // accepted as long as the matching index is also zero.
  TFLITE_DCHECK_EQ(shape.DimensionsCount(), 4);
  const int* d = reinterpret_cast<const int*>(shape.DimsData());
  TFLITE_DCHECK((d[0] == 0 && i0 == 0) || (i0 >= 0 && i0 < d[0]));
  TFLITE_DCHECK((d[1] == 0 && i1 == 0) || (i1 >= 0 && i1 < d[1]));
  TFLITE_DCHECK((d[2] == 0 && i2 == 0) || (i2 >= 0 && i2 < d[2]));
  TFLITE_DCHECK((d[3] == 0 && i3 == 0) || (i3 >= 0 && i3 < d[3]));
  int linear = i0;
  linear = linear * d[1] + i1;
  linear = linear * d[2] + i2;
  linear = linear * d[3] + i3;
  return linear;
}
inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3,
                  int i4) {
  // Row-major linearization of a 5-D index. A zero-sized dimension is
  // accepted as long as the matching index is also zero.
  TFLITE_DCHECK_EQ(shape.DimensionsCount(), 5);
  const int* d = reinterpret_cast<const int*>(shape.DimsData());
  TFLITE_DCHECK((d[0] == 0 && i0 == 0) || (i0 >= 0 && i0 < d[0]));
  TFLITE_DCHECK((d[1] == 0 && i1 == 0) || (i1 >= 0 && i1 < d[1]));
  TFLITE_DCHECK((d[2] == 0 && i2 == 0) || (i2 >= 0 && i2 < d[2]));
  TFLITE_DCHECK((d[3] == 0 && i3 == 0) || (i3 >= 0 && i3 < d[3]));
  TFLITE_DCHECK((d[4] == 0 && i4 == 0) || (i4 >= 0 && i4 < d[4]));
  int linear = i0;
  linear = linear * d[1] + i1;
  linear = linear * d[2] + i2;
  linear = linear * d[3] + i3;
  linear = linear * d[4] + i4;
  return linear;
}
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_
#include <limits>
#include <vector>
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace strided_slice {
// Use until std::clamp() is available from C++17.
inline int Clamp(const int v, const int lo, const int hi) {
  // The bounds must form a valid (possibly degenerate) interval.
  TFLITE_DCHECK(!(hi < lo));
  // Pull v back inside [lo, hi], returning the nearest bound if outside.
  return (hi < v) ? hi : ((v < lo) ? lo : v);
}
// Rewrites `p` in place so that it describes the same slice over a
// `dim_count`-dimensional shape, by front-padding the index/stride arrays
// and shifting the per-axis mask bits accordingly.
inline void StridedSlicePadIndices(tflite::StridedSliceParams* p,
                                   int dim_count) {
  // Add indices and mask bits to fully include extra dimensions
  TFLITE_CHECK_LE(dim_count, 5);
  TFLITE_CHECK_GE(dim_count, p->start_indices_count);
  TFLITE_CHECK_EQ(p->start_indices_count, p->stop_indices_count);
  TFLITE_CHECK_EQ(p->stop_indices_count, p->strides_count);
  const int pad_count = dim_count - p->start_indices_count;
  // Pad indices at start, so move arrays by pad_count.
  for (int i = p->start_indices_count - 1; i >= 0; --i) {
    p->strides[i + pad_count] = p->strides[i];
    p->start_indices[i + pad_count] = p->start_indices[i];
    p->stop_indices[i + pad_count] = p->stop_indices[i];
  }
  // The padded leading axes nominally select [0, 1) with stride 1.
  for (int i = 0; i < pad_count; ++i) {
    p->start_indices[i] = 0;
    p->stop_indices[i] = 1;
    p->strides[i] = 1;
  }
  // Pad masks with 0s or 1s as required.
  p->shrink_axis_mask <<= pad_count;
  p->ellipsis_mask <<= pad_count;
  p->new_axis_mask <<= pad_count;
  p->begin_mask <<= pad_count;
  p->end_mask <<= pad_count;
  // Set begin/end mask bits on every padded axis so the whole (size-1)
  // dimension is spanned regardless of the dummy indices written above.
  p->begin_mask |= (1 << pad_count) - 1;
  p->end_mask |= (1 << pad_count) - 1;
  p->start_indices_count = dim_count;
  p->stop_indices_count = dim_count;
  p->strides_count = dim_count;
}
// Return the index for the first element along that axis. This index will be a
// positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0)
// that can be used to index directly into the data.
// Returns the index of the first element visited along `axis`. The result is
// in [0, axis_size] for positive strides and [-1, axis_size - 1] for negative
// strides, so it can index directly into the data.
inline int StridedSliceStartForAxis(const tflite::StridedSliceParams& params,
                                    const RuntimeShape& input_shape,
                                    int32_t axis) {
  const int32_t axis_size = input_shape.Dims(axis);
  const int32_t stride = params.strides[axis];
  const bool use_default_start = (params.begin_mask & (1 << axis)) != 0;
  // Begin with the user-provided index, translating negative indexing.
  int32_t start = params.start_indices[axis];
  if (start < 0) {
    start += axis_size;
  }
  // Clamp into the directly-indexable range for the iteration direction.
  start = (stride > 0) ? Clamp(start, 0, axis_size)
                       : Clamp(start, -1, axis_size - 1);
  // begin_mask overrides everything: begin at the first element visited.
  if (use_default_start) {
    start = (stride > 0) ? 0 : axis_size - 1;
  }
  return start;
}
// Returns the exclusive end index for iteration along `axis`, given the
// already-resolved `start` index from StridedSliceStartForAxis.
inline int StridedSliceEndForAxis(const tflite::StridedSliceParams& params,
                                  const RuntimeShape& input_shape, int axis,
                                  int start) {
  const int axis_size = input_shape.Dims(axis);
  // A shrunk axis always produces a length-1 slice (or an empty one when
  // start is already past the end of the axis).
  if (params.shrink_axis_mask & (1 << axis)) {
    return (start >= axis_size) ? start : start + 1;
  }
  int end = params.stop_indices[axis];
  if (params.offset) {
    // In offset mode, stop_indices holds a length relative to start.
    end += start;
  }
  const int32_t stride = params.strides[axis];
  // Translate negative indexing.
  if (end < 0) {
    end += axis_size;
  }
  // Clamp into the valid exclusive-end range for the iteration direction.
  end = (stride > 0) ? Clamp(end, 0, axis_size) : Clamp(end, -1, axis_size - 1);
  // end_mask overrides: run through to the end of the axis.
  if (params.end_mask & (1 << axis)) {
    end = (stride > 0) ? axis_size : -1;
  }
  return end;
}
// Return the index for the first element along that axis. This index will be a
// positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0)
// that can be used to index directly into the data.
// Return the index for the first element along that axis. This index will be a
// positive integer between [0, axis_size] (or [-1, axis_size - 1] if
// stride < 0) that can be used to index directly into the data.
inline int StartForAxis(const tflite::StridedSliceParams& params,
                        const RuntimeShape& input_shape, int axis) {
  const int axis_size = input_shape.Dims(axis);
  if (axis_size == 0) {
    return 0;
  }
  const bool forward = params.strides[axis] > 0;
  // Begin with the specified index.
  int start = params.start_indices[axis];
  // begin_mask override: pick an extreme sentinel that the clamping below
  // maps to the first element visited in the iteration direction. (Sentinels
  // maintain symmetry with StopForAxis().)
  if (params.begin_mask & 1 << axis) {
    start = forward ? std::numeric_limits<int>::lowest()
                    : std::numeric_limits<int>::max();
  }
  // Handle negative indices.
  if (start < 0) {
    start += axis_size;
  }
  // Clamp into the range that can index directly into the data.
  return forward ? Clamp(start, 0, axis_size) : Clamp(start, -1, axis_size - 1);
}
// Return the "real" index for the end of iteration along that axis. This is an
// "end" in the traditional C sense, in that it points to one past the last
// element. ie. So if you were iterating through all elements of a 1D array of
// size 4, this function would return 4 as the stop, because it is one past the
// "real" indices of 0, 1, 2 & 3.
// Return the "real" index for the end of iteration along that axis. This is an
// "end" in the traditional C sense: it points one past the last element. E.g.
// iterating all elements of a size-4 1D array yields stop == 4.
inline int StopForAxis(const tflite::StridedSliceParams& params,
                       const RuntimeShape& input_shape, int axis,
                       int start_for_axis) {
  const int axis_size = input_shape.Dims(axis);
  if (axis_size == 0) {
    return 0;
  }
  // When shrinking an axis, the end position does not matter (and can be
  // incorrect when negative indexing is used, see Issue #19260). Always use
  // start_for_axis + 1 to generate a length 1 slice, since start_for_axis has
  // already been adjusted for negative indices.
  if (params.shrink_axis_mask & (1 << axis)) {
    return start_for_axis + 1;
  }
  const bool forward = params.strides[axis] > 0;
  // Begin with the specified index.
  int stop = params.stop_indices[axis];
  // end_mask override: an extreme sentinel that the clamping below maps to
  // one past the last element visited in the iteration direction.
  if (params.end_mask & (1 << axis)) {
    stop = forward ? std::numeric_limits<int>::max()
                   : std::numeric_limits<int>::lowest();
  }
  // Handle negative indices.
  if (stop < 0) {
    stop += axis_size;
  }
  // Because the end index points one past the last element, the clamping
  // range depends on the iteration direction.
  return forward ? Clamp(stop, 0, axis_size) : Clamp(stop, -1, axis_size - 1);
}
// True when iteration along an axis has passed `stop` (in the direction
// implied by the sign of `stride`) and should terminate.
inline bool LoopCondition(int index, int stop, int stride) {
  if (stride > 0) {
    return index >= stop;
  }
  return index <= stop;
}
// Assembles a StridedSliceParams from per-dimension index/stride vectors and
// mask bitfields. All three vectors must have the same length.
inline tflite::StridedSliceParams BuildStridedSliceParams(
    int begin_mask, int end_mask, int shrink_axis_mask,
    const std::vector<int>& start_indices, const std::vector<int>& stop_indices,
    const std::vector<int>& strides) {
  tflite::StridedSliceParams op_params{};
  const int dims_count = start_indices.size();
  op_params.start_indices_count = dims_count;
  op_params.stop_indices_count = dims_count;
  op_params.strides_count = dims_count;
  for (int i = 0; i < dims_count; ++i) {
    op_params.start_indices[i] = start_indices[i];
    op_params.stop_indices[i] = stop_indices[i];
    op_params.strides[i] = strides[i];
  }
  op_params.begin_mask = begin_mask;
  op_params.end_mask = end_mask;
  op_params.shrink_axis_mask = shrink_axis_mask;
  // Ellipsis / new-axis expansion is not supported by this helper.
  op_params.ellipsis_mask = 0;
  op_params.new_axis_mask = 0;
  return op_params;
}
} // namespace strided_slice
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include <vector>
namespace tflite {
// Converts a tensor's TfLiteIntArray dims into a RuntimeShape.
// A null tensor maps to an empty (rank-0) shape.
RuntimeShape GetTensorShape(const TfLiteTensor* tensor) {
  if (tensor == nullptr) {
    return RuntimeShape();
  }
  const TfLiteIntArray* dims = tensor->dims;
  return RuntimeShape(dims->size,
                      reinterpret_cast<const int32_t*>(dims->data));
}
// Builds a RuntimeShape from an explicit list of dimensions.
// NOTE(review): the vector is taken by value to match the declaration in
// tensor_ctypes.h; data.size() (size_t) is narrowed to int by RuntimeShape's
// constructor, which is fine for realistic tensor ranks.
RuntimeShape GetTensorShape(std::vector<int32_t> data) {
  return RuntimeShape(data.size(), data.data());
}
} // namespace tflite
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
#include <vector>
#include "tensorflow/lite/core/c/common.h"
#include "tensorflow/lite/core/macros.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
// Returns a typed pointer to the tensor's backing buffer, or nullptr when
// the tensor itself is null.
template <typename T>
inline T* GetTensorData(TfLiteTensor* tensor) {
  if (tensor == nullptr) {
    return nullptr;
  }
  return reinterpret_cast<T*>(tensor->data.raw);
}
// Const overload: typed read-only view of the tensor's backing buffer.
template <typename T>
inline const T* GetTensorData(const TfLiteTensor* tensor) {
  if (tensor == nullptr) {
    return nullptr;
  }
  return reinterpret_cast<const T*>(tensor->data.raw);
}
TFLITE_NOINLINE RuntimeShape GetTensorShape(const TfLiteTensor* tensor);
RuntimeShape GetTensorShape(std::vector<int32_t> data);
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================
*/
// internal/reference/portable_tensor_utils.h has the implementation of the
// functions declared in internal/portable_tensor_utils.h. This somewhat
// confusing setup is derived from how the code is organized in TfLite where it
// is used to select between NEON, SSE and portable implementaitons. See
// https://github.com/tensorflow/tensorflow/blob/d76c23975c4a3a0d7987cfe3f45c76566df06180/tensorflow/lite/kernels/internal/tensor_utils.cc
// for how the code is written in TfLite.
#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h"
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
#define TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
#include "tensorflow/lite/micro/micro_log.h"
#if !defined(TF_LITE_MCU_DEBUG_LOG)
#include <cstdlib>
// Host builds can terminate via the C library.
#define TFLITE_ABORT abort()
#else
// MCU builds: abort() is unavailable, so log and spin forever.
inline void AbortImpl() {
  MicroPrintf("HALTED");
  while (1) {
  }
}
#define TFLITE_ABORT AbortImpl();
#endif
// TFLITE_ASSERT_FALSE expands to a no-op on Arduino builds; elsewhere it
// aborts.
#if defined(ARDUINO)
#define TFLITE_ASSERT_FALSE (static_cast<void>(0))
#else
#define TFLITE_ASSERT_FALSE TFLITE_ABORT
#endif
// Logs `msg` and then aborts.
#define TF_LITE_FATAL(msg) \
  do { \
    MicroPrintf("%s", (msg)); \
    TFLITE_ABORT; \
  } while (0)
// Always-on assertion (unlike <cassert>, not disabled by NDEBUG): logs the
// failing expression text and aborts.
#define TF_LITE_ASSERT(x) \
  do { \
    if (!(x)) TF_LITE_FATAL(#x); \
  } while (0)
#endif  // TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_PADDING_H_
#define TENSORFLOW_LITE_KERNELS_PADDING_H_
#include "tensorflow/lite/core/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
// Computes the per-side padding needed so `out_size` output positions cover
// `in_size` inputs with the given stride and dilated filter. Never negative;
// an odd total padding loses its extra element (see ComputePaddingWithOffset).
inline int ComputePadding(int stride, int dilation_rate, int in_size,
                          int filter_size, int out_size) {
  const int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  const int padding =
      ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
  if (padding > 0) {
    return padding;
  }
  return 0;
}
// It's not guaranteed that padding is symmetric. It's important to keep
// offset for algorithms need all paddings.
// Like ComputePadding, but also reports via `offset` whether the total
// padding was odd (1) or even (0), for algorithms that need the asymmetric
// remainder.
inline int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size,
                                    int filter_size, int out_size,
                                    int* offset) {
  const int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  int total_padding =
      ((out_size - 1) * stride + effective_filter_size - in_size);
  if (total_padding < 0) {
    total_padding = 0;
  }
  *offset = total_padding % 2;  // 1 when padding cannot be split evenly.
  return total_padding / 2;
}
// Matching GetWindowedOutputSize in TensorFlow.
// Matching GetWindowedOutputSize in TensorFlow: output length of one spatial
// dimension for SAME or VALID padding. Returns 0 for stride 0 or an unknown
// padding mode.
inline int ComputeOutSize(TfLitePadding padding, int image_size,
                          int filter_size, int stride, int dilation_rate = 1) {
  // TODO(b/186448822): This uses 0 since the function has no other way to
  // report error case
  if (stride == 0) return 0;
  const int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  switch (padding) {
    case kTfLitePaddingSame:
      // ceil(image_size / stride)
      return (image_size + stride - 1) / stride;
    case kTfLitePaddingValid:
      return (image_size + stride - effective_filter_size) / stride;
    default:
      return 0;
  }
}
// Computes output height/width and the corresponding 2D padding values.
// Output sizes are written first because the paddings depend on them.
inline TfLitePaddingValues ComputePaddingHeightWidth(
    int stride_height, int stride_width, int dilation_rate_height,
    int dilation_rate_width, int in_height, int in_width, int filter_height,
    int filter_width, TfLitePadding padding, int* out_height, int* out_width) {
  *out_width = ComputeOutSize(padding, in_width, filter_width, stride_width,
                              dilation_rate_width);
  *out_height = ComputeOutSize(padding, in_height, filter_height, stride_height,
                               dilation_rate_height);
  TfLitePaddingValues result;
  int height_offset = 0;
  result.height = ComputePaddingWithOffset(stride_height, dilation_rate_height,
                                           in_height, filter_height,
                                           *out_height, &height_offset);
  result.height_offset = height_offset;
  int width_offset = 0;
  result.width = ComputePaddingWithOffset(stride_width, dilation_rate_width,
                                          in_width, filter_width, *out_width,
                                          &width_offset);
  result.width_offset = width_offset;
  return result;
}
// 3D analogue of ComputePaddingHeightWidth: computes output depth/height/width
// and the per-dimension padding values (depth first, matching the original
// fill order).
inline Padding3DValues ComputePadding3DValues(
    int stride_height, int stride_width, int stride_depth,
    int dilation_rate_height, int dilation_rate_width, int dilation_rate_depth,
    int in_height, int in_width, int in_depth, int filter_height,
    int filter_width, int filter_depth, TfLitePadding padding, int* out_height,
    int* out_width, int* out_depth) {
  *out_width = ComputeOutSize(padding, in_width, filter_width, stride_width,
                              dilation_rate_width);
  *out_height = ComputeOutSize(padding, in_height, filter_height, stride_height,
                               dilation_rate_height);
  *out_depth = ComputeOutSize(padding, in_depth, filter_depth, stride_depth,
                              dilation_rate_depth);
  Padding3DValues result;
  int depth_offset = 0;
  result.depth = ComputePaddingWithOffset(stride_depth, dilation_rate_depth,
                                          in_depth, filter_depth, *out_depth,
                                          &depth_offset);
  result.depth_offset = depth_offset;
  int height_offset = 0;
  result.height = ComputePaddingWithOffset(stride_height, dilation_rate_height,
                                           in_height, filter_height,
                                           *out_height, &height_offset);
  result.height_offset = height_offset;
  int width_offset = 0;
  result.width = ComputePaddingWithOffset(stride_width, dilation_rate_width,
                                          in_width, filter_width, *out_width,
                                          &width_offset);
  result.width_offset = width_offset;
  return result;
}
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_PADDING_H_
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/c_api_types.h"
namespace tflite {
// Interface classes that the TFLM framework relies on to get buffers it needs.
// There are two types of buffers that the TFLM framework requires: persistent
// and non-persistent. Persistent buffers, once allocated, are never freed by
// the TFLM framework. Non-persist buffers can be allocated and deallocated by
// the TFLM framework. This file defines two interfaces classes that TFLM
// framework will rely on to manage these buffers.
// Interface class for managing persistent buffers.
class IPersistentBufferAllocator {
 public:
  IPersistentBufferAllocator() {}
  virtual ~IPersistentBufferAllocator() {}
  // Allocates persistent memory. The persistent buffer is never freed.
  // Implementations in this file return nullptr when the request cannot be
  // satisfied.
  virtual uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) = 0;
  // Returns the size of all persistent allocations in bytes.
  virtual size_t GetPersistentUsedBytes() const = 0;
};
// Interface class for managing non-persistent buffers.
// The default non-persistent buffers are temp buffers that are not resizable.
// Support of at least one resizable buffer is required.
class INonPersistentBufferAllocator {
 public:
  INonPersistentBufferAllocator() {}
  virtual ~INonPersistentBufferAllocator() {}
  // Allocates a temporary buffer. This buffer is not resizable.
  // Implementations in this file return nullptr when the request cannot be
  // satisfied.
  virtual uint8_t* AllocateTemp(size_t size, size_t alignment) = 0;
  // Signals that a temporary buffer is no longer needed.
  // Calls are expected to pair 1:1 with AllocateTemp (see the checksum
  // tracking in NonPersistentArenaBufferAllocator).
  virtual void DeallocateTemp(uint8_t* buf) = 0;
  // Returns true if all temporary buffers are already deallocated.
  virtual bool IsAllTempDeallocated() = 0;
  // Signals that all temporary allocations can be reclaimed. TFLM calls this
  // API when it knows that all temporary buffers that it requested has been
  // deallocated. The goal of API is to facilitate implementations of
  // INonPersistentBufferAllocator can reuse buffer with some reasonable
  // complexity.
  virtual TfLiteStatus ResetTempAllocations() = 0;
  // Returns a buffer that is resizable viable ResizeBuffer().
  virtual uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) = 0;
  // Resizes a buffer that is previously returned by the
  // AllocateResizableBuffer.
  virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
                                    size_t alignment) = 0;
  // Frees up the memory occupied by the resizable buffer.
  virtual TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) = 0;
  // Returns a pointer pointing to the start of the overlay memory, which is
  // used for activation tensors and scratch buffers by kernels at Invoke stage.
  virtual uint8_t* GetOverlayMemoryAddress() const = 0;
  // Reserves the size of the overlay memory. This overlay is reserved for the
  // kernels at Invoke stage. This is referred to as the overlay because before
  // Invoket state, the same memory can be used for temp buffers. The layout of
  // the memory is planned by the memory planner separately at Invoke stage.
  virtual TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
                                                         size_t alignment) = 0;
  // Returns the size of non-persistent buffer in use.
  virtual size_t GetNonPersistentUsedBytes() const = 0;
  // Returns the number of bytes available with a given alignment. This number
  // takes in account any temporary allocations.
  virtual size_t GetAvailableMemory(size_t alignment) const = 0;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_log.h"
namespace tflite {
// Manages the arena [buffer, buffer + buffer_size). Initially the resizable
// buffer is empty (head_temp_ == buffer) and temp allocations start at the
// arena head (next_temp_ == buffer).
NonPersistentArenaBufferAllocator::NonPersistentArenaBufferAllocator(
    uint8_t* buffer, size_t buffer_size)
    : buffer_head_(buffer),
      buffer_tail_(buffer + buffer_size),
      head_temp_(buffer),
      next_temp_(buffer) {}
NonPersistentArenaBufferAllocator::~NonPersistentArenaBufferAllocator() {}
// Allocates a temporary buffer. This buffer is not resizable.
// Allocates a temporary buffer by bump-allocating from next_temp_ toward the
// arena tail. Returns nullptr when there is not enough room.
uint8_t* NonPersistentArenaBufferAllocator::AllocateTemp(size_t size,
                                                         size_t alignment) {
  uint8_t* const aligned_result = AlignPointerUp(next_temp_, alignment);
  const size_t available_memory = buffer_tail_ - aligned_result;
  if (available_memory < size) {
    // NOTE(review): %u with size_t arguments assumes size_t == unsigned int
    // (true on most 32-bit MCU targets) — confirm for 64-bit host builds.
    MicroPrintf(
        "Failed to allocate temp memory. Requested: %u, "
        "available %u, missing: %u",
        size, available_memory, size - available_memory);
    return nullptr;
  }
  next_temp_ = aligned_result + size;
  // Track outstanding temps via a count and an XOR checksum of their
  // addresses so IsAllTempDeallocated() can detect unmatched deallocations.
  temp_buffer_ptr_check_sum_ ^= reinterpret_cast<intptr_t>(aligned_result);
  temp_buffer_count_++;
  return aligned_result;
}
// Signals that a temporary buffer is no longer needed.
// Signals that a temporary buffer is no longer needed: removes `temp_buf`
// from the outstanding-buffer XOR checksum and decrements the outstanding
// count. The memory itself is only reclaimed by ResetTempAllocations().
void NonPersistentArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) {
  temp_buffer_count_--;
  temp_buffer_ptr_check_sum_ ^= reinterpret_cast<intptr_t>(temp_buf);
}
// Returns true if all temporary buffers are already deallocated.
// Returns true if all temporary buffers are already deallocated, i.e. the
// outstanding count is zero and every allocated pointer was XORed back out
// of the checksum.
bool NonPersistentArenaBufferAllocator::IsAllTempDeallocated() {
  const bool all_deallocated =
      temp_buffer_count_ == 0 && temp_buffer_ptr_check_sum_ == 0;
  if (!all_deallocated) {
    MicroPrintf(
        "Number of allocated temp buffers: %d. Checksum passing status: %d",
        temp_buffer_count_, !temp_buffer_ptr_check_sum_);
  }
  return all_deallocated;
}
// Signals that all temporary allocations can be reclaimed. TFLM calls this
// API when it knows that all temporary buffers that it requested has been
// deallocated. The goal of API is to facilitate implementations of
// INonPersistentBufferAllocator can reuse buffer with some reasonable
// complexity.
// Reclaims the whole temp region by rewinding the bump pointer to the end of
// the resizable buffer. Fails if any temp buffer is still outstanding.
TfLiteStatus NonPersistentArenaBufferAllocator::ResetTempAllocations() {
  if (IsAllTempDeallocated()) {
    next_temp_ = head_temp_;
    return kTfLiteOk;
  }
  MicroPrintf(
      "All temp buffers must be freed before calling ResetTempAllocations()");
  return kTfLiteError;
}
// Returns a buffer that is resizable viable ResizeBuffer().
// Returns the single supported resizable buffer, which always starts at the
// (aligned) arena head. Fails if one is already outstanding or the resize
// itself fails.
uint8_t* NonPersistentArenaBufferAllocator::AllocateResizableBuffer(
    size_t size, size_t alignment) {
  if (resizable_buffer_allocated_) {
    MicroPrintf(
        "Cannot allocate a new resizable buffer when one is already allocated");
    return nullptr;
  }
  uint8_t* const resizable_buf = AlignPointerUp(buffer_head_, alignment);
  if (ResizeBuffer(resizable_buf, size, alignment) != kTfLiteOk) {
    return nullptr;
  }
  resizable_buffer_allocated_ = true;
  return resizable_buf;
}
// Resizes a buffer that is previously returned by the AllocateResizableBuffer.
// Note that ResizeBuffer(old_resizable_buf, 0, 1) effectively deallocates
// a previous allocated resizable buffer.
// Resizes the single resizable buffer at the arena head. Three gates, in
// order: the pointer must be the canonical head buffer, no temp allocations
// may be outstanding (resizing moves head_temp_), and the new size must fit.
TfLiteStatus NonPersistentArenaBufferAllocator::ResizeBuffer(
    uint8_t* resizable_buf, size_t size, size_t alignment) {
  // Only supports one resizable buffer, which starts at the buffer head.
  uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
  if (resizable_buf != expect_resizable_buf) {
    MicroPrintf("Internal error: buffer is not resizable");
    return kTfLiteError;
  }
  // head_temp_ != next_temp_ means temp allocations exist past the resizable
  // region; growing over them would corrupt their contents.
  if (head_temp_ != next_temp_) {
    MicroPrintf("ResetTempAllocations() is not called before ResizeBuffer().");
    return kTfLiteError;
  }
  const size_t available_memory = buffer_tail_ - expect_resizable_buf;
  if (available_memory < size) {
    MicroPrintf(
        "Failed to resize buffer. Requested: %u, available %u, missing: %u",
        size, available_memory, size - available_memory);
    return kTfLiteError;
  }
  // Commit: the resizable region now ends at head_temp_, and future temp
  // allocations start right after it.
  head_temp_ = expect_resizable_buf + size;
  next_temp_ = head_temp_;
  return kTfLiteOk;
}
// Frees up the memory occupied by the resizable buffer.
// Frees the resizable buffer by shrinking it to zero bytes, which hands the
// region back to the temp-allocation pool.
TfLiteStatus NonPersistentArenaBufferAllocator::DeallocateResizableBuffer(
    uint8_t* resizable_buf) {
  const TfLiteStatus status = ResizeBuffer(resizable_buf, 0, 1);
  if (status != kTfLiteOk) {
    return status;
  }
  resizable_buffer_allocated_ = false;
  return kTfLiteOk;
}
// Returns a pointer pointing to the start of the overlay memory, which is
// used for activation tensors and scratch buffers by kernels at Invoke stage.
// The overlay memory (activation tensors / kernel scratch at Invoke stage)
// coincides with the start of this arena.
uint8_t* NonPersistentArenaBufferAllocator::GetOverlayMemoryAddress() const {
  return buffer_head_;
}
// Reserves the size of the overlay memory. This overlay is reserved for the
// kernels at Invoke stage. This is referred to as the overlay because before
// Invoket state, the same memory can be used for temp buffers. The layout of
// the memory is planned by the memory planner separately at Invoke stage.
// Reserves `size` bytes of overlay memory for kernels at Invoke stage. The
// overlay region is simply the single resizable buffer at the arena head, so
// this delegates to ResizeBuffer().
TfLiteStatus
NonPersistentArenaBufferAllocator::ReserveNonPersistentOverlayMemory(
    size_t size, size_t alignment) {
  uint8_t* const overlay_start = AlignPointerUp(buffer_head_, alignment);
  return ResizeBuffer(overlay_start, size, alignment);
}
// Returns the size of non-persistent buffer in use.
// In-use bytes = resizable buffer plus any outstanding temp allocations,
// i.e. everything from the arena head up to the next temp bump pointer.
size_t NonPersistentArenaBufferAllocator::GetNonPersistentUsedBytes() const {
  return (next_temp_ - buffer_head_);
}
// Returns the number of bytes available with a given alignment. This number
// takes in account any temporary allocations.
// Returns the bytes still available at the given alignment: the span between
// the next temp allocation point and the arena tail, shrunk inward so both
// ends satisfy `alignment`. Accounts for outstanding temp allocations.
size_t NonPersistentArenaBufferAllocator::GetAvailableMemory(
    size_t alignment) const {
  uint8_t* const usable_begin = AlignPointerUp(next_temp_, alignment);
  uint8_t* const usable_end = AlignPointerDown(buffer_tail_, alignment);
  return usable_end - usable_begin;
}
} // namespace tflite
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// Implement INonPersistentBufferAllocator on an arena that is dedicated for
// non-persistent buffers.
class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator {
 public:
  NonPersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size);
  virtual ~NonPersistentArenaBufferAllocator();
  // Allocates a temporary buffer. This buffer is not resizable.
  // Returns nullptr when the request cannot be satisfied.
  uint8_t* AllocateTemp(size_t size, size_t alignment) override;
  // Signals that a temporary buffer is no longer needed.
  void DeallocateTemp(uint8_t* buf) override;
  // Returns true if all temporary buffers are already deallocated.
  bool IsAllTempDeallocated() override;
  // Signals that all temporary allocations can be reclaimed. TFLM calls this
  // API when it knows that all temporary buffers that it requested has been
  // deallocated.
  TfLiteStatus ResetTempAllocations() override;
  // Returns a buffer that is resizable viable ResizeBuffer().
  // Only one resizable buffer is supported; it starts at the arena head.
  uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) override;
  // Resizes a buffer that is previously returned by the
  // AllocateResizableBuffer.
  TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
                            size_t alignment) override;
  // Frees up the memory occupied by the resizable buffer.
  TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) override;
  // Returns a pointer pointing to the start of the overlay memory, which is
  // used for activation tensors and scratch buffers by kernels at Invoke stage.
  uint8_t* GetOverlayMemoryAddress() const override;
  // Reserves the size of the overlay memory. This overlay is reserved for the
  // kernels at Invoke stage. This is referred to as the overlay because before
  // Invoket state, the same memory can be used for temp buffers. The layout of
  // the memory is planned by the memory planner separately at Invoke stage.
  TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
                                                 size_t alignment) override;
  // Returns the size of non-persistent buffer in use.
  size_t GetNonPersistentUsedBytes() const override;
  // Returns the number of bytes available with a given alignment. This number
  // takes in account any temporary allocations.
  size_t GetAvailableMemory(size_t alignment) const override;
  // NOTE(review): macro from compatibility.h; presumably hides operator
  // delete to forbid heap destruction through this type — confirm there.
  TF_LITE_REMOVE_VIRTUAL_DELETE
 private:
  // The memory arena that this allocator manages.
  uint8_t* const buffer_head_;
  uint8_t* const buffer_tail_;
  // The whole region is split into two parts:
  // buffer_head_ to head_temp_ - 1 belongs to the only resizable buffer.
  // head_temp_ to buffer_tail_ can be used for (non-resizable) temp buffers.
  uint8_t* head_temp_;
  // next_temp_ points to the next available temp buffer allocation address and
  // its range is between head_temp_ and buffer_tail_
  uint8_t* next_temp_;
  // XOR Check sum for outstanding temp buffers.
  // If all temp buffers are deallocated OR no temp buffers are allocated,
  // temp_buffer_ptr_check_sum_ == nullptr.
  intptr_t temp_buffer_ptr_check_sum_ = 0;
  // Count of outstanding temp buffers.
  int temp_buffer_count_ = 0;
  // True while the single resizable buffer is outstanding.
  bool resizable_buffer_allocated_ = false;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_log.h"
namespace tflite {
// Constructs an allocator over the caller-owned arena [buffer, buffer +
// buffer_size). tail_temp_ starts at the arena end; allocations are carved
// off downward from there (see AllocatePersistentBuffer).
PersistentArenaBufferAllocator::PersistentArenaBufferAllocator(
    uint8_t* buffer, size_t buffer_size)
    : buffer_head_(buffer),
      buffer_tail_(buffer + buffer_size),
      tail_temp_(buffer_tail_) {}
PersistentArenaBufferAllocator::~PersistentArenaBufferAllocator() {}
// Carves `size` bytes off the low end of the allocated region: steps down by
// `size` from the current frontier and rounds down to `alignment`. Returns
// nullptr (with a diagnostic) when the request does not fit.
uint8_t* PersistentArenaBufferAllocator::AllocatePersistentBuffer(
    size_t size, size_t alignment) {
  // NOTE(review): when `size` exceeds the remaining arena, `tail_temp_ - size`
  // briefly forms a pointer before the buffer start before the bounds check
  // below rejects it -- technically UB, though it mirrors the sibling
  // SingleArenaBufferAllocator implementation. Confirm acceptable.
  uint8_t* const aligned_result =
      AlignPointerDown(tail_temp_ - size, alignment);
  if (aligned_result < buffer_head_) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
    const size_t missing_memory = buffer_head_ - aligned_result;
    MicroPrintf(
        "Failed to allocate tail memory. Requested: %u, "
        "available %u, missing: %u",
        size, size - missing_memory, missing_memory);
#endif
    return nullptr;
  }
  // Commit the allocation; persistent buffers are never freed individually.
  tail_temp_ = aligned_result;
  return aligned_result;
}
// Bytes consumed by persistent allocations: the span between the current
// allocation frontier and the end of the arena.
size_t PersistentArenaBufferAllocator::GetPersistentUsedBytes() const {
  const auto used_bytes = buffer_tail_ - tail_temp_;
  return static_cast<size_t>(used_bytes);
}
} // namespace tflite
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// PersistentArenaBufferAllocator is an implementation of the
// IPersistentBufferAllocator interface, backed by an arena that is dedicated
// to persistent buffers.
class PersistentArenaBufferAllocator : public IPersistentBufferAllocator {
 public:
  // `buffer` must outlive this allocator; the class does not take ownership.
  PersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size);
  virtual ~PersistentArenaBufferAllocator();

  // Allocates persistent memory. The persistent buffer is never freed.
  // Returns nullptr if errors occurred.
  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;

  // Returns the size of all persistent allocations in bytes.
  size_t GetPersistentUsedBytes() const override;

  TF_LITE_REMOVE_VIRTUAL_DELETE

 private:
  // The memory arena that this allocator manages.
  uint8_t* const buffer_head_;
  uint8_t* const buffer_tail_;

  // The whole region is split into two parts:
  // tail_temp_ to buffer_tail_ contains allocated buffers;
  // buffer_head_ to tail_temp_ - 1 belongs to still available spaces.
  // The allocated region therefore grows downward from the top of the arena,
  // emulating SingleArenaBufferAllocator's persistent (tail) part.
  uint8_t* tail_temp_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h"
#include <new>
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
// Wraps a SingleArenaBufferAllocator over [buffer_head, buffer_head +
// buffer_size) with all recording counters starting at zero.
RecordingSingleArenaBufferAllocator::RecordingSingleArenaBufferAllocator(
    uint8_t* buffer_head, size_t buffer_size)
    : SingleArenaBufferAllocator(buffer_head, buffer_size),
      requested_head_bytes_(0),
      requested_tail_bytes_(0),
      used_bytes_(0),
      alloc_count_(0) {}
RecordingSingleArenaBufferAllocator::~RecordingSingleArenaBufferAllocator() {}
// Bootstraps an allocator inside its own arena: a temporary stack instance
// carves out space for the allocator object from the tail of the arena, then
// the instance (with its adjusted tail) is copied into that space via
// placement new.
RecordingSingleArenaBufferAllocator*
RecordingSingleArenaBufferAllocator::Create(uint8_t* buffer_head,
                                            size_t buffer_size) {
  TFLITE_DCHECK(buffer_head != nullptr);
  RecordingSingleArenaBufferAllocator tmp =
      RecordingSingleArenaBufferAllocator(buffer_head, buffer_size);
  // NOTE(review): allocator_buffer is not null-checked; assumes the arena is
  // large enough to hold the allocator object itself. Confirm callers
  // guarantee this.
  uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer(
      sizeof(RecordingSingleArenaBufferAllocator),
      alignof(RecordingSingleArenaBufferAllocator));
  // Use the default copy constructor to populate internal states.
  return new (allocator_buffer) RecordingSingleArenaBufferAllocator(tmp);
}
// Total bytes callers asked for (head + tail), before any padding or
// alignment overhead.
size_t RecordingSingleArenaBufferAllocator::GetRequestedBytes() const {
  const size_t total_requested =
      requested_head_bytes_ + requested_tail_bytes_;
  return total_requested;
}
// Bytes actually consumed (including alignment padding), as accumulated by
// the recording overrides of ResizeBuffer/AllocatePersistentBuffer.
size_t RecordingSingleArenaBufferAllocator::GetUsedBytes() const {
  return used_bytes_;
}
// Number of successful persistent (tail) allocations recorded; head resizes
// are not counted here.
size_t RecordingSingleArenaBufferAllocator::GetAllocatedCount() const {
  return alloc_count_;
}
// Delegates to the base-class resize and, on success, records the actual head
// movement and the requested size.
TfLiteStatus RecordingSingleArenaBufferAllocator::ResizeBuffer(
    uint8_t* resizable_buf, size_t size, size_t alignment) {
  // Capture the head before the resize so the delta can be attributed to this
  // call.
  const uint8_t* previous_head = head();
  TfLiteStatus status =
      SingleArenaBufferAllocator::ResizeBuffer(resizable_buf, size, alignment);
  if (status == kTfLiteOk) {
    // head() grows upward, so new-minus-old is the byte delta; a shrink makes
    // the delta negative and reduces used_bytes_ accordingly.
    used_bytes_ += head() - previous_head;
    // There is a single resizable buffer, so the latest request replaces
    // (rather than accumulates into) the head byte count.
    requested_head_bytes_ = size;
  }
  return status;
}
// Delegates to the base-class tail allocation and, on success, records the
// actual bytes consumed, the requested size, and the allocation count.
uint8_t* RecordingSingleArenaBufferAllocator::AllocatePersistentBuffer(
    size_t size, size_t alignment) {
  const uint8_t* previous_tail = tail();
  uint8_t* result =
      SingleArenaBufferAllocator::AllocatePersistentBuffer(size, alignment);
  if (result != nullptr) {
    // tail() moves downward on allocation, so old-minus-new is the number of
    // bytes consumed including alignment padding.
    used_bytes_ += previous_tail - tail();
    requested_tail_bytes_ += size;
    alloc_count_++;
  }
  return result;
}
} // namespace tflite
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// Utility class used to log allocations of a SingleArenaBufferAllocator. Should
// only be used in debug/evaluation settings or unit tests to evaluate
// allocation usage.
class RecordingSingleArenaBufferAllocator : public SingleArenaBufferAllocator {
 public:
  RecordingSingleArenaBufferAllocator(uint8_t* buffer_head, size_t buffer_size);
  // TODO(b/157615197): Cleanup constructors/destructor and use factory
  // functions.
  ~RecordingSingleArenaBufferAllocator() override;

  // Creates an allocator whose own storage is placement-new'ed into the tail
  // of the given arena.
  static RecordingSingleArenaBufferAllocator* Create(uint8_t* buffer_head,
                                                     size_t buffer_size);

  // Returns the number of bytes requested from the head or tail.
  size_t GetRequestedBytes() const;

  // Returns the number of bytes actually allocated from the head or tail. This
  // value will be >= to the number of requested bytes due to padding and
  // alignment.
  size_t GetUsedBytes() const;

  // Returns the number of alloc calls from the head or tail.
  size_t GetAllocatedCount() const;

  TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
                            size_t alignment) override;
  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;

 private:
  // Latest size passed to a successful ResizeBuffer() call.
  size_t requested_head_bytes_;
  // Sum of sizes passed to successful AllocatePersistentBuffer() calls.
  size_t requested_tail_bytes_;
  // Actual bytes consumed at both ends, including alignment padding.
  size_t used_bytes_;
  // Count of successful persistent allocations.
  size_t alloc_count_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <new>

#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_log.h"
namespace tflite {
// Constructs an allocator over [buffer_head, buffer_tail). head_ and temp_
// start at the low end, tail_ at the high end; the two ends grow toward each
// other as allocations are made.
SingleArenaBufferAllocator::SingleArenaBufferAllocator(uint8_t* buffer_head,
                                                       uint8_t* buffer_tail)
    : buffer_head_(buffer_head),
      buffer_tail_(buffer_tail),
      head_(buffer_head),
      tail_(buffer_tail),
      temp_(buffer_head_) {}
// Convenience overload: arena given as a base pointer plus size.
SingleArenaBufferAllocator::SingleArenaBufferAllocator(uint8_t* buffer,
                                                       size_t buffer_size)
    : SingleArenaBufferAllocator(buffer, buffer + buffer_size) {}
/* static */
// Bootstraps an allocator inside its own arena: a temporary stack instance
// reserves space for the allocator object from the tail, then is copied into
// that space via placement new.
SingleArenaBufferAllocator* SingleArenaBufferAllocator::Create(
    uint8_t* buffer_head, size_t buffer_size) {
  TFLITE_DCHECK(buffer_head != nullptr);
  SingleArenaBufferAllocator tmp =
      SingleArenaBufferAllocator(buffer_head, buffer_size);

  // Allocate enough bytes from the buffer to create a
  // SingleArenaBufferAllocator. The new instance will use the current adjusted
  // tail buffer from the tmp allocator instance.
  // NOTE(review): allocator_buffer is not null-checked; assumes the arena is
  // large enough to hold the allocator object itself.
  uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer(
      sizeof(SingleArenaBufferAllocator), alignof(SingleArenaBufferAllocator));
  // Use the default copy constructor to populate internal states.
  return new (allocator_buffer) SingleArenaBufferAllocator(tmp);
}
SingleArenaBufferAllocator::~SingleArenaBufferAllocator() {}
// Returns the single head-anchored resizable buffer, sized to `size`, or
// nullptr when the resize request cannot be satisfied.
uint8_t* SingleArenaBufferAllocator::AllocateResizableBuffer(size_t size,
                                                             size_t alignment) {
  // There is exactly one resizable buffer and it always begins at the
  // aligned arena head.
  uint8_t* const resizable_buf = AlignPointerUp(buffer_head_, alignment);
  const TfLiteStatus status = ResizeBuffer(resizable_buf, size, alignment);
  return (status == kTfLiteOk) ? resizable_buf : nullptr;
}
// "Frees" the resizable buffer by shrinking it to zero bytes, returning the
// head to the start of the arena.
TfLiteStatus SingleArenaBufferAllocator::DeallocateResizableBuffer(
    uint8_t* resizable_buf) {
  return ResizeBuffer(resizable_buf, 0, 1);
}
// Reserves the planner-determined overlay region by resizing the single head
// buffer to `size`.
TfLiteStatus SingleArenaBufferAllocator::ReserveNonPersistentOverlayMemory(
    size_t size, size_t alignment) {
  uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
  return ResizeBuffer(expect_resizable_buf, size, alignment);
}
// Resizes the single head buffer to `size` bytes. Fails if `resizable_buf` is
// not the head buffer, if outstanding temp allocations exist (head_ != temp_),
// or if the request does not fit below tail_.
TfLiteStatus SingleArenaBufferAllocator::ResizeBuffer(uint8_t* resizable_buf,
                                                      size_t size,
                                                      size_t alignment) {
  // Only supports one resizable buffer, which starts at the buffer head.
  uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
  if (head_ != temp_ || resizable_buf != expect_resizable_buf) {
    MicroPrintf(
        "Internal error: either buffer is not resizable or "
        "ResetTempAllocations() is not called before ResizeBuffer().");
    return kTfLiteError;
  }
  uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment);
  const size_t available_memory = tail_ - aligned_result;
  if (available_memory < size) {
    MicroPrintf(
        "Failed to resize buffer. Requested: %u, available %u, missing: %u",
        size, available_memory, size - available_memory);
    return kTfLiteError;
  }
  // Commit: the head moves to the end of the resized buffer and the temp
  // watermark is reset to match it.
  head_ = aligned_result + size;
  temp_ = head_;
  return kTfLiteOk;
}
// Allocates `size` bytes from the tail: steps down by `size` from the current
// tail and rounds down to `alignment`. Persistent buffers are never freed
// individually. Returns nullptr (with a diagnostic) on overflow into head_.
uint8_t* SingleArenaBufferAllocator::AllocatePersistentBuffer(
    size_t size, size_t alignment) {
  // NOTE(review): when `size` exceeds the remaining arena, `tail_ - size`
  // briefly forms an out-of-bounds pointer before the check below rejects
  // it -- technically UB; confirm acceptable.
  uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment);
  if (aligned_result < head_) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
    const size_t missing_memory = head_ - aligned_result;
    MicroPrintf(
        "Failed to allocate tail memory. Requested: %u, "
        "available %u, missing: %u",
        size, size - missing_memory, missing_memory);
#endif
    return nullptr;
  }
  tail_ = aligned_result;
  return aligned_result;
}
// Allocates a short-lived buffer above the current temp watermark. The buffer
// is valid until ResetTempAllocations(); each successful call must be paired
// with a DeallocateTemp().
uint8_t* SingleArenaBufferAllocator::AllocateTemp(size_t size,
                                                  size_t alignment) {
  uint8_t* const aligned_result = AlignPointerUp(temp_, alignment);
  const size_t available_memory = tail_ - aligned_result;
  if (available_memory < size) {
    MicroPrintf(
        "Failed to allocate temp memory. Requested: %u, "
        "available %u, missing: %u",
        size, available_memory, size - available_memory);
    return nullptr;
  }
  temp_ = aligned_result + size;
  // Fold the new pointer into the XOR checksum and bump the outstanding
  // count; DeallocateTemp() reverses both.
  temp_buffer_ptr_check_sum_ ^= (reinterpret_cast<intptr_t>(aligned_result));
  temp_buffer_count_++;
  return aligned_result;
}
// Records that `temp_buf` is no longer in use. Memory is not reclaimed here;
// it becomes reusable only after ResetTempAllocations().
void SingleArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) {
  // XOR-ing the pointer back out cancels the matching AllocateTemp() entry;
  // a fully balanced sequence nets the checksum to zero.
  const intptr_t ptr_value = reinterpret_cast<intptr_t>(temp_buf);
  temp_buffer_ptr_check_sum_ ^= ptr_value;
  --temp_buffer_count_;
}
// Returns true when every AllocateTemp() has been matched by a
// DeallocateTemp(). Both the count and the XOR checksum must be zero; the
// checksum also catches mismatched pointers, not just mismatched call counts.
bool SingleArenaBufferAllocator::IsAllTempDeallocated() {
  if (temp_buffer_count_ != 0 || temp_buffer_ptr_check_sum_ != 0) {
    MicroPrintf(
        "Number of allocated temp buffers: %d. Checksum passing status: %d",
        temp_buffer_count_, !temp_buffer_ptr_check_sum_);
    return false;
  }
  return true;
}
// Rewinds the temp watermark back to the committed head, reclaiming all
// temporary allocations at once. Fails if any temp buffer is still
// outstanding.
TfLiteStatus SingleArenaBufferAllocator::ResetTempAllocations() {
  // TODO(b/209453859): enable error check based on IsAllTempDeallocated after
  // all AllocateTemp have been paired with DeallocateTemp
  if (!IsAllTempDeallocated()) {
    MicroPrintf(
        "All temp buffers must be freed before calling ResetTempAllocations()");
    return kTfLiteError;
  }
  temp_ = head_;
  return kTfLiteOk;
}
// The overlay (non-persistent) region always starts at the arena base.
uint8_t* SingleArenaBufferAllocator::GetOverlayMemoryAddress() const {
  return buffer_head_;
}
// Bytes in use at the low end of the arena: whichever of the committed head
// or the temp watermark reaches further, since temp allocations can extend
// past head_ before ResetTempAllocations().
size_t SingleArenaBufferAllocator::GetNonPersistentUsedBytes() const {
  return std::max(head_ - buffer_head_, temp_ - buffer_head_);
}
// Bytes consumed by persistent (tail) allocations: the span between the
// current tail and the end of the arena.
size_t SingleArenaBufferAllocator::GetPersistentUsedBytes() const {
  const auto used_bytes = buffer_tail_ - tail_;
  return static_cast<size_t>(used_bytes);
}
// Free bytes between the temp watermark and the persistent tail, after
// rounding both edges inward to `alignment`.
size_t SingleArenaBufferAllocator::GetAvailableMemory(size_t alignment) const {
  uint8_t* const aligned_temp = AlignPointerUp(temp_, alignment);
  uint8_t* const aligned_tail = AlignPointerDown(tail_, alignment);
  return aligned_tail - aligned_temp;
}
// Total arena bytes in use at both ends (persistent + non-persistent).
size_t SingleArenaBufferAllocator::GetUsedBytes() const {
  return GetPersistentUsedBytes() + GetNonPersistentUsedBytes();
}
// Total capacity of the managed arena in bytes.
size_t SingleArenaBufferAllocator::GetBufferSize() const {
  return buffer_tail_ - buffer_head_;
}
uint8_t* SingleArenaBufferAllocator::head() const { return head_; }
uint8_t* SingleArenaBufferAllocator::tail() const { return tail_; }
} // namespace tflite
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// TODO(petewarden): This allocator never frees up or reuses any memory, even
// though we have enough information about lifetimes of the tensors to do so.
// This makes it pretty wasteful, so we should use a more intelligent method.
class SingleArenaBufferAllocator : public INonPersistentBufferAllocator,
                                   public IPersistentBufferAllocator {
 public:
  // TODO(b/157615197): Cleanup constructors/destructor and use factory
  // functions.
  SingleArenaBufferAllocator(uint8_t* buffer_head, uint8_t* buffer_tail);
  SingleArenaBufferAllocator(uint8_t* buffer, size_t buffer_size);
  virtual ~SingleArenaBufferAllocator();

  // Creates a new SingleArenaBufferAllocator from a given buffer head and size.
  static SingleArenaBufferAllocator* Create(uint8_t* buffer_head,
                                            size_t buffer_size);

  // Resizes a buffer that is previously returned by the
  // AllocateResizableBuffer. In current implementation, it Adjusts the head
  // (lowest address and moving upwards) memory allocation to a given size.
  // Calls to this method will also invalidate all temporary allocation values
  // (it sets the location of temp space at the end of the head section). This
  // call will fail if a chain of allocations through AllocateTemp() have not
  // been cleaned up with a call to ResetTempAllocations().
  virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
                                    size_t alignment) override;

  // Returns a buffer that is resizable via ResizeBuffer(). Only one
  // resizable buffer is currently supported.
  virtual uint8_t* AllocateResizableBuffer(size_t size,
                                           size_t alignment) override;

  // Frees up the memory occupied by the resizable buffer
  virtual TfLiteStatus DeallocateResizableBuffer(
      uint8_t* resizable_buf) override;

  // Reserves the non-persistent memory that is planned by the memory planner.
  virtual TfLiteStatus ReserveNonPersistentOverlayMemory(
      size_t size, size_t alignment) override;

  // Allocates persistent memory starting at the tail of the arena (highest
  // address and moving downwards).
  virtual uint8_t* AllocatePersistentBuffer(size_t size,
                                            size_t alignment) override;

  // Allocates a temporary buffer from the head of the arena (lowest address and
  // moving upwards) but does not update the actual head allocation size or
  // position. The returned buffer is guaranteed until either
  // ResetTempAllocations() is called or the head is resized via
  // ResizeBuffer(). Repeat calls to this function will create a chain of temp
  // allocations. All calls to AllocateTemp() must end with a call to
  // ResetTempAllocations(). If ResizeBuffer() is called before a call to
  // ResetTempAllocations(), it will fail with an error message.
  virtual uint8_t* AllocateTemp(size_t size, size_t alignment) override;

  // Signals that a temporary buffer is no longer needed. This is currently for
  // book-keeping purpose and the memory region are not immediately available
  // for re-use. The deallocated memory region are only reclaimed after
  // ResetTempAllocations is called as it is right now.
  virtual void DeallocateTemp(uint8_t* buf) override;

  // Returns true if all temporary buffers are already deallocated.
  virtual bool IsAllTempDeallocated() override;

  // Resets a chain of temporary allocations back to the current head of the
  // arena (lowest address).
  virtual TfLiteStatus ResetTempAllocations() override;

  // Returns a pointer to the buffer currently assigned to the head section,
  // i.e. the start of the overlay (non-persistent) region of the arena.
  uint8_t* GetOverlayMemoryAddress() const override;

  // Returns the size of the head section in bytes.
  size_t GetNonPersistentUsedBytes() const override;

  // Returns the size of all allocations in the tail section in bytes.
  size_t GetPersistentUsedBytes() const override;

  // Returns the number of bytes available with a given alignment. This number
  // takes in account any temporary allocations.
  size_t GetAvailableMemory(size_t alignment) const override;

  // Returns the number of used bytes in the allocator. This number takes in
  // account any temporary allocations.
  size_t GetUsedBytes() const;

  TF_LITE_REMOVE_VIRTUAL_DELETE

 protected:
  // Returns a pointer to the current end of the head buffer.
  uint8_t* head() const;

  // Returns a pointer to the current end of the tail buffer.
  uint8_t* tail() const;

 private:
  size_t GetBufferSize() const;

  uint8_t* buffer_head_;
  uint8_t* buffer_tail_;
  uint8_t* head_;
  uint8_t* tail_;
  uint8_t* temp_;

  // The combination of the checksum of outstanding temporary buffer pointers
  // AND the count of outstanding temporary buffer provide a low cost mechanism
  // to audit temporary buffers' allocation and deallocation.
  //
  // XOR Check sum for outstanding temp buffers.
  // If all temp buffers are deallocated OR no temp buffers are allocated,
  // temp_buffer_ptr_check_sum_ == 0.
  intptr_t temp_buffer_ptr_check_sum_ = 0;
  // Count of outstanding temp buffers.
  int temp_buffer_count_ = 0;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_BENCHMARKS_MICRO_BENCHMARK_H_
#define TENSORFLOW_LITE_MICRO_BENCHMARKS_MICRO_BENCHMARK_H_
#include <climits>
#include "tensorflow/lite/micro/micro_log.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler_interface.h"
#include "tensorflow/lite/micro/micro_resource_variable.h"
#include "tensorflow/lite/micro/micro_time.h"
#include "tensorflow/lite/micro/recording_micro_interpreter.h"
namespace tflite {
template <typename inputT>
class MicroBenchmarkRunner {
public:
// The lifetimes of model, op_resolver, tensor_arena, profiler must exceed
// that of the created MicroBenchmarkRunner object.
MicroBenchmarkRunner(const uint8_t* model,
const tflite::MicroOpResolver* op_resolver,
uint8_t* tensor_arena, int tensor_arena_size,
MicroProfilerInterface* profiler,
int num_resource_variables = 0)
: allocator_(
RecordingMicroAllocator::Create(tensor_arena, tensor_arena_size)),
interpreter_(
GetModel(model), *op_resolver, allocator_,
MicroResourceVariables::Create(allocator_, num_resource_variables),
profiler) {
interpreter_.AllocateTensors();
}
void RunSingleIteration() {
// Run the model on this input and make sure it succeeds.
TfLiteStatus invoke_status = interpreter_.Invoke();
if (invoke_status == kTfLiteError) {
MicroPrintf("Invoke failed.");
}
}
int NumInputs() { return interpreter_.inputs().size(); }
void SetRandomInput(const int random_seed, int input_index = 0) {
// The pseudo-random number generator is initialized to a constant seed
std::srand(random_seed);
TfLiteTensor* input = interpreter_.input(input_index);
// Pre-populate input tensor with random values.
int input_length = input->bytes / sizeof(inputT);
inputT* input_values = tflite::GetTensorData<inputT>(input);
for (int i = 0; i < input_length; i++) {
// Pre-populate input tensor with a random value based on a constant seed.
input_values[i] = static_cast<inputT>(
std::rand() % (std::numeric_limits<inputT>::max() -
std::numeric_limits<inputT>::min() + 1));
}
}
void SetInput(const inputT* custom_input, int input_index = 0) {
TfLiteTensor* input = interpreter_.input(input_index);
inputT* input_buffer = tflite::GetTensorData<inputT>(input);
int input_length = input->bytes / sizeof(inputT);
for (int i = 0; i < input_length; i++) {
input_buffer[i] = custom_input[i];
}
}
void PrintAllocations() const {
interpreter_.GetMicroAllocator().PrintAllocations();
}
private:
tflite::RecordingMicroAllocator* allocator_;
tflite::RecordingMicroInterpreter interpreter_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_BENCHMARKS_MICRO_BENCHMARK_H_
\ No newline at end of file
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
// C++ will automatically create class-specific delete operators for virtual
// objects, which by default call the global delete function. For embedded
// applications we want to avoid this, and won't be calling new/delete on these
// objects, so we need to override the default implementation with one that does
// nothing to avoid linking in ::delete().
// This macro needs to be included in all subclasses of a virtual base class in
// the private section.
// On Arduino builds, define a no-op class-specific operator delete so that
// classes with vtables do not pull in the global ::operator delete.
#ifdef ARDUINO
#define TF_LITE_REMOVE_VIRTUAL_DELETE \
  void operator delete(void* p) {}
#else
// Elsewhere the macro expands to nothing.
#define TF_LITE_REMOVE_VIRTUAL_DELETE
#endif
#endif // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_COMPRESSION_H_
#define TENSORFLOW_LITE_MICRO_MICRO_COMPRESSION_H_
#ifdef USE_TFLM_COMPRESSION
#include "tensorflow/lite/c/common.h"
namespace tflite {
//
// Compressed tensors
//
// Key of the model metadata entry that carries compression information.
static constexpr const char* kCompressionMetadataString =
    "COMPRESSION_METADATA";

// Identifies how a tensor's data is compressed.
enum class CompressionScheme : uint8_t {
  kBinQuant,
};

// Decode parameters for lookup-table (bin-quant) compressed tensors.
struct LookupTableData {
  static constexpr size_t kMaxBitWidth = 7;
  static constexpr size_t kMaxValueTableChannelStride = 128;

  const void* value_table;             // Pointer into FlatBuffer Values.
  uint8_t value_table_channel_stride;  // elements per channel
  uint8_t compressed_bit_width : 3;    // 1 to 7 bits
  bool is_per_channel_quantized : 1;   // tensor is per-channel quantized
  bool use_alternate_axis : 1;         // shape default channel:
                                       //   0 = first, 1 = last
  uint8_t reserved : 3;
};

// Scheme-specific payload; the active member is selected by
// CompressionTensorData::scheme.
union CompressionData {
  LookupTableData* lut_data;
};

// Compression descriptor for a single tensor.
struct CompressionTensorData {
  CompressionScheme scheme;
  CompressionData data;
};

// Per-subgraph table of compression descriptors.
struct CompressedTensorList {
  // Sparsely populated array with the same number of elements as there are
  // tensors in the Subgraph. An alternative would include a tensor index in
  // the struct for each and walk the list on look up. This could be slow.
  const CompressionTensorData** tensors;
};
} // namespace tflite
#endif // USE_TFLM_COMPRESSION
#endif // TENSORFLOW_LITE_MICRO_MICRO_COMPRESSION_H_
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This file is empty to ensure that a specialized implementation of
// debug_log.h is used (instead of the default implementation from
// tensorflow/lite/micro/debug_log.cc).
//
// The actual target-specific implementation of debug_log.h is in
// system_setup.cc since that allows us to consolidate all the target-specific
// specializations into one source file.
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#ifdef __cplusplus
#include <cstdarg>
#include <cstddef>
#else
#include <stdarg.h>
#include <stddef.h>
#endif // __cplusplus
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// These functions should be implemented by each target platform, and provide a
// way for strings to be output to some text stream. For more information, see
// the tensorflow/lite/micro/debug_log.cc file. These functions should support
// standard C/C++ stdio style formatting operations.

// Formats `format` with `args` and writes the result to the platform's debug
// output.
void DebugLog(const char* format, va_list args);

// vsnprintf-style formatting into `buffer` of capacity `buf_size`.
// NOTE(review): the exact return-value convention is target-defined; confirm
// it mirrors standard vsnprintf on each platform implementation.
int DebugVsnprintf(char* buffer, size_t buf_size, const char* format,
                   va_list vlist);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/fake_micro_context.h"
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/micro_arena_constants.h"
#include "tensorflow/lite/micro/micro_log.h"
namespace tflite {
// Builds a test-only MicroContext over caller-owned tensors, a single-arena
// allocator, and a graph (plus optional compressed-tensor metadata).
FakeMicroContext::FakeMicroContext(
    TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator,
    MicroGraph* micro_graph
#ifdef USE_TFLM_COMPRESSION
    ,
    const CompressedTensorList* compressed_tensors
#endif  // USE_TFLM_COMPRESSION
    )
    : graph_(*micro_graph),
      tensors_(tensors),
      allocator_(allocator)
#ifdef USE_TFLM_COMPRESSION
      ,
      compressed_tensors_(compressed_tensors)
#endif  // USE_TFLM_COMPRESSION
{
}
// Hands out the caller-provided TfLiteTensor at `tensor_index`; only the
// outstanding-temp counter is updated, no memory is allocated.
TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) {
  allocated_temp_count_++;
  return &tensors_[tensor_index];
}
// Balances AllocateTempTfLiteTensor(). The tensor itself is caller-owned, so
// only the counter is updated; `tensor` is intentionally unused.
void FakeMicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
  allocated_temp_count_--;
}
// True when every temp tensor/buffer handed out by this context has been
// returned via the matching Deallocate* call.
bool FakeMicroContext::IsAllTempTfLiteTensorDeallocated() {
  return allocated_temp_count_ == 0;
}
// Allocates a scratch region from the arena's temp section and counts it as
// outstanding.
uint8_t* FakeMicroContext::AllocateTempBuffer(size_t size, size_t alignment) {
  allocated_temp_count_++;
  return allocator_->AllocateTemp(size, alignment);
}
// Returns `buffer` to the arena's temp region and balances the counter
// incremented by AllocateTempBuffer().
void FakeMicroContext::DeallocateTempBuffer(uint8_t* buffer) {
  allocator_->DeallocateTemp(buffer);
  --allocated_temp_count_;
}
// Builds a TfLiteEvalTensor view of tensors_[tensor_index], backed by a
// fresh allocation from the arena's temp region.
TfLiteEvalTensor* FakeMicroContext::GetEvalTensor(int tensor_index) {
  auto* eval_tensor = reinterpret_cast<TfLiteEvalTensor*>(
      allocator_->AllocateTemp(sizeof(TfLiteEvalTensor),
                               alignof(TfLiteEvalTensor)));
  TFLITE_DCHECK(eval_tensor != nullptr);
  // In unit tests, the TfLiteTensor pointer contains the source of truth for
  // buffers and values, so mirror its fields into the eval tensor.
  const TfLiteTensor& source = tensors_[tensor_index];
  eval_tensor->data = source.data;
  eval_tensor->dims = source.dims;
  eval_tensor->type = source.type;
  return eval_tensor;
}
// FakeMicroContext uses SingleArenaBufferAllocator, which does not apply the
// buffer alignment automatically the way MicroAllocator does. Forcing the
// alignment here is potentially wasteful but lets this fake work correctly
// with optimized kernels.
void* FakeMicroContext::AllocatePersistentBuffer(size_t bytes) {
  const size_t alignment = MicroArenaBufferAlignment();
  return allocator_->AllocatePersistentBuffer(bytes, alignment);
}
// Reserves a scratch buffer of `bytes` bytes and returns its handle through
// `buffer_index`. Fails once the fixed-size handle table is full.
TfLiteStatus FakeMicroContext::RequestScratchBufferInArena(size_t bytes,
                                                           int* buffer_index) {
  TFLITE_DCHECK(buffer_index != nullptr);
  if (scratch_buffer_count_ == kNumScratchBuffers_) {
    MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).",
                kNumScratchBuffers_);
    return kTfLiteError;
  }
  // For tests, we allocate scratch buffers from the tail and keep them around
  // for the lifetime of model. This means that the arena size in the tests
  // will be more than what we would have if the scratch buffers could share
  // memory.
  void* scratch =
      allocator_->AllocatePersistentBuffer(bytes, MicroArenaBufferAlignment());
  TFLITE_DCHECK(scratch != nullptr);
  scratch_buffers_[scratch_buffer_count_] = scratch;
  *buffer_index = scratch_buffer_count_++;
  return kTfLiteOk;
}
// Returns the scratch buffer previously reserved under `buffer_index`, or
// nullptr if the handle is not valid.
void* FakeMicroContext::GetScratchBuffer(int buffer_index) {
  TFLITE_DCHECK(scratch_buffer_count_ <= kNumScratchBuffers_);
  // Reject out-of-range handles. The lower-bound check matters: a negative
  // index would otherwise read outside scratch_buffers_.
  if (buffer_index < 0 || buffer_index >= scratch_buffer_count_) {
    return nullptr;
  }
  return scratch_buffers_[buffer_index];
}
// External contexts are not supported by the fake; always reports failure.
TfLiteStatus FakeMicroContext::set_external_context(
    void* external_context_payload) {
  return kTfLiteError;
}
// No external context payload is ever stored, so this always yields nullptr.
void* FakeMicroContext::external_context() { return nullptr; }
// Accessor for the graph supplied at construction time.
MicroGraph& FakeMicroContext::graph() { return graph_; }
#ifdef USE_TFLM_COMPRESSION
// Available during Prepare & Eval. Returns false if tensor is not
// compressed.
bool FakeMicroContext::IsTensorCompressed(const TfLiteNode* node,
                                          int tensor_idx) {
  // Guard clauses: no compression list, or input slot out of range.
  if (compressed_tensors_ == nullptr || tensor_idx >= node->inputs->size) {
    return false;
  }
  const int tensor_index = node->inputs->data[tensor_idx];
  return tensor_index >= 0 &&
         compressed_tensors_->tensors[tensor_index] != nullptr;
}
// Only available during Prepare. The kernel is responsible for storing the
// scratch buffer handle.
int FakeMicroContext::AllocateDecompressionScratchBuffer(const TfLiteNode* node,
int tensor_idx) {
if (compressed_tensors_ == nullptr || tensor_idx >= node->inputs->size) {
return -1;
}
int index = node->inputs->data[tensor_idx];
if (index < 0 || compressed_tensors_->tensors[index] == nullptr) {
return -1;
}
TfLiteTensor* tensor = &tensors_[index];
int scratch_index = -1;
TfLiteStatus result =
RequestScratchBufferInArena(tensor->bytes, &scratch_index);
if (result != kTfLiteOk) {
return -1;
}
return scratch_index;
}
// Available during Prepare & Eval. Returns nullptr if tensor is not
// compressed.
const CompressionTensorData* FakeMicroContext::GetTensorCompressionData(
    const TfLiteNode* node, int tensor_idx) {
  if (compressed_tensors_ == nullptr || tensor_idx >= node->inputs->size) {
    return nullptr;
  }
  const int tensor_index = node->inputs->data[tensor_idx];
  return (tensor_index < 0) ? nullptr
                            : compressed_tensors_->tensors[tensor_index];
}
#endif // USE_TFLM_COMPRESSION
} // namespace tflite
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
#define TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
namespace tflite {
// A fake of MicroContext for kernel util tests.
// TODO(b/272759060): FakeMicroContext currently inherits from MicroContext.
// Which allow tests to use functions from MicroContext that weren't added to
// FakeMicroContext in tests. This should be looked into further.
class FakeMicroContext : public MicroContext {
 public:
  ~FakeMicroContext() = default;
  // `tensors` and `allocator` are borrowed, not owned, and must outlive this
  // object. `micro_graph` must be non-null.
  FakeMicroContext(TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator,
                   MicroGraph* micro_graph
#ifdef USE_TFLM_COMPRESSION
                   ,
                   const CompressedTensorList* compressed_tensors = nullptr
#endif  // USE_TFLM_COMPRESSION
  );
  // Persistent allocations live for the lifetime of the arena.
  void* AllocatePersistentBuffer(size_t bytes) override;
  // Reserves one of a fixed number of scratch buffers (kNumScratchBuffers_)
  // and returns its handle through `buffer_index`.
  TfLiteStatus RequestScratchBufferInArena(size_t bytes,
                                           int* buffer_index) override;
  void* GetScratchBuffer(int buffer_index) override;
  // Temp alloc/dealloc pairs are counted so tests can check they balance via
  // IsAllTempTfLiteTensorDeallocated().
  TfLiteTensor* AllocateTempTfLiteTensor(int tensor_index) override;
  void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) override;
  bool IsAllTempTfLiteTensorDeallocated();
  uint8_t* AllocateTempBuffer(size_t size, size_t alignment) override;
  void DeallocateTempBuffer(uint8_t* buffer) override;
  TfLiteEvalTensor* GetEvalTensor(int tensor_index) override;
  // External contexts are unsupported: set always fails, get returns nullptr.
  TfLiteStatus set_external_context(void* external_context_payload) override;
  void* external_context() override;
  MicroGraph& graph() override;
#ifdef USE_TFLM_COMPRESSION
  // Available during Prepare & Eval. Returns false if tensor is not
  // compressed.
  bool IsTensorCompressed(const TfLiteNode* node, int tensor_idx) override;
  // Only available during Prepare. The kernel is responsible for storing the
  // scratch buffer handle.
  int AllocateDecompressionScratchBuffer(const TfLiteNode* node,
                                         int tensor_idx) override;
  // Available during Prepare & Eval. Returns nullptr if tensor is not
  // compressed.
  const CompressionTensorData* GetTensorCompressionData(
      const TfLiteNode* node, int tensor_idx) override;
#endif  // USE_TFLM_COMPRESSION
 private:
  // Fixed capacity of the scratch-buffer handle table.
  static constexpr int kNumScratchBuffers_ = 12;
  MicroGraph& graph_;
  // Number of scratch buffers handed out so far (== next free handle).
  int scratch_buffer_count_ = 0;
  uint8_t* scratch_buffers_[kNumScratchBuffers_];
  // Caller-owned tensor array, indexed by tensor index.
  TfLiteTensor* tensors_;
  // Net count of outstanding temp tensors/buffers.
  int allocated_temp_count_ = 0;
  SingleArenaBufferAllocator* allocator_;
#ifdef USE_TFLM_COMPRESSION
  //
  // Compression
  //
  const CompressedTensorList* compressed_tensors_;
#endif  // USE_TFLM_COMPRESSION
  TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/flatbuffer_utils.h"
namespace tflite {
// Construct from a serialized flexbuffer `buffer` of `size` bytes by viewing
// the root (a flexbuffers::Map in practice) as a vector of its values.
FlexbufferWrapper::FlexbufferWrapper(const uint8_t* buffer, size_t size)
    : flexbuffers::Vector(flexbuffers::GetRoot(buffer, size).AsVector()) {}
// Reads element `i` of the underlying vector as a signed 64-bit integer.
int64_t FlexbufferWrapper::ElementAsInt64(size_t i) const {
  return ::flexbuffers::ReadInt64(data_ + i * byte_width_, byte_width_);
}
// Reads element `i` of the underlying vector as an unsigned 64-bit integer.
uint64_t FlexbufferWrapper::ElementAsUInt64(size_t i) const {
  return ::flexbuffers::ReadUInt64(data_ + i * byte_width_, byte_width_);
}
// Narrows the stored 64-bit value at `i` to 32 bits.
int32_t FlexbufferWrapper::ElementAsInt32(size_t i) const {
  const int64_t wide = ElementAsInt64(i);
  return static_cast<int32_t>(wide);
}
// Interprets the stored 64-bit value at `i` as a Boolean (nonzero == true).
bool FlexbufferWrapper::ElementAsBool(size_t i) const {
  return ElementAsUInt64(i) != 0;
}
// Reads element `i` of the underlying vector as a double-precision float.
double FlexbufferWrapper::ElementAsDouble(size_t i) const {
  const uint8_t* element = data_ + i * byte_width_;
  return ::flexbuffers::ReadDouble(element, byte_width_);
}
float FlexbufferWrapper::ElementAsFloat(size_t i) const {
return static_cast<float>(FlexbufferWrapper::ElementAsDouble(i));
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const SubGraph* subgraph) {
  // Tolerate models serialized without an operators vector.
  const auto* ops = subgraph->operators();
  return (ops == nullptr) ? 0 : ops->size();
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
return NumSubgraphOperators(subgraph);
}
// Reinterprets a flatbuffer int32 vector as a TfLiteIntArray without copying.
TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray(
    const flatbuffers::Vector<int32_t>* flatbuffer_array) {
  // On little-endian machines, TfLiteIntArray happens to have the same memory
  // layout as flatbuffers:Vector<int32_t>, so we can reinterpret_cast the
  // flatbuffer vector and avoid a copy and malloc.
  // NOTE(review): this aliasing is little-endian specific; a big-endian
  // target would need a real conversion.
  // TODO(b/188459715): audit this usage of const_cast.
  return const_cast<TfLiteIntArray*>(
      reinterpret_cast<const TfLiteIntArray*>(flatbuffer_array));
}
// Reinterprets a flatbuffer float vector as a TfLiteFloatArray without
// copying.
TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray(
    const flatbuffers::Vector<float>* flatbuffer_array) {
  // On little-endian machines, TfLiteFloatArray happens to have the same
  // memory layout as flatbuffers:Vector<float>, so we can reinterpret_cast
  // the flatbuffer vector and avoid a copy and malloc.
  // NOTE(review): this aliasing is little-endian specific; a big-endian
  // target would need a real conversion.
  // TODO(b/188459715): audit this usage of const_cast.
  return const_cast<TfLiteFloatArray*>(
      reinterpret_cast<const TfLiteFloatArray*>(flatbuffer_array));
}
} // namespace tflite
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#define THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#include "third_party/flatbuffers/include/flatbuffers/flatbuffers.h"
#include "third_party/flatbuffers/include/flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Kernels use flexbuffers::Map to pack their init parameters in a tflite file,
// with the parameter names as map keys and the parameter values as the
// corresponding map values.
// Accessing the map values using the flexbuffers:Map class is inline heavy,
// which can cause the code size to bloat beyond what's reasonable for a micro
// application. Use this class instead, when possible.
// FlexbufferWrapper takes advantage of the following properties of
// flexbuffers::Map:
// 1. It can be viewed as a flexbuffers::Vector of the values.
// 2. The values in the vector are ordered alphabetically by their keys.
// 3. All integer and Boolean values are stored as 64-bit numbers.
// 4. All floating point values are stored as double precision numbers.
// The properties are mentioned in the flexbuffers docs, but we rely on
// a unit test to catch design changes.
class FlexbufferWrapper : public flexbuffers::Vector {
 public:
  // Construct with a serialized flexbuffer 'buffer' of 'size' bytes
  explicit FlexbufferWrapper(const uint8_t* buffer, size_t size);
  // Accessors read element `i` of the value vector; the narrowing variants
  // (Int32, Float, Bool) are casts of the 64-bit/double stored forms.
  int64_t ElementAsInt64(size_t i) const;
  uint64_t ElementAsUInt64(size_t i) const;
  int32_t ElementAsInt32(size_t i) const;
  bool ElementAsBool(size_t i) const;
  double ElementAsDouble(size_t i) const;
  float ElementAsFloat(size_t i) const;
};
// Return the number of operators in a subgraph tflite
// (0 when the subgraph has no operators vector).
uint32_t NumSubgraphOperators(const SubGraph* subgraph);
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx);
// Converts a flatbuffer array to a TfLiteArray.
// TODO(b/188459715): These function convert a const input to a non-const via a
// const_cast. It is unclear exactly why this is required.
TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray(
    const flatbuffers::Vector<int32_t>* flatbuffer_array);
TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray(
    const flatbuffers::Vector<float>* flatbuffer_array);
} // namespace tflite
#endif // THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
// Copyright 2024 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "tensorflow/lite/micro/hexdump.h"
#include <algorithm>
#include <cctype>
#include "tensorflow/lite/micro/debug_log.h"
#include "tensorflow/lite/micro/static_vector.h"
namespace {
// Writes formatted output, printf-style, to either a buffer or DebugLog.
// Writes to DebugLog if the buffer data pointer is null. Does not exceed
// the size of the buffer. Returns the unused remainder of the buffer, or a
// buffer with a null data pointer in the case of printing to DebugLog.
tflite::Span<char> output(const tflite::Span<char>& buf, const char* format,
                          ...) {
  tflite::Span<char> result{nullptr, 0};
  va_list args;
  va_start(args, format);
  if (buf.data() == nullptr) {
    // NOTE(review): assumes DebugLog's second parameter is a va_list —
    // confirm against the DebugLog declaration in debug_log.h.
    DebugLog(format, args);
    result = {nullptr, 0};
  } else {
    // DebugVsnprintf returns the number of characters that would have been
    // written if there were enough room, so cap it at the size of the buffer
    // in order to know how much was actually written.
    size_t len = DebugVsnprintf(buf.data(), buf.size(), format, args);
    size_t consumed = std::min(len, buf.size());
    result = {buf.data() + consumed, buf.size() - consumed};
  }
  va_end(args);
  return result;
}
} // end anonymous namespace
// Renders `region` 16 bytes per line — address column, hex bytes, then an
// ASCII column (non-printable bytes shown as '.') — into `out`, or to
// DebugLog when out.data() is null (see output() above). Returns the prefix
// of `out` that was actually written.
tflite::Span<char> tflite::hexdump(const tflite::Span<const std::byte> region,
                                   const tflite::Span<char> out) {
  tflite::Span<char> buffer{out};
  std::size_t byte_nr = 0;
  constexpr int per_line = 16;
  const int lines = (region.size() + per_line - 1) / per_line;  // round up
  for (int line = 0; line < lines; ++line) {
    tflite::StaticVector<char, per_line> ascii;
    // print address
    // NOTE(review): this prints the line index, not the byte offset
    // (line * per_line); confirm the intended address-column format.
    buffer = output(buffer, "%08X:", line);
    for (int pos = 0; pos < per_line; ++pos) {
      if (byte_nr < region.size()) {
        // print byte
        int as_int = static_cast<int>(region[byte_nr++]);
        buffer = output(buffer, " %02X", as_int);
        // buffer an ascii printable value; non-printables render as '.'
        char c{'.'};
        if (std::isprint(as_int)) {
          c = static_cast<char>(as_int);
        }
        ascii.push_back(c);
      } else {
        // past the end of region: pad so the ascii column stays aligned
        buffer = output(buffer, " ");
      }
      // print extra space in middle of the line
      if (pos == per_line / 2 - 1) {
        buffer = output(buffer, " ");
      }
    }
    // print the ascii value
    buffer = output(buffer, " ");
    for (const auto& c : ascii) {
      buffer = output(buffer, "%c", c);
    }
    buffer = output(buffer, "%c", '\n');
  }
  return {out.data(), out.size() - buffer.size()};
}
// Convenience overload: dump straight to DebugLog by passing a buffer with a
// null data pointer.
void tflite::hexdump(const tflite::Span<const std::byte> region) {
  const tflite::Span<char> to_debug_log{nullptr, 0};
  hexdump(region, to_debug_log);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment