tflite_micro_person_detection_init
This commit is contained in:
@@ -0,0 +1,57 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/max.h"
|
||||
#include "tensorflow/lite/kernels/internal/min.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Returns the floating point value for a fused activation:
|
||||
inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
|
||||
switch (act) {
|
||||
case kTfLiteActNone:
|
||||
return a;
|
||||
case kTfLiteActRelu:
|
||||
return TfLiteMax(0.0f, a);
|
||||
case kTfLiteActReluN1To1:
|
||||
return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
|
||||
case kTfLiteActRelu6:
|
||||
return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
|
||||
case kTfLiteActTanh:
|
||||
return std::tanh(a);
|
||||
case kTfLiteActSignBit:
|
||||
return std::signbit(a);
|
||||
case kTfLiteActSigmoid:
|
||||
return 1.0f / (1.0f + std::exp(-a));
|
||||
}
|
||||
return 0.0f; // To indicate an unsupported activation (i.e. when a new fused
|
||||
// activation is added to the enum and not handled here).
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
|
@@ -0,0 +1,285 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
|
||||
struct ReluOpData {
|
||||
ReluParams params;
|
||||
};
|
||||
|
||||
struct Relu6OpData {
|
||||
int8_t six_int8;
|
||||
int8_t zero_int8;
|
||||
uint8_t six_uint8;
|
||||
uint8_t zero_uint8;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
template <typename T>
|
||||
inline void ReluQuantized(const ReluOpData& data,
|
||||
const RuntimeShape& input_shape,
|
||||
const RuntimeShape& output_shape, const T* input_data,
|
||||
T* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const int32_t val = static_cast<int32_t>(input_data[i]);
|
||||
int32_t clamped =
|
||||
data.params.output_offset +
|
||||
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
|
||||
data.params.output_multiplier,
|
||||
data.params.output_shift);
|
||||
clamped = std::max(data.params.quantized_activation_min, clamped);
|
||||
clamped = std::min(data.params.quantized_activation_max, clamped);
|
||||
output_data[i] = static_cast<T>(clamped);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
|
||||
ReluOpData* data) {
|
||||
float act_min = 0.0;
|
||||
float act_max = std::numeric_limits<float>::infinity();
|
||||
double real_multiplier =
|
||||
static_cast<double>(input->params.scale / output->params.scale);
|
||||
|
||||
const RuntimeShape input_shape = GetTensorShape(input);
|
||||
const RuntimeShape output_shape = GetTensorShape(output);
|
||||
|
||||
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
|
||||
&data->params.output_shift);
|
||||
|
||||
data->params.quantized_activation_min = std::max(
|
||||
static_cast<int32_t>(std::numeric_limits<T>::min()),
|
||||
output->params.zero_point +
|
||||
static_cast<int32_t>(roundf(act_min / output->params.scale)));
|
||||
data->params.quantized_activation_max =
|
||||
act_max == std::numeric_limits<float>::infinity()
|
||||
? static_cast<int32_t>(std::numeric_limits<T>::max())
|
||||
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
|
||||
output->params.zero_point +
|
||||
static_cast<int32_t>(
|
||||
roundf(act_max / output->params.scale)));
|
||||
data->params.input_offset = input->params.zero_point;
|
||||
data->params.output_offset = output->params.zero_point;
|
||||
}
|
||||
|
||||
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
|
||||
const RuntimeShape& output_shape, float* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const float val = input_data[i];
|
||||
const float lower = 0.0f;
|
||||
const float clamped = val < lower ? lower : val;
|
||||
output_data[i] = clamped;
|
||||
}
|
||||
}
|
||||
|
||||
inline void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
|
||||
const RuntimeShape& output_shape, float* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const float val = input_data[i];
|
||||
const float upper = 6.0f;
|
||||
const float lower = 0.0f;
|
||||
const float clamped = val > upper ? upper : val < lower ? lower : val;
|
||||
output_data[i] = clamped;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Q>
|
||||
inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
|
||||
const Q* input_data,
|
||||
const RuntimeShape& output_shape, Q* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const Q val = input_data[i];
|
||||
const Q clamped = val > upper ? upper : val < lower ? lower : val;
|
||||
output_data[i] = clamped;
|
||||
}
|
||||
}
|
||||
|
||||
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
|
||||
}
|
||||
|
||||
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
CalculateReluOpData<int8_t>(input, output, data);
|
||||
} else if (input->type == kTfLiteUInt8) {
|
||||
CalculateReluOpData<uint8_t>(input, output, data);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
ReluFloat(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default: {
|
||||
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
data->six_int8 = FloatToAsymmetricQuantizedInt8(6.0f, input->params.scale,
|
||||
input->params.zero_point);
|
||||
data->zero_int8 = input->params.zero_point;
|
||||
} else if (input->type == kTfLiteUInt8) {
|
||||
data->six_uint8 = FloatToAsymmetricQuantizedUInt8(6.0f, input->params.scale,
|
||||
input->params.zero_point);
|
||||
data->zero_uint8 = input->params.zero_point;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
Relu6Float(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default: {
|
||||
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration Register_RELU() {
|
||||
return {/*init=*/activations::ReluInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::ReluPrepare,
|
||||
/*invoke=*/activations::ReluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_RELU6() {
|
||||
return {/*init=*/activations::Relu6Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::Relu6Prepare,
|
||||
/*invoke=*/activations::Relu6Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,378 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "mcu_init.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/micro/all_ops_resolver.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
|
||||
#include "tensorflow/lite/micro/testing/micro_test.h"
|
||||
#include "tensorflow/lite/micro/testing/test_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace testing {
|
||||
namespace {
|
||||
|
||||
void TestReluFloat(const int* input_dims_data, const float* input_data,
|
||||
const int* output_dims_data, const float* golden,
|
||||
float* output_data) {
|
||||
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
|
||||
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
|
||||
const int output_elements_count = ElementCount(*output_dims);
|
||||
|
||||
constexpr int inputs_size = 1;
|
||||
constexpr int outputs_size = 1;
|
||||
constexpr int tensors_size = inputs_size + outputs_size;
|
||||
TfLiteTensor tensors[tensors_size] = {
|
||||
CreateFloatTensor(input_data, input_dims),
|
||||
CreateFloatTensor(output_data, output_dims),
|
||||
};
|
||||
|
||||
int inputs_array_data[] = {1, 0};
|
||||
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
|
||||
int outputs_array_data[] = {1, 1};
|
||||
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
|
||||
|
||||
const TfLiteRegistration registration = ops::micro::Register_RELU();
|
||||
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
|
||||
outputs_array,
|
||||
/*builtin_data=*/nullptr, micro_test::reporter);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
|
||||
|
||||
for (int i = 0; i < output_elements_count; ++i) {
|
||||
TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
void TestRelu6Float(const int* input_dims_data, const float* input_data,
|
||||
const int* output_dims_data, const float* golden,
|
||||
float* output_data) {
|
||||
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
|
||||
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
|
||||
const int output_elements_count = ElementCount(*output_dims);
|
||||
|
||||
constexpr int inputs_size = 1;
|
||||
constexpr int outputs_size = 1;
|
||||
constexpr int tensors_size = inputs_size + outputs_size;
|
||||
TfLiteTensor tensors[tensors_size] = {
|
||||
CreateFloatTensor(input_data, input_dims),
|
||||
CreateFloatTensor(output_data, output_dims),
|
||||
};
|
||||
|
||||
int inputs_array_data[] = {1, 0};
|
||||
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
|
||||
int outputs_array_data[] = {1, 1};
|
||||
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
|
||||
|
||||
const TfLiteRegistration registration = ops::micro::Register_RELU6();
|
||||
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
|
||||
outputs_array,
|
||||
/*builtin_data=*/nullptr, micro_test::reporter);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
|
||||
|
||||
for (int i = 0; i < output_elements_count; ++i) {
|
||||
TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
void TestReluUint8(const int* input_dims_data, const float* input_data,
|
||||
uint8_t* input_data_quantized, const float input_scale,
|
||||
const int input_zero_point, const float* golden,
|
||||
uint8_t* golden_quantized, const int* output_dims_data,
|
||||
const float output_scale, const int output_zero_point,
|
||||
uint8_t* output_data) {
|
||||
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
|
||||
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
|
||||
const int output_elements_count = ElementCount(*output_dims);
|
||||
|
||||
constexpr int inputs_size = 1;
|
||||
constexpr int outputs_size = 1;
|
||||
constexpr int tensors_size = inputs_size + outputs_size;
|
||||
TfLiteTensor tensors[tensors_size] = {
|
||||
CreateQuantizedTensor(input_data, input_data_quantized, input_dims,
|
||||
input_scale, input_zero_point),
|
||||
CreateQuantizedTensor(output_data, output_dims, output_scale,
|
||||
output_zero_point),
|
||||
};
|
||||
|
||||
int inputs_array_data[] = {1, 0};
|
||||
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
|
||||
int outputs_array_data[] = {1, 1};
|
||||
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
|
||||
|
||||
const TfLiteRegistration registration = ops::micro::Register_RELU();
|
||||
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
|
||||
outputs_array,
|
||||
/*builtin_data=*/nullptr, micro_test::reporter);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
|
||||
|
||||
AsymmetricQuantize(golden, golden_quantized, output_elements_count,
|
||||
output_scale, output_zero_point);
|
||||
|
||||
for (int i = 0; i < output_elements_count; ++i) {
|
||||
TF_LITE_MICRO_EXPECT_EQ(golden_quantized[i], output_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void TestRelu6Uint8(const int* input_dims_data, const float* input_data,
|
||||
uint8_t* input_data_quantized, const float input_scale,
|
||||
const int input_zero_point, const float* golden,
|
||||
uint8_t* golden_quantized, const int* output_dims_data,
|
||||
const float output_scale, const int output_zero_point,
|
||||
uint8_t* output_data) {
|
||||
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
|
||||
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
|
||||
const int output_elements_count = ElementCount(*output_dims);
|
||||
|
||||
constexpr int inputs_size = 1;
|
||||
constexpr int outputs_size = 1;
|
||||
constexpr int tensors_size = inputs_size + outputs_size;
|
||||
TfLiteTensor tensors[tensors_size] = {
|
||||
CreateQuantizedTensor(input_data, input_data_quantized, input_dims,
|
||||
input_scale, input_zero_point),
|
||||
CreateQuantizedTensor(output_data, output_dims, output_scale,
|
||||
output_zero_point),
|
||||
};
|
||||
|
||||
int inputs_array_data[] = {1, 0};
|
||||
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
|
||||
int outputs_array_data[] = {1, 1};
|
||||
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
|
||||
|
||||
const TfLiteRegistration registration = ops::micro::Register_RELU6();
|
||||
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
|
||||
outputs_array,
|
||||
/*builtin_data=*/nullptr, micro_test::reporter);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
|
||||
|
||||
AsymmetricQuantize(golden, golden_quantized, output_elements_count,
|
||||
output_scale, output_zero_point);
|
||||
|
||||
for (int i = 0; i < output_elements_count; ++i) {
|
||||
TF_LITE_MICRO_EXPECT_EQ(golden_quantized[i], output_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void TestReluInt8(const int* input_dims_data, const float* input_data,
|
||||
int8_t* input_data_quantized, const float input_scale,
|
||||
const int input_zero_point, const float* golden,
|
||||
int8_t* golden_quantized, const int* output_dims_data,
|
||||
const float output_scale, const int output_zero_point,
|
||||
int8_t* output_data) {
|
||||
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
|
||||
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
|
||||
const int output_elements_count = ElementCount(*output_dims);
|
||||
constexpr int inputs_size = 1;
|
||||
constexpr int outputs_size = 1;
|
||||
constexpr int tensors_size = inputs_size + outputs_size;
|
||||
TfLiteTensor tensors[tensors_size] = {
|
||||
CreateQuantizedTensor(input_data, input_data_quantized, input_dims,
|
||||
input_scale, input_zero_point),
|
||||
CreateQuantizedTensor(output_data, output_dims, output_scale,
|
||||
output_zero_point),
|
||||
};
|
||||
|
||||
int inputs_array_data[] = {1, 0};
|
||||
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
|
||||
int outputs_array_data[] = {1, 1};
|
||||
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
|
||||
|
||||
const TfLiteRegistration registration = ops::micro::Register_RELU();
|
||||
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
|
||||
outputs_array,
|
||||
/*builtin_data=*/nullptr, micro_test::reporter);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
|
||||
|
||||
AsymmetricQuantize(golden, golden_quantized, output_elements_count,
|
||||
output_scale, output_zero_point);
|
||||
|
||||
for (int i = 0; i < output_elements_count; ++i) {
|
||||
TF_LITE_MICRO_EXPECT_EQ(golden_quantized[i], output_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void TestRelu6Int8(const int* input_dims_data, const float* input_data,
|
||||
int8_t* input_data_quantized, const float input_scale,
|
||||
const int input_zero_point, const float* golden,
|
||||
int8_t* golden_quantized, const int* output_dims_data,
|
||||
const float output_scale, const int output_zero_point,
|
||||
int8_t* output_data) {
|
||||
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
|
||||
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
|
||||
const int output_elements_count = ElementCount(*output_dims);
|
||||
constexpr int inputs_size = 1;
|
||||
constexpr int outputs_size = 1;
|
||||
constexpr int tensors_size = inputs_size + outputs_size;
|
||||
TfLiteTensor tensors[tensors_size] = {
|
||||
CreateQuantizedTensor(input_data, input_data_quantized, input_dims,
|
||||
input_scale, input_zero_point),
|
||||
CreateQuantizedTensor(output_data, output_dims, output_scale,
|
||||
output_zero_point),
|
||||
};
|
||||
|
||||
int inputs_array_data[] = {1, 0};
|
||||
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
|
||||
int outputs_array_data[] = {1, 1};
|
||||
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
|
||||
|
||||
const TfLiteRegistration registration = ops::micro::Register_RELU6();
|
||||
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
|
||||
outputs_array,
|
||||
/*builtin_data=*/nullptr, micro_test::reporter);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
|
||||
|
||||
AsymmetricQuantize(golden, golden_quantized, output_elements_count,
|
||||
output_scale, output_zero_point);
|
||||
|
||||
for (int i = 0; i < output_elements_count; ++i) {
|
||||
TF_LITE_MICRO_EXPECT_EQ(golden_quantized[i], output_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace testing
|
||||
} // namespace tflite
|
||||
|
||||
TF_LITE_MICRO_TESTS_BEGIN
|
||||
|
||||
TF_LITE_MICRO_TEST(SimpleReluTestFloat) {
|
||||
const int output_elements_count = 10;
|
||||
const int input_shape[] = {2, 1, 5};
|
||||
const float input_data[] = {
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, -1.0, -2.0, -3.0, -4.0, -5.0,
|
||||
};
|
||||
const float golden[] = {1.0, 2.0, 3.0, 4.0, 5.0, 0, 0, 0, 0, 0};
|
||||
const int output_shape[] = {2, 1, 5};
|
||||
float output_data[output_elements_count];
|
||||
tflite::testing::TestReluFloat(input_shape, input_data, output_shape, golden,
|
||||
output_data);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(SimpleRelu6TestFloat) {
|
||||
const int output_elements_count = 10;
|
||||
float output_data[output_elements_count];
|
||||
const int input_shape[] = {2, 1, 5};
|
||||
const float input_data[] = {4.0, 5.0, 6.0, 7.0, 8.0,
|
||||
-4.0, -5.0, -6.0, -7.0, -8.0};
|
||||
const int output_shape[] = {2, 1, 5};
|
||||
const float golden[] = {
|
||||
4.0, 5.0, 6.0, 6.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0,
|
||||
};
|
||||
|
||||
tflite::testing::TestRelu6Float(input_shape, input_data, output_shape, golden,
|
||||
output_data);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(SimpleReluTestUint8) {
|
||||
const int elements_count = 10;
|
||||
|
||||
const int input_shape[] = {2, 1, 5};
|
||||
const float input_data[] = {1, 2, 3, 4, 5, -1, -2, -3, -4, -5};
|
||||
uint8_t input_quantized[elements_count];
|
||||
const int output_shape[] = {2, 1, 5};
|
||||
const float golden[] = {1, 2, 3, 4, 5, 0, 0, 0, 0, 0};
|
||||
uint8_t golden_quantized[elements_count];
|
||||
uint8_t output_data[elements_count];
|
||||
|
||||
const float input_scale = 0.5f;
|
||||
const int input_zero_point = 127;
|
||||
const float output_scale = 0.5f;
|
||||
const int output_zero_point = 127;
|
||||
|
||||
tflite::testing::TestReluUint8(input_shape, input_data, input_quantized,
|
||||
input_scale, input_zero_point, golden,
|
||||
golden_quantized, output_shape, output_scale,
|
||||
output_zero_point, output_data);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(SimpleRelu6TestUint8) {
|
||||
const int elements_count = 10;
|
||||
|
||||
const int input_shape[] = {2, 1, 5};
|
||||
const float input_data[] = {4, 5, 6, 7, 8, -1, -2, -3, -4, -5};
|
||||
uint8_t input_quantized[elements_count];
|
||||
const int output_shape[] = {2, 1, 5};
|
||||
const float golden[] = {4, 5, 6, 6, 6, 0, 0, 0, 0, 0};
|
||||
uint8_t golden_quantized[elements_count];
|
||||
uint8_t output_data[elements_count];
|
||||
|
||||
const float input_scale = 0.5f;
|
||||
const int input_zero_point = 127;
|
||||
const float output_scale = 0.5f;
|
||||
const int output_zero_point = 127;
|
||||
|
||||
tflite::testing::TestRelu6Uint8(input_shape, input_data, input_quantized,
|
||||
input_scale, input_zero_point, golden,
|
||||
golden_quantized, output_shape, output_scale,
|
||||
output_zero_point, output_data);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(SimpleReluTestInt8) {
|
||||
const int elements_count = 10;
|
||||
|
||||
const int input_shape[] = {2, 1, 5};
|
||||
const float input_data[] = {1, 2, 3, 4, 5, -1, -2, -3, -4, -5};
|
||||
int8_t input_quantized[elements_count];
|
||||
const int output_shape[] = {2, 1, 5};
|
||||
const float golden[] = {1, 2, 3, 4, 5, 0, 0, 0, 0, 0};
|
||||
int8_t golden_quantized[elements_count];
|
||||
int8_t output_data[elements_count];
|
||||
|
||||
const float input_scale = 0.5f;
|
||||
const int input_zero_point = 0;
|
||||
const float output_scale = 0.5f;
|
||||
const int output_zero_point = 0;
|
||||
|
||||
tflite::testing::TestReluInt8(input_shape, input_data, input_quantized,
|
||||
input_scale, input_zero_point, golden,
|
||||
golden_quantized, output_shape, output_scale,
|
||||
output_zero_point, output_data);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(SimpleRelu6TestInt8) {
|
||||
const int elements_count = 10;
|
||||
|
||||
const int input_shape[] = {2, 1, 5};
|
||||
const float input_data[] = {4, 5, 6, 7, 8, -1, -2, -3, -4, -5};
|
||||
int8_t input_quantized[elements_count];
|
||||
const int output_shape[] = {2, 1, 5};
|
||||
const float golden[] = {4, 5, 6, 6, 6, 0, 0, 0, 0, 0};
|
||||
int8_t golden_quantized[elements_count];
|
||||
int8_t output_data[elements_count];
|
||||
|
||||
const float input_scale = 0.5f;
|
||||
const int input_zero_point = 127;
|
||||
const float output_scale = 0.5f;
|
||||
const int output_zero_point = 127;
|
||||
|
||||
tflite::testing::TestRelu6Int8(input_shape, input_data, input_quantized,
|
||||
input_scale, input_zero_point, golden,
|
||||
golden_quantized, output_shape, output_scale,
|
||||
output_zero_point, output_data);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TESTS_END
|
240
components/tflite_micro/tensorflow/lite/micro/kernels/add.cc
Normal file
240
components/tflite_micro/tensorflow/lite/micro/kernels/add.cc
Normal file
@@ -0,0 +1,240 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/add.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace add {
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
bool requires_broadcast;
|
||||
|
||||
// These fields are used in both the general 8-bit -> 8bit quantized path,
|
||||
// and the special 16-bit -> 16bit quantized path
|
||||
int input1_shift;
|
||||
int input2_shift;
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
// These fields are used only in the general 8-bit -> 8bit quantized path
|
||||
int32_t input1_multiplier;
|
||||
int32_t input2_multiplier;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
int left_shift;
|
||||
int32_t input1_offset;
|
||||
int32_t input2_offset;
|
||||
int32_t output_offset;
|
||||
|
||||
// Used only for float evals:
|
||||
float output_activation_min_f32;
|
||||
float output_activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2, TfLiteTensor* output,
|
||||
OpData* data) {
|
||||
data->requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
// 8bit -> 8bit general quantized path, with general rescalings
|
||||
data->input1_offset = -input1->params.zero_point;
|
||||
data->input2_offset = -input2->params.zero_point;
|
||||
data->output_offset = output->params.zero_point;
|
||||
data->left_shift = 20;
|
||||
const double twice_max_input_scale =
|
||||
2 * static_cast<double>(
|
||||
std::max(input1->params.scale, input2->params.scale));
|
||||
const double real_input1_multiplier =
|
||||
static_cast<double>(input1->params.scale) / twice_max_input_scale;
|
||||
const double real_input2_multiplier =
|
||||
static_cast<double>(input2->params.scale) / twice_max_input_scale;
|
||||
const double real_output_multiplier =
|
||||
twice_max_input_scale /
|
||||
((1 << data->left_shift) * static_cast<double>(output->params.scale));
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_output_multiplier, &data->output_multiplier, &data->output_shift);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
} else if (output->type == kTfLiteFloat32) {
|
||||
CalculateActivationRange(params->activation,
|
||||
&data->output_activation_min_f32,
|
||||
&data->output_activation_max_f32);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
|
||||
const OpData* data, const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(data->output_activation_min_f32,
|
||||
data->output_activation_max_f32, &op_params);
|
||||
#define TF_LITE_ADD(opname) \
|
||||
reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
|
||||
tflite::micro::GetTensorData<float>(input1), \
|
||||
tflite::micro::GetTensorShape(input2), \
|
||||
tflite::micro::GetTensorData<float>(input2), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<float>(output))
|
||||
if (data->requires_broadcast) {
|
||||
TF_LITE_ADD(BroadcastAdd4DSlow);
|
||||
} else {
|
||||
TF_LITE_ADD(Add);
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
||||
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteAddParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
op_params.left_shift = data->left_shift;
|
||||
op_params.input1_offset = data->input1_offset;
|
||||
op_params.input1_multiplier = data->input1_multiplier;
|
||||
op_params.input1_shift = data->input1_shift;
|
||||
op_params.input2_offset = data->input2_offset;
|
||||
op_params.input2_multiplier = data->input2_multiplier;
|
||||
op_params.input2_shift = data->input2_shift;
|
||||
op_params.output_offset = data->output_offset;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_ADD(type, opname, dtype) \
|
||||
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
|
||||
tflite::micro::GetTensorData<dtype>(input1), \
|
||||
tflite::micro::GetTensorShape(input2), \
|
||||
tflite::micro::GetTensorData<dtype>(input2), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<dtype>(output));
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
|
||||
} else {
|
||||
TF_LITE_ADD(reference_integer_ops, Add, int8_t);
|
||||
}
|
||||
} else {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
|
||||
} else {
|
||||
TF_LITE_ADD(reference_ops, Add, uint8_t);
|
||||
}
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, data));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
EvalAdd(context, node, params, data, input1, input2, output);
|
||||
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
|
||||
input1, input2, output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(output->type), output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace add
|
||||
|
||||
TfLiteRegistration Register_ADD() {
|
||||
return {/*init=*/add::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/add::Prepare,
|
||||
/*invoke=*/add::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,133 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace arg_min_max {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kAxis = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
|
||||
const T1* input1_data, const T3* input2_data,
|
||||
const RuntimeShape& output_shape, T2* output_data,
|
||||
bool is_arg_max) {
|
||||
if (is_arg_max) {
|
||||
reference_ops::ArgMinMax(input1_shape, input1_data, input2_data,
|
||||
output_shape, output_data, micro::Greater());
|
||||
} else {
|
||||
reference_ops::ArgMinMax(input1_shape, input1_data, input2_data,
|
||||
output_shape, output_data, micro::Less());
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* axis =
|
||||
tflite::micro::GetEvalInput(context, node, kAxis);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
|
||||
ArgMinMaxHelper(tflite::micro::GetTensorShape(input), \
|
||||
tflite::micro::GetTensorData<data_type>(input), \
|
||||
tflite::micro::GetTensorData<axis_type>(axis), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<output_type>(output), \
|
||||
is_arg_max)
|
||||
if (axis->type == kTfLiteInt32) {
|
||||
if (output->type == kTfLiteInt32) {
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_ARG_MIN_MAX(float, int32_t, int32_t);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int32_t);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Only float32, uint8_t and int8_t are "
|
||||
"supported currently, got %s.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Only int32_t are supported currently, got %s.",
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Only int32_t are supported currently, got %s.",
|
||||
TfLiteTypeGetName(axis->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
#undef TF_LITE_ARG_MIN_MAX
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus ArgMinEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return Eval(context, node, false);
|
||||
}
|
||||
|
||||
TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return Eval(context, node, true);
|
||||
}
|
||||
|
||||
} // namespace arg_min_max
|
||||
|
||||
TfLiteRegistration Register_ARG_MAX() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_ARG_MIN() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,74 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/ceil.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace ceil {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
|
||||
for (int i = 0; i < output->dims->size; ++i) {
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
reference_ops::Ceil(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace ceil
|
||||
|
||||
TfLiteRegistration Register_CEIL() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/ceil::Prepare,
|
||||
/*invoke=*/ceil::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,178 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
/*
|
||||
* The circular buffer custom operator is used to implement strided streaming
|
||||
* convolutions on TFLite Micro. Each time this operator is invoked, it checks
|
||||
* whether or not to run, based on a predetermined stride in time. If the op
|
||||
* runs, it inserts the input into the end of the output buffer and shifts the
|
||||
* output values towards the start of the buffer. It discards the oldest value
|
||||
* in the output buffer.
|
||||
*
|
||||
* Input: [<input N+1]
|
||||
* Before shifting:
|
||||
* Output: [<input 1>, <input 2>, <input ...>, <input N>]
|
||||
*
|
||||
* After shifting:
|
||||
* Output: [<input 2>, <input 3>, <input ...>, <input N+1>]
|
||||
*
|
||||
* We make some assumptions in this custom operator:
|
||||
* - Input shape must be [1, 1, 1, depth]
|
||||
* - Output shape must be [1, num_slots, 1, depth]
|
||||
* - Input and output types must match.
|
||||
* - Input and output quantization params must be identical.
|
||||
*/
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace circular_buffer {
|
||||
|
||||
namespace {
|
||||
|
||||
// The CircularBuffer op has one input and one output tensor.
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// TODO(b/149795762): Add this to TfLiteStatus enum.
|
||||
constexpr int kTfLiteAbort = -9;
|
||||
|
||||
// These fields control the stride period of a strided streaming model. This op
|
||||
// returns kTfLiteAbort until cycles_until_run-- is zero. At this time,
|
||||
// cycles_until_run is reset to cycles_max.
|
||||
struct OpData {
|
||||
int cycles_until_run;
|
||||
int cycles_max;
|
||||
};
|
||||
|
||||
// These constants represent constants specific to the music detect model.
|
||||
// They exist until (b/132070898) is fixed.
|
||||
constexpr int kMaxOpDataSize = 7;
|
||||
int op_data_counter = 0;
|
||||
OpData op_data_array[kMaxOpDataSize];
|
||||
|
||||
} // namespace
|
||||
|
||||
void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; }
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, 1, output->dims->data[0]);
|
||||
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[0]);
|
||||
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]);
|
||||
TF_LITE_ENSURE_EQ(context, 1, output->dims->data[2]);
|
||||
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
// The circular buffer custom operator currently only supports int8_t.
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
|
||||
|
||||
// TODO(b/132070898): Use statically slotted OpData structures until a
|
||||
// scratch memory API is ready.
|
||||
TFLITE_DCHECK_LE(op_data_counter, kMaxOpDataSize);
|
||||
OpData* op_data = &op_data_array[op_data_counter++];
|
||||
// The last circular buffer layer (length 5) simply accumulates outputs, and
|
||||
// does not run periodically.
|
||||
// TODO(b/150001379): Move this special case logic to the tflite flatbuffer.
|
||||
if (output->dims->data[1] == 5) {
|
||||
op_data->cycles_max = 1;
|
||||
} else {
|
||||
op_data->cycles_max = 2;
|
||||
}
|
||||
op_data->cycles_until_run = op_data->cycles_max;
|
||||
node->user_data = op_data;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// Shifts buffer over by the output depth, and write new input to end of buffer.
|
||||
// num_slots is the number of samples stored in the output buffer.
|
||||
// depth is the size of each sample.
|
||||
void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
|
||||
memmove(output, &output[depth], (num_slots - 1) * depth);
|
||||
memcpy(&output[(num_slots - 1) * depth], input, depth);
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
OpData* data = reinterpret_cast<OpData*>(node->user_data);
|
||||
|
||||
int num_slots = output->dims->data[1];
|
||||
int depth = output->dims->data[3];
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
if (--data->cycles_until_run != 0) {
|
||||
// Signal the interpreter to end current run if the delay before op invoke
|
||||
// has not been reached.
|
||||
// TODO(b/149795762): Add kTfLiteAbort to TfLiteStatus enum.
|
||||
return static_cast<TfLiteStatus>(kTfLiteAbort);
|
||||
}
|
||||
|
||||
// If prepare is ever called more than one time (for example, when testing the
|
||||
// ambient model, the interpreter is created a few times), this op data
|
||||
// counter needs to be reset so that future instances do not overrun this op
|
||||
// data array.
|
||||
op_data_counter = 0;
|
||||
|
||||
data->cycles_until_run = data->cycles_max;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace circular_buffer
|
||||
|
||||
TfLiteRegistration* Register_CIRCULAR_BUFFER() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/circular_buffer::Free,
|
||||
/*prepare=*/circular_buffer::Prepare,
|
||||
/*invoke=*/circular_buffer::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,246 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/add.h"
|
||||
|
||||
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace add {
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
bool requires_broadcast;
|
||||
|
||||
// These fields are used in both the general 8-bit -> 8bit quantized path,
|
||||
// and the special 16-bit -> 16bit quantized path
|
||||
int input1_shift;
|
||||
int input2_shift;
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
// These fields are used only in the general 8-bit -> 8bit quantized path
|
||||
int32_t input1_multiplier;
|
||||
int32_t input2_multiplier;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
int left_shift;
|
||||
int32_t input1_offset;
|
||||
int32_t input2_offset;
|
||||
int32_t output_offset;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2, TfLiteTensor* output,
|
||||
OpData* data) {
|
||||
data->requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
// 8bit -> 8bit general quantized path, with general rescalings
|
||||
data->input1_offset = -input1->params.zero_point;
|
||||
data->input2_offset = -input2->params.zero_point;
|
||||
data->output_offset = output->params.zero_point;
|
||||
data->left_shift = 20;
|
||||
const double twice_max_input_scale =
|
||||
2 * static_cast<double>(
|
||||
std::max(input1->params.scale, input2->params.scale));
|
||||
const double real_input1_multiplier =
|
||||
static_cast<double>(input1->params.scale) / twice_max_input_scale;
|
||||
const double real_input2_multiplier =
|
||||
static_cast<double>(input2->params.scale) / twice_max_input_scale;
|
||||
const double real_output_multiplier =
|
||||
twice_max_input_scale /
|
||||
((1 << data->left_shift) * static_cast<double>(output->params.scale));
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_output_multiplier, &data->output_multiplier, &data->output_shift);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
|
||||
const OpData* data, const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
#define TF_LITE_ADD(opname) \
|
||||
reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
|
||||
tflite::micro::GetTensorData<float>(input1), \
|
||||
tflite::micro::GetTensorShape(input2), \
|
||||
tflite::micro::GetTensorData<float>(input2), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<float>(output))
|
||||
if (data->requires_broadcast) {
|
||||
TF_LITE_ADD(BroadcastAdd4DSlow);
|
||||
} else {
|
||||
TF_LITE_ADD(Add);
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
||||
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteAddParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
op_params.left_shift = data->left_shift;
|
||||
op_params.input1_offset = data->input1_offset;
|
||||
op_params.input1_multiplier = data->input1_multiplier;
|
||||
op_params.input1_shift = data->input1_shift;
|
||||
op_params.input2_offset = data->input2_offset;
|
||||
op_params.input2_multiplier = data->input2_multiplier;
|
||||
op_params.input2_shift = data->input2_shift;
|
||||
op_params.output_offset = data->output_offset;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_ADD(type, opname, dtype) \
|
||||
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
|
||||
tflite::micro::GetTensorData<dtype>(input1), \
|
||||
tflite::micro::GetTensorShape(input2), \
|
||||
tflite::micro::GetTensorData<dtype>(input2), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<dtype>(output));
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
|
||||
} else {
|
||||
arm_elementwise_add_s8(
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
op_params.input1_offset, op_params.input1_multiplier,
|
||||
op_params.input1_shift, op_params.input2_offset,
|
||||
op_params.input2_multiplier, op_params.input2_shift,
|
||||
op_params.left_shift, tflite::micro::GetTensorData<int8_t>(output),
|
||||
op_params.output_offset, op_params.output_multiplier,
|
||||
op_params.output_shift, op_params.quantized_activation_min,
|
||||
op_params.quantized_activation_max,
|
||||
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorShape(output)));
|
||||
}
|
||||
} else {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
|
||||
} else {
|
||||
TF_LITE_ADD(reference_ops, Add, uint8_t);
|
||||
}
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, data));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
EvalAdd(context, node, params, data, input1, input2, output);
|
||||
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
|
||||
input1, input2, output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(output->type), output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace add
|
||||
|
||||
TfLiteRegistration Register_ADD() {
|
||||
return {/*init=*/add::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/add::Prepare,
|
||||
/*invoke=*/add::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,455 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/conv.h"
|
||||
|
||||
#include "cmsis/CMSIS/NN/Include/arm_nn_types.h"
|
||||
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace conv {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kFilterTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
constexpr int kMaxChannels = 256;
|
||||
|
||||
// Conv is quantized along dimension 0:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
constexpr int kConvQuantizedDimension = 0;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
// Per channel output multiplier and shift.
|
||||
// TODO(b/141139247): Allocate these dynamically when possible.
|
||||
int32_t per_channel_output_multiplier[kMaxChannels];
|
||||
int32_t per_channel_output_shift[kMaxChannels];
|
||||
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
// Index to buffer for optimizations if applicable.
|
||||
int buffer_idx;
|
||||
};
|
||||
|
||||
inline PaddingType RuntimePaddingType(TfLitePadding padding) {
|
||||
switch (padding) {
|
||||
case TfLitePadding::kTfLitePaddingSame:
|
||||
return PaddingType::kSame;
|
||||
case TfLitePadding::kTfLitePaddingValid:
|
||||
return PaddingType::kValid;
|
||||
case TfLitePadding::kTfLitePaddingUnknown:
|
||||
default:
|
||||
return PaddingType::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, int width, int height,
|
||||
int filter_width, int filter_height, int out_width,
|
||||
int out_height, const TfLiteType data_type,
|
||||
OpData* data) {
|
||||
bool has_bias = node->inputs->size == 3;
|
||||
// Check number of inputs/outputs
|
||||
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
|
||||
// Matching GetWindowedOutputSize in TensorFlow.
|
||||
auto padding = params->padding;
|
||||
data->padding = ComputePaddingHeightWidth(
|
||||
params->stride_height, params->stride_width,
|
||||
params->dilation_height_factor, params->dilation_width_factor, height,
|
||||
width, filter_height, filter_width, padding, &out_height, &out_width);
|
||||
|
||||
// Note that quantized inference requires that all tensors have their
|
||||
// parameters set. This is usually done during quantized training.
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
int num_channels = filter->dims->data[kConvQuantizedDimension];
|
||||
|
||||
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
|
||||
context, input, filter, bias, output, params->activation,
|
||||
&data->output_multiplier, &data->output_shift,
|
||||
&data->output_activation_min, &data->output_activation_max,
|
||||
data->per_channel_output_multiplier,
|
||||
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels));
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_FEATURE_MVE)
|
||||
int32_t buf_size = 0;
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
||||
auto* data = reinterpret_cast<OpData*>(node->user_data);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input_shape = GetTensorShape(input);
|
||||
RuntimeShape output_shape = GetTensorShape(output);
|
||||
|
||||
// Initialize cmsis-nn input dimensions
|
||||
cmsis_nn_dims input_dims;
|
||||
input_dims.n = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
input_dims.h = input->dims->data[1];
|
||||
input_dims.w = input->dims->data[2];
|
||||
input_dims.c = input_shape.Dims(3);
|
||||
|
||||
// Initialize cmsis-nn filter dimensions
|
||||
cmsis_nn_dims filter_dims;
|
||||
filter_dims.n = output_shape.Dims(3);
|
||||
filter_dims.h = filter->dims->data[1];
|
||||
filter_dims.w = filter->dims->data[2];
|
||||
filter_dims.c = input_dims.c;
|
||||
|
||||
// Initialize cmsis-nn output dimensions
|
||||
cmsis_nn_dims output_dims;
|
||||
output_dims.n = input_dims.n;
|
||||
output_dims.h = output->dims->data[1];
|
||||
output_dims.w = output->dims->data[2];
|
||||
output_dims.c = output_shape.Dims(3);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(
|
||||
context, node, params, input_dims.w, input_dims.h, filter_dims.w,
|
||||
filter_dims.h, output_dims.w, output_dims.h, input->type, data));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
// Initialize cmsis-nn convolution parameters
|
||||
cmsis_nn_conv_params conv_params;
|
||||
conv_params.input_offset = -input->params.zero_point;
|
||||
conv_params.output_offset = output->params.zero_point;
|
||||
conv_params.stride.h = params->stride_height;
|
||||
conv_params.stride.w = params->stride_width;
|
||||
conv_params.dilation.h = params->dilation_height_factor;
|
||||
conv_params.dilation.w = params->dilation_width_factor;
|
||||
conv_params.padding.h = data->padding.height;
|
||||
conv_params.padding.w = data->padding.width;
|
||||
conv_params.activation.min = data->output_activation_min;
|
||||
conv_params.activation.max = data->output_activation_max;
|
||||
|
||||
buf_size = arm_convolve_wrapper_s8_get_buffer_size(
|
||||
&conv_params, &input_dims, &filter_dims, &output_dims);
|
||||
}
|
||||
|
||||
if (buf_size > 0) {
|
||||
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
|
||||
context, buf_size, &data->buffer_idx));
|
||||
} else {
|
||||
data->buffer_idx = -1;
|
||||
}
|
||||
#endif
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* im2col,
|
||||
TfLiteEvalTensor* hwcn_weights,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
tflite::micro::GetTensorShape(im2col),
|
||||
tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantizedPerChannel(
|
||||
TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params,
|
||||
const OpData& data, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output, TfLiteEvalTensor* im2col) {
|
||||
// Initialize cmsis-nn convolution parameters
|
||||
cmsis_nn_conv_params conv_params;
|
||||
conv_params.input_offset = -data.input_zero_point;
|
||||
conv_params.output_offset = data.output_zero_point;
|
||||
conv_params.stride.h = params->stride_height;
|
||||
conv_params.stride.w = params->stride_width;
|
||||
conv_params.dilation.h = params->dilation_height_factor;
|
||||
conv_params.dilation.w = params->dilation_width_factor;
|
||||
conv_params.padding.h = data.padding.height;
|
||||
conv_params.padding.w = data.padding.width;
|
||||
conv_params.activation.min = data.output_activation_min;
|
||||
conv_params.activation.max = data.output_activation_max;
|
||||
|
||||
// Initialize cmsis-nn per channel quantization parameters
|
||||
cmsis_nn_per_channel_quant_params quant_params;
|
||||
quant_params.multiplier =
|
||||
const_cast<int32_t*>(data.per_channel_output_multiplier);
|
||||
quant_params.shift = const_cast<int32_t*>(data.per_channel_output_shift);
|
||||
|
||||
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_FEATURE_MVE)
|
||||
RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
|
||||
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
|
||||
|
||||
// Consistency check.
|
||||
TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
||||
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
|
||||
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
|
||||
if (tflite::micro::GetTensorData<int8_t>(bias)) {
|
||||
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
|
||||
}
|
||||
|
||||
// Initialize cmsis-nn dimensions
|
||||
// Input
|
||||
cmsis_nn_dims input_dims;
|
||||
input_dims.n = batch_size;
|
||||
input_dims.h = input_shape.Dims(1);
|
||||
input_dims.w = input_shape.Dims(2);
|
||||
input_dims.c = input_depth;
|
||||
|
||||
// Filter
|
||||
cmsis_nn_dims filter_dims;
|
||||
filter_dims.n = output_depth;
|
||||
filter_dims.h = filter_shape.Dims(1);
|
||||
filter_dims.w = filter_shape.Dims(2);
|
||||
filter_dims.c = input_depth;
|
||||
|
||||
// Bias
|
||||
cmsis_nn_dims bias_dims;
|
||||
bias_dims.n = 1;
|
||||
bias_dims.h = 1;
|
||||
bias_dims.w = 1;
|
||||
bias_dims.c = output_depth;
|
||||
|
||||
// Output
|
||||
cmsis_nn_dims output_dims;
|
||||
output_dims.n = batch_size;
|
||||
output_dims.h = output_shape.Dims(1);
|
||||
output_dims.w = output_shape.Dims(2);
|
||||
output_dims.c = output_depth;
|
||||
|
||||
// Initialize cmsis-nn context
|
||||
cmsis_nn_context ctx;
|
||||
ctx.buf = nullptr;
|
||||
ctx.size = 0;
|
||||
|
||||
if (data.buffer_idx > -1) {
|
||||
ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
|
||||
// Note: ctx.size is currently not used in cmsis-nn.
|
||||
// The buffer should be allocated in the Prepare function through
|
||||
// arm_convolve_wrapper_s8_get_buffer_size
|
||||
}
|
||||
|
||||
// arm_convolve_wrapper_s8 dispatches the optimized kernel accordingly with
|
||||
// the parameters passed
|
||||
arm_status status = arm_convolve_wrapper_s8(
|
||||
&ctx, &conv_params, &quant_params, &input_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
|
||||
tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
|
||||
if (status == ARM_MATH_SUCCESS) {
|
||||
return kTfLiteOk;
|
||||
} else {
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
#else
|
||||
#pragma message( \
|
||||
"CMSIS-NN optimization for conv not available for this target. Using reference kernel.")
|
||||
|
||||
ConvParams op_params;
|
||||
conv_params.input_offset = -data.input_zero_point;
|
||||
conv_params.output_offset = data.output_zero_point;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
|
||||
reference_integer_ops::ConvPerChannel(
|
||||
op_params, data->per_channel_output_multiplier,
|
||||
data->per_channel_output_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
|
||||
#endif
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
|
||||
TfLiteEvalTensor* hwcn_weights,
|
||||
TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output),
|
||||
tflite::micro::GetTensorShape(im2col),
|
||||
tflite::micro::GetTensorData<float>(im2col));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kFilterTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 3)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, data, input, filter, bias, nullptr,
|
||||
nullptr, output);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
return EvalQuantizedPerChannel(context, node, params, data, input, filter,
|
||||
bias, output, nullptr);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
return EvalQuantized(context, node, params, data, input, filter, bias,
|
||||
nullptr, nullptr, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace conv
|
||||
|
||||
TfLiteRegistration Register_CONV_2D() {
|
||||
return {/*init=*/conv::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/conv::Prepare,
|
||||
/*invoke=*/conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,478 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
|
||||
|
||||
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace depthwise_conv {
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kFilterTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
constexpr int kMaxChannels = 256;
|
||||
|
||||
// Depthwise conv is quantized along dimension 3:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
constexpr int kDepthwiseConvQuantizedDimension = 3;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
// Per channel output multiplier and shift.
|
||||
// TODO: Allocate dynamic buffers when b/158779832 is resolved
|
||||
int32_t per_channel_output_multiplier[kMaxChannels];
|
||||
int32_t per_channel_output_shift[kMaxChannels];
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
// Index to buffer for optimizations if applicable.
|
||||
int buffer_idx;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, int width,
|
||||
int height, int filter_width, int filter_height,
|
||||
const TfLiteType data_type, OpData* data) {
|
||||
bool has_bias = node->inputs->size == 3;
|
||||
// Check number of inputs/outputs
|
||||
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
|
||||
int unused_output_height, unused_output_width;
|
||||
// Set buffer index to a reset value
|
||||
data->buffer_idx = -1;
|
||||
data->padding = ComputePaddingHeightWidth(
|
||||
params->stride_height, params->stride_width, 1, 1, height, width,
|
||||
filter_height, filter_width, params->padding, &unused_output_height,
|
||||
&unused_output_width);
|
||||
|
||||
// Note that quantized inference requires that all tensors have their
|
||||
// parameters set. This is usually done during quantized training.
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
|
||||
return tflite::PopulateConvolutionQuantizationParams(
|
||||
context, input, filter, bias, output, params->activation,
|
||||
&data->output_multiplier, &data->output_shift,
|
||||
&data->output_activation_min, &data->output_activation_max,
|
||||
data->per_channel_output_multiplier,
|
||||
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
const TfLiteType data_type = input->type;
|
||||
int width = SizeOfDimension(input, 2);
|
||||
int height = SizeOfDimension(input, 1);
|
||||
int filter_width = SizeOfDimension(filter, 2);
|
||||
int filter_height = SizeOfDimension(filter, 1);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
// Allocate memory for per-channel quantization parameters
|
||||
const int num_channels =
|
||||
filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
TFLITE_DCHECK_LE(num_channels, kMaxChannels);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
|
||||
// All per-channel quantized tensors need valid zero point and scale arrays.
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(
|
||||
filter->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->zero_point);
|
||||
TF_LITE_ENSURE(
|
||||
context, affine_quantization->scale->size == 1 ||
|
||||
affine_quantization->scale->size ==
|
||||
filter->dims->data[kDepthwiseConvQuantizedDimension]);
|
||||
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
|
||||
affine_quantization->zero_point->size);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
|
||||
filter_width, filter_height, data_type,
|
||||
data));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
RuntimeShape input_shape = GetTensorShape(input);
|
||||
RuntimeShape output_shape = GetTensorShape(output);
|
||||
RuntimeShape filter_shape = GetTensorShape(filter);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
||||
|
||||
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
const int output_depth = MatchingDim(output_shape, 3, filter_shape, 3);
|
||||
TFLITE_DCHECK_EQ(batch_size, 1); /* Only batch = 1 is supported */
|
||||
|
||||
cmsis_nn_dims input_dims;
|
||||
input_dims.n = batch_size;
|
||||
input_dims.h = height;
|
||||
input_dims.w = width;
|
||||
input_dims.c = input_shape.Dims(3);
|
||||
|
||||
cmsis_nn_dims filter_dims;
|
||||
filter_dims.n = 1;
|
||||
filter_dims.h = filter_height;
|
||||
filter_dims.w = filter_width;
|
||||
filter_dims.c = output_depth;
|
||||
|
||||
cmsis_nn_dims output_dims;
|
||||
output_dims.n = batch_size;
|
||||
output_dims.h = output_shape.Dims(1);
|
||||
output_dims.w = output_shape.Dims(2);
|
||||
output_dims.c = output_depth;
|
||||
|
||||
cmsis_nn_dw_conv_params dw_conv_params;
|
||||
dw_conv_params.padding.h = data->padding.height;
|
||||
dw_conv_params.padding.w = data->padding.width;
|
||||
|
||||
const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
|
||||
&dw_conv_params, &input_dims, &filter_dims, &output_dims);
|
||||
|
||||
if (buf_size > 0) {
|
||||
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
|
||||
context, buf_size, &data->buffer_idx));
|
||||
} else {
|
||||
data->buffer_idx = -1;
|
||||
}
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, OpData* data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
cmsis_nn_dw_conv_params dw_conv_params;
|
||||
dw_conv_params.dilation.h = params->dilation_height_factor;
|
||||
dw_conv_params.dilation.w = params->dilation_width_factor;
|
||||
// Call to reference implementation can be removed when dilation is supported
|
||||
// in the optimized implementations.
|
||||
if (1 == dw_conv_params.dilation.h && 1 == dw_conv_params.dilation.w) {
|
||||
dw_conv_params.input_offset = -data->input_zero_point;
|
||||
dw_conv_params.output_offset = data->output_zero_point;
|
||||
dw_conv_params.stride.h = params->stride_height;
|
||||
dw_conv_params.stride.w = params->stride_width;
|
||||
dw_conv_params.padding.h = data->padding.height;
|
||||
dw_conv_params.padding.w = data->padding.width;
|
||||
// TODO(b/130439627): Use calculated value for clamping.
|
||||
dw_conv_params.activation.min = std::numeric_limits<int8_t>::min();
|
||||
dw_conv_params.activation.max = std::numeric_limits<int8_t>::max();
|
||||
dw_conv_params.ch_mult = params->depth_multiplier;
|
||||
|
||||
cmsis_nn_per_channel_quant_params quant_params;
|
||||
quant_params.multiplier = data->per_channel_output_multiplier;
|
||||
quant_params.shift = data->per_channel_output_shift;
|
||||
|
||||
RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
|
||||
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
|
||||
|
||||
TFLITE_DCHECK_LE(dw_conv_params.activation.min,
|
||||
dw_conv_params.activation.max);
|
||||
|
||||
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
|
||||
|
||||
if (tflite::micro::GetTensorData<int8_t>(bias)) {
|
||||
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
|
||||
}
|
||||
|
||||
cmsis_nn_dims input_dims;
|
||||
input_dims.n = batch_size;
|
||||
input_dims.h = input_shape.Dims(1);
|
||||
input_dims.w = input_shape.Dims(2);
|
||||
input_dims.c = input_shape.Dims(3);
|
||||
|
||||
cmsis_nn_dims filter_dims;
|
||||
filter_dims.n = filter_shape.Dims(0);
|
||||
filter_dims.h = filter_shape.Dims(1);
|
||||
filter_dims.w = filter_shape.Dims(2);
|
||||
filter_dims.c = output_depth;
|
||||
|
||||
cmsis_nn_dims bias_dims;
|
||||
bias_dims.n = 1;
|
||||
bias_dims.h = 1;
|
||||
bias_dims.w = 1;
|
||||
bias_dims.c = output_depth;
|
||||
|
||||
cmsis_nn_dims output_dims;
|
||||
output_dims.n = batch_size;
|
||||
output_dims.h = output_shape.Dims(1);
|
||||
output_dims.w = output_shape.Dims(2);
|
||||
output_dims.c = output_depth;
|
||||
|
||||
cmsis_nn_context ctx;
|
||||
ctx.buf = nullptr;
|
||||
/* 'size' is unused */
|
||||
ctx.size = 0;
|
||||
|
||||
if (data->buffer_idx > -1) {
|
||||
ctx.buf = context->GetScratchBuffer(context, data->buffer_idx);
|
||||
}
|
||||
|
||||
TFLITE_DCHECK_EQ(
|
||||
arm_depthwise_conv_wrapper_s8(
|
||||
&ctx, &dw_conv_params, &quant_params, &input_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
|
||||
tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(output)),
|
||||
ARM_MATH_SUCCESS);
|
||||
} else {
|
||||
DepthwiseParams op_params;
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.input_offset = -data->input_zero_point;
|
||||
op_params.weights_offset = 0;
|
||||
op_params.output_offset = data->output_zero_point;
|
||||
// TODO(b/130439627): Use calculated value for clamping.
|
||||
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
|
||||
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
|
||||
|
||||
reference_integer_ops::DepthwiseConvPerChannel(
|
||||
op_params, data->per_channel_output_multiplier,
|
||||
data->per_channel_output_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data->input_zero_point;
|
||||
const int32_t filter_offset = -data->filter_zero_point;
|
||||
const int32_t output_offset = data->output_zero_point;
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
|
||||
op_params.output_shift = -data->output_shift;
|
||||
|
||||
if (1 == op_params.dilation_width_factor &&
|
||||
1 == op_params.dilation_height_factor) {
|
||||
RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
|
||||
const int filter_height = filter_shape.Dims(1);
|
||||
const int filter_width = filter_shape.Dims(2);
|
||||
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
|
||||
const int input_height = input_shape.Dims(1);
|
||||
const int input_width = input_shape.Dims(2);
|
||||
const int input_depth = input_shape.Dims(3);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
const int output_height = output_shape.Dims(1);
|
||||
const int output_width = output_shape.Dims(2);
|
||||
arm_depthwise_conv_u8_basic_ver1(
|
||||
tflite::micro::GetTensorData<uint8_t>(input), input_width, input_height,
|
||||
input_depth, tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
filter_width, filter_height, op_params.depth_multiplier,
|
||||
op_params.padding_values.width, op_params.padding_values.height,
|
||||
op_params.stride_width, op_params.stride_height,
|
||||
op_params.dilation_width_factor, op_params.dilation_height_factor,
|
||||
tflite::micro::GetTensorData<int32_t>(bias), op_params.input_offset,
|
||||
op_params.weights_offset, op_params.output_offset,
|
||||
tflite::micro::GetTensorData<uint8_t>(output), output_width,
|
||||
output_height, op_params.quantized_activation_min,
|
||||
op_params.quantized_activation_max, op_params.output_shift,
|
||||
op_params.output_multiplier);
|
||||
} else {
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
|
||||
OpData& data = *(static_cast<OpData*>(node->user_data));
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kFilterTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 3)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
|
||||
// TODO(aselle): Consider whether float conv and quantized conv should be
|
||||
// separate ops to avoid dispatch overhead here.
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, &data, input, filter, bias, output);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
|
||||
output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantized(context, node, params, &data, input, filter, bias, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace depthwise_conv
|
||||
|
||||
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
|
||||
return {/*init=*/depthwise_conv::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/depthwise_conv::Prepare,
|
||||
/*invoke=*/depthwise_conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,350 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
|
||||
|
||||
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace fully_connected {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
// The index of the temporary tensor where the quantized inputs are cached.
|
||||
int input_quantized_index;
|
||||
// Index to buffer for optimizations if applicable.
|
||||
int buffer_idx;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kWeightsTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context,
|
||||
TfLiteFusedActivation activation,
|
||||
TfLiteType data_type, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
OpData* data) {
|
||||
TfLiteStatus status = kTfLiteOk;
|
||||
// Set buffer index to a reset value
|
||||
data->buffer_idx = -1;
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
double real_multiplier = 0.0;
|
||||
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
|
||||
context, input, filter, bias, output, &real_multiplier));
|
||||
int exponent;
|
||||
QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
|
||||
data->output_shift = -exponent;
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
const auto params =
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
|
||||
"Hybrid models are not supported on TFLite Micro.");
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params->activation,
|
||||
input->type, input, filter, bias,
|
||||
output, data));
|
||||
|
||||
if (input->type == kTfLiteInt8 && nullptr != GetTensorData<int32_t>(bias)) {
|
||||
RuntimeShape filter_shape = GetTensorShape(filter);
|
||||
RuntimeShape output_shape = GetTensorShape(output);
|
||||
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
|
||||
const int filter_dim_count = filter_shape.DimensionsCount();
|
||||
cmsis_nn_dims filter_dims;
|
||||
filter_dims.n = filter_shape.Dims(filter_dim_count - 1);
|
||||
filter_dims.h = 1;
|
||||
filter_dims.w = 1;
|
||||
filter_dims.c = output_shape.Dims(1);
|
||||
|
||||
const int32_t buf_size =
|
||||
arm_fully_connected_s8_get_buffer_size(&filter_dims);
|
||||
|
||||
if (buf_size > 0) {
|
||||
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
|
||||
context, buf_size, &data->buffer_idx));
|
||||
} else {
|
||||
data->buffer_idx = -1;
|
||||
}
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
// The 'if' condition can be removed when null handling of bias is added to
|
||||
// arm_fully_connected_s8
|
||||
if (nullptr != tflite::micro::GetTensorData<int32_t>(bias)) {
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
|
||||
const int batches = output_shape.Dims(0);
|
||||
const int output_depth = output_shape.Dims(1);
|
||||
const RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
|
||||
const int filter_dim_count = filter_shape.DimensionsCount();
|
||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
||||
const RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
|
||||
|
||||
cmsis_nn_fc_params fc_params;
|
||||
fc_params.input_offset = -data.input_zero_point;
|
||||
fc_params.output_offset = data.output_zero_point;
|
||||
fc_params.activation.min = data.output_activation_min;
|
||||
fc_params.activation.max = data.output_activation_max;
|
||||
|
||||
cmsis_nn_per_tensor_quant_params quant_params;
|
||||
quant_params.multiplier = data.output_multiplier;
|
||||
// TODO(b/138810107): Figure out whether output shift should be inverted
|
||||
quant_params.shift = -data.output_shift;
|
||||
|
||||
cmsis_nn_dims input_dims;
|
||||
input_dims.n = batches;
|
||||
input_dims.h = input_shape.Dims(1);
|
||||
input_dims.w = input_shape.Dims(2);
|
||||
input_dims.c = input_shape.Dims(3);
|
||||
|
||||
cmsis_nn_dims filter_dims;
|
||||
filter_dims.n = accum_depth;
|
||||
filter_dims.h = 1;
|
||||
filter_dims.w = 1;
|
||||
filter_dims.c = output_depth;
|
||||
|
||||
cmsis_nn_dims bias_dims;
|
||||
bias_dims.n = 1;
|
||||
bias_dims.h = 1;
|
||||
bias_dims.w = 1;
|
||||
bias_dims.c = output_depth;
|
||||
|
||||
cmsis_nn_dims output_dims;
|
||||
output_dims.n = batches;
|
||||
output_dims.h = 1;
|
||||
output_dims.w = 1;
|
||||
output_dims.c = output_depth;
|
||||
|
||||
cmsis_nn_context ctx;
|
||||
ctx.buf = nullptr;
|
||||
ctx.size = 0;
|
||||
|
||||
if (data.buffer_idx > -1) {
|
||||
ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_EQ(
|
||||
context,
|
||||
arm_fully_connected_s8(
|
||||
&ctx, &fc_params, &quant_params, &input_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
|
||||
tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(output)),
|
||||
ARM_MATH_SUCCESS);
|
||||
} else {
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.weights_offset = -data.filter_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// TODO(b/138810107): Figure out whether output shift should be inverted
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
reference_integer_ops::FullyConnected(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
|
||||
reference_ops::FullyConnected( \
|
||||
op_params, tflite::micro::GetTensorShape(input), \
|
||||
tflite::micro::GetTensorData<uint8_t>(input), \
|
||||
tflite::micro::GetTensorShape(filter), \
|
||||
tflite::micro::GetTensorData<uint8_t>(filter), \
|
||||
tflite::micro::GetTensorShape(bias), \
|
||||
tflite::micro::GetTensorData<int32_t>(bias), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<output_data_type>(output))
|
||||
switch (output->type) {
|
||||
case kTfLiteUInt8:
|
||||
TF_LITE_FULLY_CONNECTED(uint8_t);
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
TF_LITE_FULLY_CONNECTED(int16_t);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(output->type), output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteFusedActivation activation,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
tflite::reference_ops::FullyConnected(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
const auto* params =
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
tflite::micro::GetEvalInput(context, node, kBiasTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
// Checks in Prepare ensure input, output and filter types are all the same.
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
return EvalFloat(context, node, params->activation, input, filter, bias,
|
||||
output);
|
||||
case kTfLiteInt8:
|
||||
return EvalQuantizedInt8(context, node, data, input, filter, bias,
|
||||
output);
|
||||
|
||||
case kTfLiteUInt8:
|
||||
return EvalQuantized(context, node, data, input, filter, bias, output);
|
||||
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace fully_connected
|
||||
|
||||
TfLiteRegistration Register_FULLY_CONNECTED() {
|
||||
return {/*init=*/fully_connected::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/fully_connected::Prepare,
|
||||
/*invoke=*/fully_connected::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,218 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/mul.h"
|
||||
|
||||
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace mul {
|
||||
|
||||
constexpr int kInput1Tensor = 0;
|
||||
constexpr int kInput2Tensor = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input1_zero_point;
|
||||
int32_t input2_zero_point;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
|
||||
double real_multiplier =
|
||||
input1->params.scale * input2->params.scale / output->params.scale;
|
||||
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
if (output->dims->size == 0) {
|
||||
return AllocateOutputDimensionsFromInput(context, input1, input2, output);
|
||||
}
|
||||
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
data->input1_zero_point = input1->params.zero_point;
|
||||
data->input2_zero_point = input2->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
CalculateOpData(context, node, params, data);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(data.output_activation_min, data.output_activation_max,
|
||||
&op_params);
|
||||
op_params.input1_offset = -data.input1_zero_point;
|
||||
op_params.input2_offset = -data.input2_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
op_params.output_shift = data.output_shift;
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
#define TF_LITE_MUL(type, opname, dtype) \
|
||||
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
|
||||
tflite::micro::GetTensorData<dtype>(input1), \
|
||||
tflite::micro::GetTensorShape(input2), \
|
||||
tflite::micro::GetTensorData<dtype>(input2), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<dtype>(output));
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
|
||||
} else {
|
||||
arm_elementwise_mul_s8(
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
op_params.input1_offset, op_params.input2_offset,
|
||||
tflite::micro::GetTensorData<int8_t>(output),
|
||||
op_params.output_offset, op_params.output_multiplier,
|
||||
op_params.output_shift, op_params.quantized_activation_min,
|
||||
op_params.quantized_activation_max,
|
||||
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorShape(output)));
|
||||
}
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
|
||||
} else {
|
||||
TF_LITE_MUL(reference_ops, Mul, uint8_t);
|
||||
}
|
||||
}
|
||||
#undef TF_LITE_MUL
|
||||
}
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_MUL(opname) \
|
||||
reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
|
||||
tflite::micro::GetTensorData<float>(input1), \
|
||||
tflite::micro::GetTensorShape(input2), \
|
||||
tflite::micro::GetTensorData<float>(input2), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(BroadcastMul4DSlow);
|
||||
} else {
|
||||
TF_LITE_MUL(Mul);
|
||||
}
|
||||
#undef TF_LITE_MUL
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInput1Tensor);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInput2Tensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
switch (input1->type) {
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
EvalQuantized(context, node, params, data, input1, input2, output);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, input1, input2, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace mul
|
||||
|
||||
TfLiteRegistration Register_MUL() {
|
||||
return {mul::Init, nullptr /* Free */, mul::Prepare, mul::Eval};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,397 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
|
||||
|
||||
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
|
||||
#include "flatbuffers/base.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace pooling {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
// Index to buffer for optimizations if applicable.
|
||||
int buffer_idx;
|
||||
|
||||
int32_t activation_min;
|
||||
int32_t activation_max;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context,
|
||||
const TfLitePoolParams* params,
|
||||
const TfLiteTensor* input, TfLiteTensor* output,
|
||||
OpData* data) {
|
||||
// input: batch, height, width, channel
|
||||
int height = SizeOfDimension(input, 1);
|
||||
int width = SizeOfDimension(input, 2);
|
||||
|
||||
int out_height, out_width;
|
||||
|
||||
data->padding = ComputePaddingHeightWidth(
|
||||
params->stride_height, params->stride_width,
|
||||
/*dilation_rate_height=*/1,
|
||||
/*dilation_rate_width=*/1, height, width, params->filter_height,
|
||||
params->filter_width, params->padding, &out_height, &out_width);
|
||||
|
||||
if (input->type != kTfLiteFloat32) {
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->activation_min,
|
||||
&data->activation_max));
|
||||
TFLITE_DCHECK_LE(data->activation_min, data->activation_max);
|
||||
}
|
||||
|
||||
// Set buffer index to a reset value
|
||||
data->buffer_idx = -1;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
float activation_min, activation_max;
|
||||
CalculateActivationRange(params->activation, &activation_min,
|
||||
&activation_max);
|
||||
|
||||
PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.filter_height = params->filter_height;
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.float_activation_min = activation_min;
|
||||
op_params.float_activation_max = activation_max;
|
||||
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
TfLiteEvalTensor* output) {
|
||||
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
|
||||
|
||||
PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.filter_height = params->filter_height;
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.quantized_activation_min = data.activation_min;
|
||||
op_params.quantized_activation_max = data.activation_max;
|
||||
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
||||
|
||||
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
|
||||
|
||||
cmsis_nn_dims input_dims;
|
||||
input_dims.n = 1;
|
||||
input_dims.h = input_shape.Dims(1);
|
||||
input_dims.w = input_shape.Dims(2);
|
||||
input_dims.c = depth;
|
||||
|
||||
cmsis_nn_dims output_dims;
|
||||
output_dims.n = 1;
|
||||
output_dims.h = output_shape.Dims(1);
|
||||
output_dims.w = output_shape.Dims(2);
|
||||
output_dims.c = depth;
|
||||
|
||||
cmsis_nn_pool_params pool_params;
|
||||
pool_params.stride.h = params->stride_height;
|
||||
pool_params.stride.w = params->stride_width;
|
||||
pool_params.padding.h = data.padding.height;
|
||||
pool_params.padding.w = data.padding.width;
|
||||
pool_params.activation.min = data.activation_min;
|
||||
pool_params.activation.max = data.activation_max;
|
||||
|
||||
cmsis_nn_dims filter_dims;
|
||||
filter_dims.n = 1;
|
||||
filter_dims.h = params->filter_height;
|
||||
filter_dims.w = params->filter_width;
|
||||
filter_dims.c = 1;
|
||||
|
||||
cmsis_nn_context ctx;
|
||||
ctx.buf = nullptr;
|
||||
ctx.size = 0;
|
||||
if (data.buffer_idx > -1) {
|
||||
ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
|
||||
}
|
||||
|
||||
TFLITE_DCHECK_EQ(
|
||||
arm_avgpool_s8(&ctx, &pool_params, &input_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
&filter_dims, &output_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(output)),
|
||||
ARM_MATH_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLitePoolParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
float activation_min, activation_max;
|
||||
CalculateActivationRange(params->activation, &activation_min,
|
||||
&activation_max);
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.filter_height = params->filter_height;
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.float_activation_min = activation_min;
|
||||
op_params.float_activation_max = activation_max;
|
||||
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLitePoolParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
TfLiteEvalTensor* output) {
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.filter_height = params->filter_height;
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.quantized_activation_min = data.activation_min;
|
||||
op_params.quantized_activation_max = data.activation_max;
|
||||
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
|
||||
TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
TfLiteEvalTensor* output) {
|
||||
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
|
||||
|
||||
cmsis_nn_dims input_dims;
|
||||
input_dims.n = 1;
|
||||
input_dims.h = input_shape.Dims(1);
|
||||
input_dims.w = input_shape.Dims(2);
|
||||
input_dims.c = depth;
|
||||
|
||||
cmsis_nn_dims output_dims;
|
||||
output_dims.n = 1;
|
||||
output_dims.h = output_shape.Dims(1);
|
||||
output_dims.w = output_shape.Dims(2);
|
||||
output_dims.c = depth;
|
||||
|
||||
cmsis_nn_pool_params pool_params;
|
||||
pool_params.stride.h = params->stride_height;
|
||||
pool_params.stride.w = params->stride_width;
|
||||
pool_params.padding.h = data.padding.height;
|
||||
pool_params.padding.w = data.padding.width;
|
||||
pool_params.activation.min = data.activation_min;
|
||||
pool_params.activation.max = data.activation_max;
|
||||
|
||||
cmsis_nn_dims filter_dims;
|
||||
filter_dims.n = 1;
|
||||
filter_dims.h = params->filter_height;
|
||||
filter_dims.w = params->filter_width;
|
||||
filter_dims.c = 1;
|
||||
|
||||
cmsis_nn_context ctx;
|
||||
ctx.buf = nullptr;
|
||||
ctx.size = 0;
|
||||
if (data.buffer_idx > -1) {
|
||||
ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
|
||||
}
|
||||
|
||||
TFLITE_DCHECK_EQ(
|
||||
arm_max_pool_s8(&ctx, &pool_params, &input_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
|
||||
&output_dims,
|
||||
tflite::micro::GetTensorData<int8_t>(output)),
|
||||
ARM_MATH_SUCCESS);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus MaxPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus AveragePrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
RuntimeShape input_shape = GetTensorShape(input);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
|
||||
RuntimeShape output_shape = GetTensorShape(output);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
||||
|
||||
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
|
||||
const int output_width = output_shape.Dims(2);
|
||||
|
||||
const int32_t buffer_size =
|
||||
arm_avgpool_s8_get_buffer_size(output_width, depth);
|
||||
|
||||
if (buffer_size > 0) {
|
||||
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
|
||||
context, buffer_size, &data->buffer_idx));
|
||||
} else {
|
||||
data->buffer_idx = -1;
|
||||
}
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// Inputs and outputs share the same type, guaranteed by the converter.
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
AverageEvalFloat(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
AverageEvalQuantized(context, node, params, data, input, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
MaxEvalFloat(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
MaxEvalQuantizedUInt8(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
MaxEvalInt8(context, node, params, data, input, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace pooling
|
||||
|
||||
TfLiteRegistration Register_AVERAGE_POOL_2D() {
|
||||
return {/*init=*/pooling::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pooling::AveragePrepare,
|
||||
/*invoke=*/pooling::AverageEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_MAX_POOL_2D() {
|
||||
return {/*init=*/pooling::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pooling::MaxPrepare,
|
||||
/*invoke=*/pooling::MaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,175 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
|
||||
|
||||
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
|
||||
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
const TfLiteTensor* input,
|
||||
TfLiteTensor* output,
|
||||
const TfLiteSoftmaxParams* params,
|
||||
SoftmaxParams* op_data) {
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
|
||||
// NOTE: Current int16_t softmax output does not require symmetric
|
||||
// scaling
|
||||
// - so no need to verify scale here.
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
|
||||
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
|
||||
}
|
||||
}
|
||||
|
||||
static const int kScaledDiffIntegerBits = 5;
|
||||
|
||||
int input_left_shift;
|
||||
tflite::PreprocessSoftmaxScaling(
|
||||
static_cast<double>(params->beta),
|
||||
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
|
||||
&op_data->input_multiplier, &input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
op_data->diff_min =
|
||||
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
|
||||
op_data->input_left_shift);
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
op_data->beta = static_cast<double>(params->beta);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
|
||||
return CalculateSoftmaxParams(context, input, output, params, data);
|
||||
}
|
||||
|
||||
// Takes a tensor and performs softmax along the last dimension.
|
||||
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
const auto input_shape = tflite::micro::GetTensorShape(input);
|
||||
const auto output_shape = tflite::micro::GetTensorShape(output);
|
||||
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
if (output->type == kTfLiteInt16) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
} else {
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
|
||||
arm_softmax_s8(tflite::micro::GetTensorData<int8_t>(input), outer_size,
|
||||
depth, op_data.input_multiplier, op_data.input_left_shift,
|
||||
op_data.diff_min,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const SoftmaxParams& data =
|
||||
*(static_cast<const SoftmaxParams*>(node->user_data));
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
SoftmaxFloat(input, output, data);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8:
|
||||
case kTfLiteUInt8: {
|
||||
SoftmaxQuantized(input, output, data);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration Register_SOFTMAX() {
|
||||
return {/*init=*/activations::SoftmaxInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::SoftmaxPrepare,
|
||||
/*invoke=*/activations::SoftmaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,722 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace comparisons {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
ComparisonParams params;
|
||||
};
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteBool:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<bool>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<bool>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<bool>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<bool>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::EqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// TODO(renjieliu): Refactor the logic to avoid duplications.
|
||||
TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteBool:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<bool>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<bool>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<bool>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<bool>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::NotEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::GreaterEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
|
||||
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
|
||||
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
|
||||
bool* output_data = tflite::micro::GetTensorData<bool>(output);
|
||||
|
||||
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<float>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<float>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
|
||||
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {
|
||||
auto input1_offset = -input1->params.zero_point;
|
||||
auto input2_offset = -input2->params.zero_point;
|
||||
const int kLeftShift = 8;
|
||||
|
||||
int32_t input1_multiplier;
|
||||
int input1_shift;
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
static_cast<double>(input1->params.scale), &input1_multiplier,
|
||||
&input1_shift);
|
||||
int32_t input2_multiplier;
|
||||
int input2_shift;
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
static_cast<double>(input2->params.scale), &input2_multiplier,
|
||||
&input2_shift);
|
||||
|
||||
data->params.left_shift = kLeftShift;
|
||||
data->params.input1_offset = input1_offset;
|
||||
data->params.input1_multiplier = input1_multiplier;
|
||||
data->params.input1_shift = input1_shift;
|
||||
data->params.input2_offset = input2_offset;
|
||||
data->params.input2_multiplier = input2_multiplier;
|
||||
data->params.input2_shift = input2_shift;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace comparisons
|
||||
|
||||
TfLiteRegistration Register_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::EqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_NOT_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::NotEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_GREATER() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::GreaterEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_GREATER_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::GreaterEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_LESS() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::LessEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_LESS_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::LessEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,264 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace concatenation {
|
||||
|
||||
constexpr int kMaxInputNum = 10; // Maximum number of input tensors
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
ConcatenationParams params;
|
||||
};
|
||||
|
||||
// Handles negative axis index, coerces to positive index value.
|
||||
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
|
||||
if (axis >= 0) {
|
||||
return axis;
|
||||
} else {
|
||||
return NumDimensions(output_tensor) + axis;
|
||||
}
|
||||
}
|
||||
|
||||
// The following functions are helpers to get tensor data in the format that the
|
||||
// reference op implementation expects. They provide the same functionality as
|
||||
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
|
||||
|
||||
// Gets shapes from a list of tensors.
|
||||
inline void GetAllInputTensorShapes(const TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
RuntimeShape all_shapes[kMaxInputNum]) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
for (int i = 0; i < node->inputs->size; ++i) {
|
||||
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
|
||||
RuntimeShape shape = tflite::micro::GetTensorShape(t);
|
||||
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
|
||||
}
|
||||
}
|
||||
|
||||
// Get shape pointers from a list of shapes.
|
||||
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
|
||||
const RuntimeShape* pointers[]) {
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
pointers[i] = &shapes[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Gets data pointers from a list of tensors.
|
||||
template <typename T>
|
||||
inline void GetAllInputTensorData(const TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
T* all_data[kMaxInputNum]) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
for (int i = 0; i < node->inputs->size; ++i) {
|
||||
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
|
||||
all_data[i] = tflite::micro::GetTensorData<T>(t);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename data_type>
|
||||
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Collect the shapes and data pointer of input tensors
|
||||
RuntimeShape inputs_shape[kMaxInputNum];
|
||||
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
|
||||
const data_type* inputs_data[kMaxInputNum];
|
||||
GetAllInputTensorShapes(context, node, inputs_shape);
|
||||
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
|
||||
GetAllInputTensorData(context, node, inputs_data);
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<data_type>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Collect the shapes and data pointer of input tensors
|
||||
RuntimeShape inputs_shape[kMaxInputNum];
|
||||
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
|
||||
const uint8_t* inputs_data[kMaxInputNum];
|
||||
GetAllInputTensorShapes(context, node, inputs_shape);
|
||||
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
|
||||
GetAllInputTensorData(context, node, inputs_data);
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
reference_ops::ConcatenationWithScaling(
|
||||
data->params, inputs_shape_ptr, inputs_data,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
// This function only checks the types. Additional shape validations are
|
||||
// performed in the reference implementation called during Eval().
|
||||
const TfLiteConcatenationParams* params =
|
||||
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
|
||||
|
||||
TfLiteType input_type = GetInput(context, node, 0)->type;
|
||||
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
|
||||
|
||||
// Check activation and input type
|
||||
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
|
||||
TF_LITE_ENSURE(context,
|
||||
input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
|
||||
input_type == kTfLiteInt8 || input_type == kTfLiteInt32 ||
|
||||
input_type == kTfLiteInt64);
|
||||
|
||||
// Output type must match input type
|
||||
TF_LITE_ENSURE_EQ(context, output_type, input_type);
|
||||
|
||||
// This implementation does not support large number of input tensors
|
||||
const int num_inputs = NumInputs(node);
|
||||
TF_LITE_ENSURE(context, num_inputs <= kMaxInputNum);
|
||||
|
||||
// Shapes with dimensions >4 are not yet supported with static allocation.
|
||||
for (int i = 0; i < num_inputs; ++i) {
|
||||
const TfLiteTensor* input = GetInput(context, node, i);
|
||||
int num_dimensions = NumDimensions(input);
|
||||
|
||||
if (num_dimensions > 4) {
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Op Concatenation does not currently support num dimensions >4 "
|
||||
"Tensor has %d dimensions.",
|
||||
num_dimensions);
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate OpData.
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (output_type) { // Already know in/outtypes are same.
|
||||
case kTfLiteFloat32:
|
||||
case kTfLiteInt32:
|
||||
case kTfLiteInt64: {
|
||||
data->params.axis = CalculatePositiveAxis(params->axis, output);
|
||||
data->params.inputs_count = node->inputs->size;
|
||||
break;
|
||||
}
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8: {
|
||||
data->params.axis = CalculatePositiveAxis(params->axis, output);
|
||||
data->params.inputs_count = node->inputs->size;
|
||||
|
||||
float* input_scales =
|
||||
reinterpret_cast<float*>(context->AllocatePersistentBuffer(
|
||||
context, node->inputs->size * sizeof(float)));
|
||||
|
||||
int32_t* input_zero_points =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, node->inputs->size * sizeof(int32_t)));
|
||||
|
||||
// Allocate persistent scale and zeropoint buffers.
|
||||
// Store input scale and zero point values in OpParams:
|
||||
for (int i = 0; i < node->inputs->size; ++i) {
|
||||
const TfLiteTensor* t = GetInput(context, node, i);
|
||||
input_scales[i] = t->params.scale;
|
||||
input_zero_points[i] = t->params.zero_point;
|
||||
}
|
||||
|
||||
data->params.input_scale = input_scales;
|
||||
data->params.input_zeropoint = input_zero_points;
|
||||
data->params.output_zeropoint = output->params.zero_point;
|
||||
data->params.output_scale = output->params.scale;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Op Concatenation does not currently support Type '%s'.",
|
||||
TfLiteTypeGetName(output_type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
|
||||
|
||||
switch (output_type) { // Already know in/outtypes are same.
|
||||
case kTfLiteFloat32:
|
||||
EvalUnquantized<float>(context, node);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
EvalUnquantized<int32_t>(context, node);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedUInt8(context, node);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalUnquantized<int8_t>(context, node);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
EvalUnquantized<int64_t>(context, node);
|
||||
break;
|
||||
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Op Concatenation does not currently support Type '%s'.",
|
||||
TfLiteTypeGetName(output_type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace concatenation
|
||||
|
||||
TfLiteRegistration Register_CONCATENATION() {
|
||||
return {/*init=*/concatenation::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/concatenation::Prepare,
|
||||
/*invoke=*/concatenation::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
334
components/tflite_micro/tensorflow/lite/micro/kernels/conv.cc
Normal file
334
components/tflite_micro/tensorflow/lite/micro/kernels/conv.cc
Normal file
@@ -0,0 +1,334 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/conv.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace conv {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kFilterTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// Conv is quantized along dimension 0:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
constexpr int kConvQuantizedDimension = 0;
|
||||
|
||||
// This file has 2 implementation of Conv.
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
// Per channel output multiplier and shift.
|
||||
int32_t* per_channel_output_multiplier;
|
||||
int32_t* per_channel_output_shift;
|
||||
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
};
|
||||
|
||||
inline PaddingType RuntimePaddingType(TfLitePadding padding) {
|
||||
switch (padding) {
|
||||
case TfLitePadding::kTfLitePaddingSame:
|
||||
return PaddingType::kSame;
|
||||
case TfLitePadding::kTfLitePaddingValid:
|
||||
return PaddingType::kValid;
|
||||
case TfLitePadding::kTfLitePaddingUnknown:
|
||||
default:
|
||||
return PaddingType::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteConvParams* params, int width,
|
||||
int height, int filter_width, int filter_height,
|
||||
int out_width, int out_height,
|
||||
const TfLiteType data_type, OpData* data) {
|
||||
bool has_bias = node->inputs->size == 3;
|
||||
// Check number of inputs/outputs
|
||||
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
|
||||
// Matching GetWindowedOutputSize in TensorFlow.
|
||||
auto padding = params->padding;
|
||||
data->padding = ComputePaddingHeightWidth(
|
||||
params->stride_height, params->stride_width,
|
||||
params->dilation_height_factor, params->dilation_width_factor, height,
|
||||
width, filter_height, filter_width, padding, &out_height, &out_width);
|
||||
|
||||
// Note that quantized inference requires that all tensors have their
|
||||
// parameters set. This is usually done during quantized training.
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
int output_channels = filter->dims->data[kConvQuantizedDimension];
|
||||
|
||||
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
|
||||
context, input, filter, bias, output, params->activation,
|
||||
&data->output_multiplier, &data->output_shift,
|
||||
&data->output_activation_min, &data->output_activation_max,
|
||||
data->per_channel_output_multiplier,
|
||||
reinterpret_cast<int*>(data->per_channel_output_shift),
|
||||
output_channels));
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
|
||||
int input_width = input->dims->data[2];
|
||||
int input_height = input->dims->data[1];
|
||||
int filter_width = filter->dims->data[2];
|
||||
int filter_height = filter->dims->data[1];
|
||||
int output_width = output->dims->data[2];
|
||||
int output_height = output->dims->data[1];
|
||||
|
||||
// Dynimically allocate per-channel quantization parameters.
|
||||
const int num_channels = filter->dims->data[kConvQuantizedDimension];
|
||||
data->per_channel_output_multiplier =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
data->per_channel_output_shift =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
|
||||
// All per-channel quantized tensors need valid zero point and scale arrays.
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
|
||||
const auto* affine_quantization =
|
||||
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->zero_point);
|
||||
|
||||
TF_LITE_ENSURE(context,
|
||||
affine_quantization->scale->size == 1 ||
|
||||
affine_quantization->scale->size ==
|
||||
filter->dims->data[kConvQuantizedDimension]);
|
||||
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
|
||||
affine_quantization->zero_point->size);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(
|
||||
context, node, params, input_width, input_height, filter_width,
|
||||
filter_height, output_width, output_height, input->type, data));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
return kTfLiteOk;
|
||||
} // namespace conv
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
tflite::micro::GetTensorShape(im2col),
|
||||
tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output,
|
||||
TfLiteEvalTensor* im2col) {
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
reference_integer_ops::ConvPerChannel(
|
||||
op_params, data.per_channel_output_multiplier,
|
||||
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
|
||||
TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.padding_type = RuntimePaddingType(params->padding);
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output),
|
||||
tflite::micro::GetTensorShape(im2col),
|
||||
tflite::micro::GetTensorData<float>(im2col));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kFilterTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 3)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, data, input, filter, bias, nullptr,
|
||||
nullptr, output);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
|
||||
output, nullptr);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantized(context, node, params, data, input, filter, bias, nullptr,
|
||||
nullptr, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace conv
|
||||
|
||||
TfLiteRegistration Register_CONV_2D() {
|
||||
return {/*init=*/conv::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/conv::Prepare,
|
||||
/*invoke=*/conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,327 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace depthwise_conv {
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kFilterTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// Depthwise conv is quantized along dimension 3:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
constexpr int kDepthwiseConvQuantizedDimension = 3;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
// Per channel output multiplier and shift.
|
||||
int32_t* per_channel_output_multiplier;
|
||||
int32_t* per_channel_output_shift;
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, int width,
|
||||
int height, int filter_width, int filter_height,
|
||||
const TfLiteType data_type, OpData* data) {
|
||||
bool has_bias = node->inputs->size == 3;
|
||||
// Check number of inputs/outputs
|
||||
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
|
||||
int unused_output_height, unused_output_width;
|
||||
data->padding = ComputePaddingHeightWidth(
|
||||
params->stride_height, params->stride_width, 1, 1, height, width,
|
||||
filter_height, filter_width, params->padding, &unused_output_height,
|
||||
&unused_output_width);
|
||||
|
||||
// Note that quantized inference requires that all tensors have their
|
||||
// parameters set. This is usually done during quantized training.
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
|
||||
return tflite::PopulateConvolutionQuantizationParams(
|
||||
context, input, filter, bias, output, params->activation,
|
||||
&data->output_multiplier, &data->output_shift,
|
||||
&data->output_activation_min, &data->output_activation_max,
|
||||
data->per_channel_output_multiplier,
|
||||
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
|
||||
const TfLiteType data_type = input->type;
|
||||
int width = SizeOfDimension(input, 2);
|
||||
int height = SizeOfDimension(input, 1);
|
||||
int filter_width = SizeOfDimension(filter, 2);
|
||||
int filter_height = SizeOfDimension(filter, 1);
|
||||
|
||||
// Per channel quantization is only needed for int8_t inference. For other
|
||||
// quantized types, only a single scale and zero point is needed.
|
||||
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
// Dynimically allocate per-channel quantization parameters.
|
||||
data->per_channel_output_multiplier =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
data->per_channel_output_shift =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
|
||||
// All per-channel quantized tensors need valid zero point and scale arrays.
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(
|
||||
filter->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->zero_point);
|
||||
TF_LITE_ENSURE(
|
||||
context, affine_quantization->scale->size == 1 ||
|
||||
affine_quantization->scale->size ==
|
||||
filter->dims->data[kDepthwiseConvQuantizedDimension]);
|
||||
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
|
||||
affine_quantization->zero_point->size);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
|
||||
filter_width, filter_height, data_type,
|
||||
data));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params,
|
||||
const OpData& data, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
DepthwiseParams op_params;
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.weights_offset = 0;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
// TODO(b/130439627): Use calculated value for clamping.
|
||||
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
|
||||
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
|
||||
|
||||
reference_integer_ops::DepthwiseConvPerChannel(
|
||||
op_params, data.per_channel_output_multiplier,
|
||||
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
|
||||
op_params.output_shift = -data.output_shift;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kFilterTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 3)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
|
||||
// TODO(aselle): Consider whether float conv and quantized conv should be
|
||||
// separate ops to avoid dispatch overhead here.
|
||||
switch (input->type) { // Already know in/out types are same.
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, data, input, filter, bias, output);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
|
||||
output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantized(context, node, params, data, input, filter, bias, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace depthwise_conv
|
||||
|
||||
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
|
||||
return {/*init=*/depthwise_conv::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/depthwise_conv::Prepare,
|
||||
/*invoke=*/depthwise_conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,164 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace dequantize {
|
||||
|
||||
struct OpData {
|
||||
tflite::DequantizationParams quantization_params;
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
// TODO(b/140515557): Add cached dequant to improve hybrid model performance.
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
|
||||
input->type == kTfLiteInt8 ||
|
||||
input->type == kTfLiteInt16);
|
||||
TF_LITE_ENSURE(
|
||||
context, output->type == kTfLiteFloat32 || output->type == kTfLiteInt32);
|
||||
|
||||
if (output->type == kTfLiteInt32) {
|
||||
const double effective_output_scale =
|
||||
static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
data->quantization_params.zero_point = input->params.zero_point;
|
||||
data->quantization_params.scale = static_cast<double>(input->params.scale);
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
switch (input->type) {
|
||||
case kTfLiteUInt8:
|
||||
reference_ops::Dequantize(data->quantization_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Dequantize(data->quantization_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::Dequantize(data->quantization_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int16_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (output->type == kTfLiteInt32) {
|
||||
int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorShape(output));
|
||||
switch (input->type) {
|
||||
case kTfLiteInt16: {
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int16_t>(input), flat_size,
|
||||
data->output_multiplier, data->output_shift,
|
||||
data->quantization_params.zero_point, data->output_zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
break;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int8_t>(input), flat_size,
|
||||
data->output_multiplier, data->output_shift,
|
||||
data->quantization_params.zero_point, data->output_zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace dequantize
|
||||
|
||||
TfLiteRegistration Register_DEQUANTIZE() {
|
||||
return {/*init=*/dequantize::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/dequantize::Prepare,
|
||||
/*invoke=*/dequantize::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,212 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace elementwise {
|
||||
namespace {
|
||||
|
||||
bool IsNumericSupportedType(const TfLiteType type) {
|
||||
return type == kTfLiteFloat32;
|
||||
}
|
||||
|
||||
bool IsLogicalSupportedType(const TfLiteType type) {
|
||||
return type == kTfLiteBool;
|
||||
}
|
||||
|
||||
typedef bool (*IsSupportedType)(TfLiteType);
|
||||
template <IsSupportedType>
|
||||
TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
if (!IsSupportedType(input->type)) {
|
||||
TF_LITE_KERNEL_LOG(context, "Input data type %s (%d) is not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
T func(T), TfLiteType expected_type) {
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type);
|
||||
const size_t num_elements = ElementCount(*input->dims);
|
||||
const T* in_data = tflite::micro::GetTensorData<T>(input);
|
||||
T* out_data = tflite::micro::GetTensorData<T>(output);
|
||||
for (size_t i = 0; i < num_elements; ++i) {
|
||||
out_data[i] = func(in_data[i]);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
inline TfLiteStatus EvalNumeric(TfLiteContext* context, TfLiteNode* node,
|
||||
float float_func(float)) {
|
||||
return EvalImpl<float>(context, node, float_func, kTfLiteFloat32);
|
||||
}
|
||||
|
||||
inline TfLiteStatus EvalLogical(TfLiteContext* context, TfLiteNode* node,
|
||||
bool bool_func(bool)) {
|
||||
return EvalImpl<bool>(context, node, bool_func, kTfLiteBool);
|
||||
}
|
||||
|
||||
TfLiteStatus AbsEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::abs);
|
||||
}
|
||||
|
||||
TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::sin);
|
||||
}
|
||||
|
||||
TfLiteStatus CosEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::cos);
|
||||
}
|
||||
|
||||
TfLiteStatus LogEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::log);
|
||||
}
|
||||
|
||||
TfLiteStatus SqrtEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::sqrt);
|
||||
}
|
||||
|
||||
TfLiteStatus RsqrtEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, [](float f) { return 1.f / std::sqrt(f); });
|
||||
}
|
||||
|
||||
TfLiteStatus SquareEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, [](float f) { return f * f; });
|
||||
}
|
||||
|
||||
TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalLogical(context, node, [](bool v) { return !v; });
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace elementwise
|
||||
|
||||
TfLiteRegistration Register_ABS() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::AbsEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_SIN() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_COS() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::CosEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_LOG() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::LogEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_SQRT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_RSQRT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::RsqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_SQUARE() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SquareEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_LOGICAL_NOT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
|
||||
/*invoke=*/elementwise::LogicalNotEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,32 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
//
|
||||
// This is a stub file for non-Ethos platforms
|
||||
//
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace custom {
|
||||
TfLiteRegistration* Register_ETHOSU() { return nullptr; }
|
||||
|
||||
const char* GetString_ETHOSU() { return ""; }
|
||||
|
||||
} // namespace custom
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,57 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/floor.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace floor {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
reference_ops::Floor(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace floor
|
||||
|
||||
TfLiteRegistration Register_FLOOR() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/floor::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,256 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace fully_connected {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
// The index of the temporary tensor where the quantized inputs are cached.
|
||||
int input_quantized_index;
|
||||
// Cached zero point values of tensors.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kWeightsTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context,
|
||||
TfLiteFusedActivation activation,
|
||||
TfLiteType data_type, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
OpData* data) {
|
||||
TfLiteStatus status = kTfLiteOk;
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
double real_multiplier = 0.0;
|
||||
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
|
||||
context, input, filter, bias, output, &real_multiplier));
|
||||
int exponent;
|
||||
QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
|
||||
data->output_shift = -exponent;
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
const auto params =
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
|
||||
"Hybrid models are not supported on TFLite Micro.");
|
||||
|
||||
return CalculateOpData(context, params->activation, input->type, input,
|
||||
filter, bias, output, data);
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.weights_offset = -data.filter_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// TODO(b/138810107): Figure out whether output shift should be inverted
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
reference_integer_ops::FullyConnected(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
|
||||
reference_ops::FullyConnected( \
|
||||
op_params, tflite::micro::GetTensorShape(input), \
|
||||
tflite::micro::GetTensorData<uint8_t>(input), \
|
||||
tflite::micro::GetTensorShape(filter), \
|
||||
tflite::micro::GetTensorData<uint8_t>(filter), \
|
||||
tflite::micro::GetTensorShape(bias), \
|
||||
tflite::micro::GetTensorData<int32_t>(bias), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<output_data_type>(output))
|
||||
switch (output->type) {
|
||||
case kTfLiteUInt8:
|
||||
TF_LITE_FULLY_CONNECTED(uint8_t);
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
TF_LITE_FULLY_CONNECTED(int16_t);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(output->type), output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteFusedActivation activation,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
tflite::reference_ops::FullyConnected(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
const auto* params =
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
tflite::micro::GetEvalInput(context, node, kBiasTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
// Checks in Prepare ensure input, output and filter types are all the same.
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
return EvalFloat(context, node, params->activation, input, filter, bias,
|
||||
output);
|
||||
case kTfLiteInt8:
|
||||
return EvalQuantizedInt8(context, node, data, input, filter, bias,
|
||||
output);
|
||||
|
||||
case kTfLiteUInt8:
|
||||
return EvalQuantized(context, node, data, input, filter, bias, output);
|
||||
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace fully_connected
|
||||
|
||||
TfLiteRegistration Register_FULLY_CONNECTED() {
|
||||
return {/*init=*/fully_connected::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/fully_connected::Prepare,
|
||||
/*invoke=*/fully_connected::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,140 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace hard_swish {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
|
||||
}
|
||||
|
||||
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
|
||||
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
|
||||
|
||||
params->input_zero_point = input->params.zero_point;
|
||||
params->output_zero_point = output->params.zero_point;
|
||||
|
||||
const float input_scale = input->params.scale;
|
||||
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
|
||||
const float reluish_scale = 3.0f / 32768.0f;
|
||||
const float output_scale = output->params.scale;
|
||||
|
||||
const double output_multiplier =
|
||||
static_cast<double>(hires_input_scale / output_scale);
|
||||
int32_t output_multiplier_fixedpoint_int32;
|
||||
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
|
||||
¶ms->output_multiplier_exponent);
|
||||
DownScaleInt32ToInt16Multiplier(
|
||||
output_multiplier_fixedpoint_int32,
|
||||
¶ms->output_multiplier_fixedpoint_int16);
|
||||
|
||||
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
|
||||
|
||||
const double reluish_multiplier =
|
||||
static_cast<double>(hires_input_scale / reluish_scale);
|
||||
int32_t reluish_multiplier_fixedpoint_int32;
|
||||
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
|
||||
¶ms->reluish_multiplier_exponent);
|
||||
DownScaleInt32ToInt16Multiplier(
|
||||
reluish_multiplier_fixedpoint_int32,
|
||||
¶ms->reluish_multiplier_fixedpoint_int16);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
tflite::reference_ops::HardSwish<float>(
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
tflite::reference_ops::HardSwish<uint8_t>(
|
||||
*params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
tflite::reference_ops::HardSwish<int8_t>(
|
||||
*params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} break;
|
||||
default: {
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context,
|
||||
"Only float32/int8_t/uint8_t are supported currently, got %s",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace hard_swish
|
||||
|
||||
TfLiteRegistration Register_HARD_SWISH() {
|
||||
return {/*init=*/hard_swish::HardSwishInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/hard_swish::HardSwishPrepare,
|
||||
/*invoke=*/hard_swish::HardSwishEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,165 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
namespace {
|
||||
constexpr size_t kBufferAlignment = 16;
|
||||
} // namespace
|
||||
|
||||
// TODO(b/161841696): Consider moving away from global arena buffers:
|
||||
constexpr int KernelRunner::kNumScratchBuffers_;
|
||||
constexpr int KernelRunner::kKernelRunnerBufferSize_;
|
||||
uint8_t KernelRunner::kKernelRunnerBuffer_[];
|
||||
|
||||
KernelRunner::KernelRunner(const TfLiteRegistration& registration,
|
||||
TfLiteTensor* tensors, int tensors_size,
|
||||
TfLiteIntArray* inputs, TfLiteIntArray* outputs,
|
||||
void* builtin_data, ErrorReporter* error_reporter)
|
||||
: allocator_(SimpleMemoryAllocator::Create(
|
||||
error_reporter, kKernelRunnerBuffer_, kKernelRunnerBufferSize_)),
|
||||
registration_(registration),
|
||||
tensors_(tensors),
|
||||
error_reporter_(error_reporter) {
|
||||
// Prepare TfLiteContext:
|
||||
context_.impl_ = static_cast<void*>(this);
|
||||
context_.ReportError = ReportOpError;
|
||||
context_.recommended_num_threads = 1;
|
||||
context_.GetTensor = GetTensor;
|
||||
context_.GetEvalTensor = GetEvalTensor;
|
||||
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
|
||||
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
|
||||
context_.GetScratchBuffer = GetScratchBuffer;
|
||||
|
||||
// Prepare TfLiteNode:
|
||||
node_.inputs = inputs;
|
||||
node_.outputs = outputs;
|
||||
node_.builtin_data = builtin_data;
|
||||
}
|
||||
|
||||
TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data) {
|
||||
if (registration_.init) {
|
||||
node_.user_data = registration_.init(&context_, init_data, /*length=*/0);
|
||||
}
|
||||
if (registration_.prepare) {
|
||||
TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_));
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus KernelRunner::Invoke() {
|
||||
if (registration_.invoke == nullptr) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"TfLiteRegistration missing invoke function pointer!");
|
||||
return kTfLiteError;
|
||||
}
|
||||
return registration_.invoke(&context_, &node_);
|
||||
}
|
||||
|
||||
TfLiteTensor* KernelRunner::GetTensor(const struct TfLiteContext* context,
|
||||
int tensor_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
return &runner->tensors_[tensor_index];
|
||||
}
|
||||
|
||||
TfLiteEvalTensor* KernelRunner::GetEvalTensor(
|
||||
const struct TfLiteContext* context, int tensor_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
TfLiteEvalTensor* eval_tensor =
|
||||
reinterpret_cast<TfLiteEvalTensor*>(runner->allocator_->AllocateTemp(
|
||||
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
|
||||
TFLITE_DCHECK(eval_tensor != nullptr);
|
||||
|
||||
// In unit tests, the TfLiteTensor pointer contains the source of truth for
|
||||
// buffers and values:
|
||||
eval_tensor->data = runner->tensors_[tensor_index].data;
|
||||
eval_tensor->dims = runner->tensors_[tensor_index].dims;
|
||||
eval_tensor->type = runner->tensors_[tensor_index].type;
|
||||
return eval_tensor;
|
||||
}
|
||||
|
||||
void* KernelRunner::AllocatePersistentBuffer(TfLiteContext* context,
|
||||
size_t bytes) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
return runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
|
||||
}
|
||||
|
||||
TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context,
|
||||
size_t bytes,
|
||||
int* buffer_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(buffer_index != nullptr);
|
||||
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
if (runner->scratch_buffer_count_ == kNumScratchBuffers_) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
runner->error_reporter_,
|
||||
"Exceeded the maximum number of scratch tensors allowed (%d).",
|
||||
kNumScratchBuffers_);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
// For tests, we allocate scratch buffers from the tail and keep them around
|
||||
// for the lifetime of model. This means that the arena size in the tests will
|
||||
// be more than what we would have if the scratch buffers could share memory.
|
||||
runner->scratch_buffers_[runner->scratch_buffer_count_] =
|
||||
runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
|
||||
TFLITE_DCHECK(runner->scratch_buffers_[runner->scratch_buffer_count_] !=
|
||||
nullptr);
|
||||
|
||||
*buffer_index = runner->scratch_buffer_count_++;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
TFLITE_DCHECK(runner->scratch_buffer_count_ <= kNumScratchBuffers_);
|
||||
if (buffer_index >= runner->scratch_buffer_count_) {
|
||||
return nullptr;
|
||||
}
|
||||
return runner->scratch_buffers_[buffer_index];
|
||||
}
|
||||
|
||||
void KernelRunner::ReportOpError(struct TfLiteContext* context,
|
||||
const char* format, ...) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
TF_LITE_REPORT_ERROR(runner->error_reporter_, format, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
@@ -0,0 +1,83 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/micro/simple_memory_allocator.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
// Helper class to perform a simulated kernel (i.e. TfLiteRegistration) lifecyle
|
||||
// (init, prepare, invoke). All internal allocations are handled by this class.
|
||||
// Simply pass in the registration, list of required tensors, inputs array,
|
||||
// outputs array, and any pre-builtin data. Calling Invoke() will automatically
|
||||
// walk the kernl and outputs will be ready on the the TfLiteTensor output
|
||||
// provided during construction.
|
||||
class KernelRunner {
|
||||
public:
|
||||
KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
|
||||
int tensors_size, TfLiteIntArray* inputs,
|
||||
TfLiteIntArray* outputs, void* builtin_data,
|
||||
ErrorReporter* error_reporter);
|
||||
|
||||
// Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any
|
||||
// exceptions will be reported through the error_reporter and returned as a
|
||||
// status code here.
|
||||
TfLiteStatus InitAndPrepare(const char* init_data = nullptr);
|
||||
|
||||
// Calls init, prepare, and invoke on a given TfLiteRegistration pointer.
|
||||
// After successful invoke, results will be available in the output tensor as
|
||||
// passed into the constructor of this class.
|
||||
TfLiteStatus Invoke();
|
||||
|
||||
protected:
|
||||
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
|
||||
int tensor_index);
|
||||
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
|
||||
int tensor_index);
|
||||
static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
|
||||
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
|
||||
size_t bytes,
|
||||
int* buffer_index);
|
||||
static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
|
||||
static void ReportOpError(struct TfLiteContext* context, const char* format,
|
||||
...);
|
||||
|
||||
private:
|
||||
static constexpr int kNumScratchBuffers_ = 5;
|
||||
|
||||
static constexpr int kKernelRunnerBufferSize_ = 10000;
|
||||
static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];
|
||||
|
||||
SimpleMemoryAllocator* allocator_ = nullptr;
|
||||
const TfLiteRegistration& registration_;
|
||||
TfLiteTensor* tensors_ = nullptr;
|
||||
ErrorReporter* error_reporter_ = nullptr;
|
||||
|
||||
TfLiteContext context_ = {};
|
||||
TfLiteNode node_ = {};
|
||||
|
||||
int scratch_buffer_count_ = 0;
|
||||
uint8_t* scratch_buffers_[kNumScratchBuffers_];
|
||||
};
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
|
@@ -0,0 +1,31 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
bool HaveSameShapes(const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2) {
|
||||
TFLITE_DCHECK(input1 != nullptr);
|
||||
TFLITE_DCHECK(input2 != nullptr);
|
||||
return TfLiteIntArrayEqual(input1->dims, input2->dims);
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
@@ -0,0 +1,83 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
// Returns a mutable tensor for a given input index. is_variable must be checked
|
||||
// during prepare when the full TfLiteTensor is available.
|
||||
inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
int index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
return context->GetEvalTensor(context, node->inputs->data[index]);
|
||||
}
|
||||
|
||||
// Returns the TfLiteEvalTensor struct for a given input index in a node.
|
||||
inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return GetMutableEvalInput(context, node, index);
|
||||
}
|
||||
|
||||
// Returns the TfLiteEvalTensor struct for a given output index in a node.
|
||||
inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
return context->GetEvalTensor(context, node->outputs->data[index]);
|
||||
}
|
||||
|
||||
// Returns data for a TfLiteEvalTensor struct.
|
||||
template <typename T>
|
||||
T* GetTensorData(TfLiteEvalTensor* tensor) {
|
||||
return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
|
||||
}
|
||||
|
||||
// Returns const data for a TfLiteEvalTensor struct.
|
||||
template <typename T>
|
||||
const T* GetTensorData(const TfLiteEvalTensor* tensor) {
|
||||
TFLITE_DCHECK(tensor != nullptr);
|
||||
return reinterpret_cast<const T*>(tensor->data.raw);
|
||||
}
|
||||
|
||||
// Returns the shape of a TfLiteEvalTensor struct.
|
||||
inline const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) {
|
||||
if (tensor == nullptr) {
|
||||
return RuntimeShape();
|
||||
}
|
||||
TfLiteIntArray* dims = tensor->dims;
|
||||
const int dims_size = dims->size;
|
||||
const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
|
||||
return RuntimeShape(dims_size, dims_data);
|
||||
}
|
||||
|
||||
// Return true if the given tensors have the same shape.
|
||||
bool HaveSameShapes(const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2);
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
|
155
components/tflite_micro/tensorflow/lite/micro/kernels/l2norm.cc
Normal file
155
components/tflite_micro/tensorflow/lite/micro/kernels/l2norm.cc
Normal file
@@ -0,0 +1,155 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace l2norm {
|
||||
|
||||
namespace {
|
||||
|
||||
// This file has two implementation of L2Norm.
|
||||
enum KernelType {
|
||||
kReference,
|
||||
kGenericOptimized,
|
||||
};
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* params = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
|
||||
L2NormalizationParams* data =
|
||||
static_cast<L2NormalizationParams*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
|
||||
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
|
||||
output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
} else if (output->type == kTfLiteFloat32) {
|
||||
data->input_zero_point = 0;
|
||||
}
|
||||
|
||||
// TODO(ahentz): For some reason our implementations don't support
|
||||
// activations.
|
||||
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context,
|
||||
sizeof(L2NormalizationParams));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const L2NormalizationParams& data =
|
||||
*(static_cast<const L2NormalizationParams*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// TODO(b/143912164): instead of hardcode the epsilon here, we should read it
|
||||
// from tensorflow, i.e., adding a params.
|
||||
// We don't compute epsilon for quantized kernel:
|
||||
//
|
||||
// epsilon_float = (epsilon_quant - zp) * scale
|
||||
// so
|
||||
// espsilon_quant = epsilon_float / scale + zp
|
||||
// We know epsilon_float is just a very small number to avoid division by
|
||||
// zero error, and scale is > 1, so the integer value of epsilon for quant
|
||||
// is just dominated by the zero point.
|
||||
// Also, GetInvSqrtQuantizedMultiplierExp handles the scenario where the sum
|
||||
// of input value squared is zero case well.
|
||||
// So we don't even need to do handle the epsilon for quantized kernel case.
|
||||
const float epsilon = 1e-6f;
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
reference_ops::L2Normalization(data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output),
|
||||
epsilon);
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
reference_ops::L2Normalization(
|
||||
data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else if (output->type == kTfLiteInt8) {
|
||||
const auto input_shape = tflite::micro::GetTensorShape(input);
|
||||
const auto output_shape = tflite::micro::GetTensorShape(output);
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
reference_integer_ops::L2Normalization(
|
||||
data.input_zero_point, outer_size, depth,
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.",
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace l2norm
|
||||
|
||||
TfLiteRegistration Register_L2NORM_REF() {
|
||||
return {/*init=*/l2norm::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/l2norm::Prepare,
|
||||
/*invoke=*/l2norm::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); }
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
105
components/tflite_micro/tensorflow/lite/micro/kernels/logical.cc
Normal file
105
components/tflite_micro/tensorflow/lite/micro/kernels/logical.cc
Normal file
@@ -0,0 +1,105 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace logical {
|
||||
namespace {
|
||||
|
||||
// Input/output tensor index.
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
bool (*func)(bool, bool)) {
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
if (tflite::micro::HaveSameShapes(input1, input2)) {
|
||||
reference_ops::BinaryFunction<bool, bool, bool>(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<bool>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<bool>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<bool>(output), func);
|
||||
} else {
|
||||
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<bool>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<bool>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<bool>(output), func);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
bool LogicalOr(bool x, bool y) { return x || y; }
|
||||
|
||||
TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return LogicalImpl(context, node, LogicalOr);
|
||||
}
|
||||
|
||||
bool LogicalAnd(bool x, bool y) { return x && y; }
|
||||
|
||||
TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return LogicalImpl(context, node, LogicalAnd);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace logical
|
||||
|
||||
TfLiteRegistration Register_LOGICAL_OR() {
|
||||
// Init, Free, Prepare, Eval are satisfying the Interface required by
|
||||
// TfLiteRegistration.
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalOrEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_LOGICAL_AND() {
|
||||
// Init, Free, Prepare, Eval are satisfying the Interface required by
|
||||
// TfLiteRegistration.
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalAndEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,148 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
int32_t input_zero_point;
|
||||
int32_t input_range_radius;
|
||||
int32_t input_multiplier;
|
||||
int input_left_shift;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
OpData* data) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
|
||||
std::numeric_limits<int8_t>::min());
|
||||
|
||||
static constexpr int kInputIntegerBits = 4;
|
||||
const double input_real_multiplier =
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(1 << (31 - kInputIntegerBits));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
|
||||
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
|
||||
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
|
||||
|
||||
data->input_range_radius =
|
||||
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
return CalculateArithmeticOpData(context, node, data);
|
||||
}
|
||||
|
||||
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
switch (output->type) {
|
||||
case kTfLiteFloat32: {
|
||||
reference_ops::Logistic(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8: {
|
||||
reference_integer_ops::Logistic(
|
||||
data->input_zero_point, data->input_range_radius,
|
||||
data->input_multiplier, data->input_left_shift,
|
||||
NumElements(input->dims),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
// TODO(b/141211002): Also support other data types once we have supported
|
||||
// temporary tensors in TFLM.
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration Register_LOGISTIC() {
|
||||
return {/*init=*/activations::LogisticInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::LogisticPrepare,
|
||||
/*invoke=*/activations::LogisticEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,148 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace maximum_minimum {
|
||||
namespace {
|
||||
|
||||
// This file has a reference implementation of TFMaximum/TFMinimum.
|
||||
enum KernelType {
|
||||
kReference,
|
||||
};
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpContext {
|
||||
OpContext(TfLiteContext* context, TfLiteNode* node) {
|
||||
input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
}
|
||||
const TfLiteEvalTensor* input1;
|
||||
const TfLiteEvalTensor* input2;
|
||||
TfLiteEvalTensor* output;
|
||||
};
|
||||
|
||||
struct MaximumOp {
|
||||
template <typename data_type>
|
||||
static data_type op(data_type el1, data_type el2) {
|
||||
return el1 > el2 ? el1 : el2;
|
||||
}
|
||||
};
|
||||
|
||||
struct MinimumOp {
|
||||
template <typename data_type>
|
||||
static data_type op(data_type el1, data_type el2) {
|
||||
return el1 < el2 ? el1 : el2;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename data_type, typename op_type>
|
||||
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpContext& op_context) {
|
||||
reference_ops::MaximumMinimumBroadcastSlow(
|
||||
tflite::micro::GetTensorShape(op_context.input1),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.input1),
|
||||
tflite::micro::GetTensorShape(op_context.input2),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.input2),
|
||||
tflite::micro::GetTensorShape(op_context.output),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.output),
|
||||
op_type::template op<data_type>);
|
||||
}
|
||||
|
||||
template <KernelType kernel_type, typename OpType>
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
OpContext op_context(context, node);
|
||||
|
||||
if (kernel_type == kReference) {
|
||||
switch (op_context.output->type) {
|
||||
case kTfLiteFloat32:
|
||||
TFLiteOperation<float, OpType>(context, node, op_context);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
TFLiteOperation<uint8_t, OpType>(context, node, op_context);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
TFLiteOperation<int8_t, OpType>(context, node, op_context);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TFLiteOperation<int32_t, OpType>(context, node, op_context);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TFLiteOperation<int64_t, OpType>(context, node, op_context);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Type %s (%d) is not supported by Maximum/Minimum.",
|
||||
TfLiteTypeGetName(op_context.output->type),
|
||||
op_context.output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Kernel type not supported by Maximum/Minimum.");
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace maximum_minimum
|
||||
|
||||
TfLiteRegistration Register_MAXIMUM() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MaximumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_MINIMUM() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MinimumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,92 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Forward declaration of all micro op kernel registration methods. These
|
||||
// registrations are included with the standard `BuiltinOpResolver`.
|
||||
//
|
||||
// This header is particularly useful in cases where only a subset of ops are
|
||||
// needed. In such cases, the client can selectively add only the registrations
|
||||
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
|
||||
// registration in turn allows the linker to strip unused kernels.
|
||||
|
||||
TfLiteRegistration Register_ABS();
|
||||
TfLiteRegistration Register_ADD();
|
||||
TfLiteRegistration Register_ARG_MAX();
|
||||
TfLiteRegistration Register_ARG_MIN();
|
||||
TfLiteRegistration Register_AVERAGE_POOL_2D();
|
||||
TfLiteRegistration Register_CEIL();
|
||||
// TODO(b/160234179): Change custom OPs to also return by value.
|
||||
TfLiteRegistration* Register_CIRCULAR_BUFFER();
|
||||
TfLiteRegistration Register_CONV_2D();
|
||||
TfLiteRegistration Register_CONCATENATION();
|
||||
TfLiteRegistration Register_COS();
|
||||
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
|
||||
TfLiteRegistration Register_DEQUANTIZE();
|
||||
TfLiteRegistration Register_EQUAL();
|
||||
TfLiteRegistration Register_FLOOR();
|
||||
TfLiteRegistration Register_FULLY_CONNECTED();
|
||||
TfLiteRegistration Register_GREATER();
|
||||
TfLiteRegistration Register_GREATER_EQUAL();
|
||||
TfLiteRegistration Register_HARD_SWISH();
|
||||
TfLiteRegistration Register_LESS();
|
||||
TfLiteRegistration Register_LESS_EQUAL();
|
||||
TfLiteRegistration Register_LOG();
|
||||
TfLiteRegistration Register_LOGICAL_AND();
|
||||
TfLiteRegistration Register_LOGICAL_NOT();
|
||||
TfLiteRegistration Register_LOGICAL_OR();
|
||||
TfLiteRegistration Register_LOGISTIC();
|
||||
TfLiteRegistration Register_MAXIMUM();
|
||||
TfLiteRegistration Register_MAX_POOL_2D();
|
||||
TfLiteRegistration Register_MEAN();
|
||||
TfLiteRegistration Register_MINIMUM();
|
||||
TfLiteRegistration Register_MUL();
|
||||
TfLiteRegistration Register_NEG();
|
||||
TfLiteRegistration Register_NOT_EQUAL();
|
||||
TfLiteRegistration Register_PACK();
|
||||
TfLiteRegistration Register_PAD();
|
||||
TfLiteRegistration Register_PADV2();
|
||||
TfLiteRegistration Register_PRELU();
|
||||
TfLiteRegistration Register_QUANTIZE();
|
||||
TfLiteRegistration Register_RELU();
|
||||
TfLiteRegistration Register_RELU6();
|
||||
TfLiteRegistration Register_RESHAPE();
|
||||
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
|
||||
TfLiteRegistration Register_ROUND();
|
||||
TfLiteRegistration Register_RSQRT();
|
||||
TfLiteRegistration Register_SIN();
|
||||
TfLiteRegistration Register_SOFTMAX();
|
||||
TfLiteRegistration Register_SPLIT();
|
||||
TfLiteRegistration Register_SQRT();
|
||||
TfLiteRegistration Register_SQUARE();
|
||||
TfLiteRegistration Register_STRIDED_SLICE();
|
||||
TfLiteRegistration Register_SUB();
|
||||
TfLiteRegistration Register_SVDF();
|
||||
TfLiteRegistration Register_UNPACK();
|
||||
TfLiteRegistration Register_L2_NORMALIZATION();
|
||||
TfLiteRegistration Register_TANH();
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
|
@@ -0,0 +1,37 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Same as gtl::Greater but defined here to reduce dependencies and
|
||||
// binary size for micro environment.
|
||||
struct Greater {
|
||||
template <typename T>
|
||||
bool operator()(const T& x, const T& y) const {
|
||||
return x > y;
|
||||
}
|
||||
};
|
||||
|
||||
struct Less {
|
||||
template <typename T>
|
||||
bool operator()(const T& x, const T& y) const {
|
||||
return x < y;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
|
233
components/tflite_micro/tensorflow/lite/micro/kernels/mul.cc
Normal file
233
components/tflite_micro/tensorflow/lite/micro/kernels/mul.cc
Normal file
@@ -0,0 +1,233 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/mul.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace mul {
|
||||
namespace {
|
||||
|
||||
constexpr int kInput1Tensor = 0;
|
||||
constexpr int kInput2Tensor = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
int32_t input1_zero_point;
|
||||
int32_t input2_zero_point;
|
||||
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
int32_t output_zero_point;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
float output_activation_min_f32;
|
||||
float output_activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
|
||||
double real_multiplier = static_cast<double>(input1->params.scale) *
|
||||
static_cast<double>(input2->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
|
||||
data->input1_zero_point = input1->params.zero_point;
|
||||
data->input2_zero_point = input2->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
} else {
|
||||
CalculateActivationRange(params->activation,
|
||||
&data->output_activation_min_f32,
|
||||
&data->output_activation_max_f32);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params = {};
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
op_params.float_activation_max = data->output_activation_max_f32;
|
||||
op_params.input1_offset = -data->input1_zero_point;
|
||||
op_params.input2_offset = -data->input2_zero_point;
|
||||
op_params.output_offset = data->output_zero_point;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
reference_integer_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::Mul(op_params,
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
if (need_broadcast) {
|
||||
reference_integer_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::Mul(op_params,
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params = {};
|
||||
op_params.float_activation_min = data->output_activation_min_f32;
|
||||
op_params.float_activation_max = data->output_activation_max_f32;
|
||||
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (need_broadcast) {
|
||||
reference_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
return CalculateOpData(context, node, params, data);
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInput1Tensor);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInput2Tensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input1->type) {
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
EvalQuantized(context, node, data, input1, input2, output);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, data, input1, input2, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace mul
|
||||
|
||||
TfLiteRegistration Register_MUL() {
|
||||
return {/*init=*/mul::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/mul::Prepare,
|
||||
/*invoke=*/mul::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
66
components/tflite_micro/tensorflow/lite/micro/kernels/neg.cc
Normal file
66
components/tflite_micro/tensorflow/lite/micro/kernels/neg.cc
Normal file
@@ -0,0 +1,66 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/neg.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace neg {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
switch (input->type) {
|
||||
// TODO(wangtz): handle for kTfLiteInt8
|
||||
case kTfLiteFloat32:
|
||||
reference_ops::Negate(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace neg
|
||||
|
||||
TfLiteRegistration Register_NEG() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/neg::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
127
components/tflite_micro/tensorflow/lite/micro/kernels/pack.cc
Normal file
127
components/tflite_micro/tensorflow/lite/micro/kernels/pack.cc
Normal file
@@ -0,0 +1,127 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace pack {
|
||||
namespace {
|
||||
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
template <typename T>
|
||||
TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteEvalTensor* output, int values_count, int axis) {
|
||||
const TfLiteEvalTensor* input0 =
|
||||
tflite::micro::GetEvalInput(context, node, 0);
|
||||
|
||||
const int dimensions = output->dims->size;
|
||||
const TfLiteIntArray* input_dims = input0->dims;
|
||||
const TfLiteIntArray* output_dims = output->dims;
|
||||
|
||||
if (axis < 0) {
|
||||
axis += dimensions;
|
||||
}
|
||||
|
||||
int outer_size = 1;
|
||||
for (int i = 0; i < axis; ++i) {
|
||||
outer_size *= output_dims->data[i];
|
||||
}
|
||||
int copy_size = 1;
|
||||
for (int i = axis + 1; i < dimensions; ++i) {
|
||||
copy_size *= output_dims->data[i];
|
||||
}
|
||||
int input_size = 1;
|
||||
for (int i = 0; i < input_dims->size; ++i) {
|
||||
input_size *= input_dims->data[i];
|
||||
}
|
||||
TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);
|
||||
|
||||
T* output_data = tflite::micro::GetTensorData<T>(output);
|
||||
|
||||
for (int i = 0; i < values_count; ++i) {
|
||||
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
|
||||
const T* input_data = tflite::micro::GetTensorData<T>(t);
|
||||
for (int k = 0; k < outer_size; ++k) {
|
||||
const T* input_ptr = input_data + copy_size * k;
|
||||
int loc = k * values_count * copy_size + i * copy_size;
|
||||
T* output_ptr = output_data + loc;
|
||||
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLitePackParams* data =
|
||||
reinterpret_cast<TfLitePackParams*>(node->builtin_data);
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (output->type) {
|
||||
case kTfLiteFloat32: {
|
||||
return PackImpl<float>(context, node, output, data->values_count,
|
||||
data->axis);
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
return PackImpl<uint8_t>(context, node, output, data->values_count,
|
||||
data->axis);
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
return PackImpl<int8_t>(context, node, output, data->values_count,
|
||||
data->axis);
|
||||
}
|
||||
case kTfLiteInt32: {
|
||||
return PackImpl<int32_t>(context, node, output, data->values_count,
|
||||
data->axis);
|
||||
}
|
||||
case kTfLiteInt64: {
|
||||
return PackImpl<int64_t>(context, node, output, data->values_count,
|
||||
data->axis);
|
||||
}
|
||||
default: {
|
||||
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by pack.",
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace pack
|
||||
|
||||
TfLiteRegistration Register_PACK() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pack::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
251
components/tflite_micro/tensorflow/lite/micro/kernels/pad.cc
Normal file
251
components/tflite_micro/tensorflow/lite/micro/kernels/pad.cc
Normal file
@@ -0,0 +1,251 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/internal/reference/pad.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace pad {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
PadParams params;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
|
||||
const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
|
||||
const TfLiteTensor* constant_values =
|
||||
NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
|
||||
TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
|
||||
// Current implementations rely on the inputs being <= 4D.
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) <=
|
||||
reference_ops::PadKernelMaxDimensionCount());
|
||||
|
||||
if (constant_values != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
|
||||
// Ensure that constant_values is a scalar.
|
||||
TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
|
||||
}
|
||||
|
||||
// There must be a pair of paddings for each output dimension.
|
||||
TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
|
||||
output->dims->size * 2);
|
||||
|
||||
// On Micro, outputs must be properly sized by the converter.
|
||||
// NOTE: This data is only available because the paddings buffer is stored in
|
||||
// the flatbuffer:
|
||||
TF_LITE_ENSURE(context, IsConstantTensor(paddings));
|
||||
const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
|
||||
for (int i = 0; i < output->dims->size; i++) {
|
||||
int output_dim = output->dims->data[i];
|
||||
int expected_dim =
|
||||
input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
|
||||
TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
|
||||
}
|
||||
|
||||
// Calculate OpData:
|
||||
data->params.resizing_category = ResizingCategory::kGenericResize;
|
||||
const int paddings_total = GetTensorShape(paddings).FlatSize();
|
||||
if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
|
||||
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
|
||||
data->params.resizing_category = ResizingCategory::kImageStyle;
|
||||
}
|
||||
|
||||
const int num_input_dimensions = NumDimensions(input);
|
||||
data->params.left_padding_count = num_input_dimensions;
|
||||
data->params.right_padding_count = num_input_dimensions;
|
||||
|
||||
for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
|
||||
data->params.left_padding[idx] = paddings_data[idx * 2];
|
||||
data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
|
||||
}
|
||||
|
||||
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
|
||||
if (constant_values == nullptr) {
|
||||
// Quantized Pad requires that 0 is represented in the quantized
|
||||
// range.
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE(context, output->params.zero_point >=
|
||||
std::numeric_limits<uint8_t>::min());
|
||||
TF_LITE_ENSURE(context, output->params.zero_point <=
|
||||
std::numeric_limits<uint8_t>::max());
|
||||
} else {
|
||||
TF_LITE_ENSURE(context, output->params.zero_point >=
|
||||
std::numeric_limits<int8_t>::min());
|
||||
TF_LITE_ENSURE(context, output->params.zero_point <=
|
||||
std::numeric_limits<int8_t>::max());
|
||||
}
|
||||
} else {
|
||||
// Quantized Pad requires that 'constant_values' is represented in the
|
||||
// same quantized range as the input and output tensors.
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
|
||||
constant_values->params.zero_point);
|
||||
TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
|
||||
static_cast<double>(constant_values->params.scale));
|
||||
}
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, /*index=*/0);
|
||||
const TfLiteEvalTensor* constant_values =
|
||||
NumInputs(node) == 3
|
||||
? tflite::micro::GetEvalInput(context, node, /*index=*/2)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, /*index=*/0);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
float pad_value =
|
||||
constant_values == nullptr
|
||||
? 0.f
|
||||
: *tflite::micro::GetTensorData<float>(constant_values);
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
uint8_t pad_value;
|
||||
if (constant_values == nullptr) {
|
||||
pad_value = static_cast<uint8_t>(data->output_zero_point);
|
||||
} else {
|
||||
pad_value = *tflite::micro::GetTensorData<uint8_t>(constant_values);
|
||||
}
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
int8_t pad_value;
|
||||
if (constant_values == nullptr) {
|
||||
pad_value = static_cast<uint8_t>(data->output_zero_point);
|
||||
} else {
|
||||
pad_value = *tflite::micro::GetTensorData<int8_t>(constant_values);
|
||||
}
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteInt32: {
|
||||
int32_t pad_value =
|
||||
constant_values == nullptr
|
||||
? 0
|
||||
: *tflite::micro::GetTensorData<int32_t>(constant_values);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int32_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
} break;
|
||||
default:
|
||||
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
#undef TF_LITE_PAD
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace pad
|
||||
|
||||
TfLiteRegistration Register_PAD() {
|
||||
return {/*init=*/pad::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
// Also register Pad as PadV2.
|
||||
TfLiteRegistration Register_PADV2() {
|
||||
return {/*init=*/pad::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
267
components/tflite_micro/tensorflow/lite/micro/kernels/pooling.cc
Normal file
267
components/tflite_micro/tensorflow/lite/micro/kernels/pooling.cc
Normal file
@@ -0,0 +1,267 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace pooling {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
int32_t activation_min;
|
||||
int32_t activation_max;
|
||||
float activation_min_f32;
|
||||
float activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(const TfLiteContext* context,
|
||||
const TfLitePoolParams* params,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* output, OpData* data) {
|
||||
// input: batch, height, width, channel
|
||||
int height = SizeOfDimension(input, 1);
|
||||
int width = SizeOfDimension(input, 2);
|
||||
|
||||
int out_height, out_width;
|
||||
|
||||
data->padding = ComputePaddingHeightWidth(
|
||||
params->stride_height, params->stride_width,
|
||||
/*dilation_rate_height=*/1,
|
||||
/*dilation_rate_width=*/1, height, width, params->filter_height,
|
||||
params->filter_width, params->padding, &out_height, &out_width);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.filter_height = params->filter_height;
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.float_activation_min = data->activation_min_f32;
|
||||
op_params.float_activation_max = data->activation_max_f32;
|
||||
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input,
|
||||
TfLiteEvalTensor* output) {
|
||||
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
|
||||
|
||||
PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.filter_height = params->filter_height;
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = data->activation_min;
|
||||
op_params.quantized_activation_max = data->activation_max;
|
||||
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::AveragePool(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
|
||||
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.filter_height = params->filter_height;
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.float_activation_min = data->activation_min_f32;
|
||||
op_params.float_activation_max = data->activation_max_f32;
|
||||
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.filter_height = params->filter_height;
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = data->activation_min;
|
||||
op_params.quantized_activation_max = data->activation_max;
|
||||
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::MaxPool(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// Inputs and outputs share the same type, guaranteed by the converter.
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
AverageEvalFloat(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
AverageEvalQuantized(context, node, params, data, input, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
MaxEvalFloat(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
MaxEvalQuantized(context, node, params, data, input, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
CalculateActivationRange(params->activation, &data->activation_min_f32,
|
||||
&data->activation_max_f32);
|
||||
} else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
|
||||
CalculateActivationRangeQuantized(context, params->activation, output,
|
||||
&data->activation_min,
|
||||
&data->activation_max);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace pooling
|
||||
|
||||
TfLiteRegistration Register_AVERAGE_POOL_2D() {
|
||||
return {/*init=*/pooling::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pooling::Prepare,
|
||||
/*invoke=*/pooling::AverageEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_MAX_POOL_2D() {
|
||||
return {/*init=*/pooling::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pooling::Prepare,
|
||||
/*invoke=*/pooling::MaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
166
components/tflite_micro/tensorflow/lite/micro/kernels/prelu.cc
Normal file
166
components/tflite_micro/tensorflow/lite/micro/kernels/prelu.cc
Normal file
@@ -0,0 +1,166 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
|
||||
TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
|
||||
const TfLiteTensor* alpha,
|
||||
TfLiteTensor* output, PreluParams* params) {
|
||||
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt16) {
|
||||
double real_multiplier_1 = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
double real_multiplier_2 = static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(alpha->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier_1, ¶ms->output_multiplier_1,
|
||||
¶ms->output_shift_1);
|
||||
QuantizeMultiplier(real_multiplier_2, ¶ms->output_multiplier_2,
|
||||
¶ms->output_shift_2);
|
||||
|
||||
params->input_offset = -input->params.zero_point;
|
||||
params->alpha_offset = -alpha->params.zero_point;
|
||||
params->output_offset = output->params.zero_point;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
inline void BroadcastPrelu4DSlowFloat(
|
||||
const RuntimeShape& unextended_input1_shape, const float* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape, const float* input2_data,
|
||||
const RuntimeShape& unextended_output_shape, float* output_data) {
|
||||
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
|
||||
const RuntimeShape output_shape =
|
||||
RuntimeShape::ExtendedShape(4, unextended_output_shape);
|
||||
|
||||
NdArrayDesc<4> desc1;
|
||||
NdArrayDesc<4> desc2;
|
||||
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
|
||||
unextended_input2_shape, &desc1, &desc2);
|
||||
|
||||
for (int b = 0; b < output_shape.Dims(0); ++b) {
|
||||
for (int y = 0; y < output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < output_shape.Dims(3); ++c) {
|
||||
auto out_idx = Offset(output_shape, b, y, x, c);
|
||||
auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
|
||||
auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
|
||||
auto in1_val = input1_data[in1_idx];
|
||||
auto in2_val = input2_data[in2_idx];
|
||||
output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(PreluParams));
|
||||
}
|
||||
|
||||
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
PreluParams* params = static_cast<PreluParams*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
const TfLiteTensor* alpha = GetInput(context, node, 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
|
||||
return CalculatePreluParams(input, alpha, output, params);
|
||||
}
|
||||
|
||||
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const PreluParams& params =
|
||||
*(static_cast<const PreluParams*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* alpha = tflite::micro::GetEvalInput(context, node, 1);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
BroadcastPrelu4DSlowFloat(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<float>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
reference_ops::BroadcastPrelu4DSlow(
|
||||
params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<uint8_t>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
reference_ops::BroadcastPrelu4DSlow(
|
||||
params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<int8_t>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context, "Only float32 and uint8_t are supported currently, got %d.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration Register_PRELU() {
|
||||
return {/*init=*/activations::PreluInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::PreluPrepare,
|
||||
/*invoke=*/activations::PreluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,178 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace quantize {
|
||||
|
||||
struct OpData {
|
||||
tflite::QuantizationParams quantization_params;
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
int32_t input_zero_point;
|
||||
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
|
||||
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
|
||||
// Currently this only support affine per-layer quantization.
|
||||
TF_LITE_ENSURE_EQ(context, output->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(output->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
|
||||
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
|
||||
input->type == kTfLiteInt16 ||
|
||||
input->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE(context,
|
||||
output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
|
||||
|
||||
if ((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) &&
|
||||
output->type == kTfLiteInt8) {
|
||||
double effective_scale =
|
||||
static_cast<double>(input->params.scale / output->params.scale);
|
||||
|
||||
QuantizeMultiplier(effective_scale, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
data->quantization_params.zero_point = output->params.zero_point;
|
||||
data->quantization_params.scale = static_cast<double>(output->params.scale);
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt16) {
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
|
||||
size, data->output_multiplier,
|
||||
data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
// Int8 to Int8 requantization, required if the input and output tensors
|
||||
// have different scales and/or zero points.
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
|
||||
size, data->output_multiplier,
|
||||
data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace quantize
|
||||
|
||||
// This Op (QUANTIZE) quantizes the input and produces quantized output.
|
||||
// AffineQuantize takes scale and zero point and quantizes the float value to
|
||||
// quantized output, in int8_t or uint8_t format.
|
||||
TfLiteRegistration Register_QUANTIZE() {
|
||||
return {/*init=*/quantize::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/quantize::Prepare,
|
||||
/*invoke=*/quantize::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
139
components/tflite_micro/tensorflow/lite/micro/kernels/reduce.cc
Normal file
139
components/tflite_micro/tensorflow/lite/micro/kernels/reduce.cc
Normal file
@@ -0,0 +1,139 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/reduce.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace reduce {
|
||||
|
||||
constexpr int kMaxNumberOfAxis = 4;
|
||||
constexpr int kMaxNumberOfReducedAxis = 2;
|
||||
|
||||
TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Inputs Tensor (dtype depends on quantization):
|
||||
// [0] = Input
|
||||
// [1] = Axis
|
||||
|
||||
// Outputs Tensor (dtype depends on quantization):
|
||||
// [0] = Output
|
||||
|
||||
// Validate number of inputs and outputs
|
||||
TF_LITE_ENSURE_EQ(context, node->inputs->size, 2);
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
|
||||
// Validate axis type
|
||||
const TfLiteTensor* axis = GetInput(context, node, 1);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
|
||||
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void ResolveAxis(const int* axis_data, int axis_count,
|
||||
tflite::MeanParams* op_params) {
|
||||
int i = 0;
|
||||
for (; i < axis_count; ++i) {
|
||||
op_params->axis[i] = static_cast<int16_t>(axis_data[i]);
|
||||
}
|
||||
for (; i < 4; ++i) {
|
||||
op_params->axis[i] = 1;
|
||||
}
|
||||
op_params->axis_count = axis_count;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
TfLiteReducerParams* params =
|
||||
reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
|
||||
|
||||
int num_axis = static_cast<int>(ElementCount(*axis->dims));
|
||||
int temp_index[kMaxNumberOfAxis];
|
||||
int resolved_axis[kMaxNumberOfReducedAxis];
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
tflite::MeanParams op_params;
|
||||
ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis,
|
||||
&op_params);
|
||||
// TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
|
||||
// scratch tensor allocation has been implemented in (b/132070898)
|
||||
bool is_valid_inputs =
|
||||
(input->dims->size == 4 && op_params.axis_count == 2 &&
|
||||
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
|
||||
(op_params.axis[0] == 2 && op_params.axis[1] == 1)));
|
||||
TF_LITE_ENSURE_MSG(
|
||||
context, is_valid_inputs == true,
|
||||
"Number of Input "
|
||||
"dimensions != 4 OR the Axis is not either [1, 2] or [2, 1]");
|
||||
// TODO(b/139102329): Handle the below special case in the combined
|
||||
// reference method.
|
||||
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
|
||||
if (params->keep_dims) {
|
||||
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::Mean(
|
||||
tflite::micro::GetTensorData<float>(input), input->dims->data,
|
||||
input->dims->size, tflite::micro::GetTensorData<float>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
tflite::micro::GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_index, resolved_axis,
|
||||
tflite::micro::GetTensorData<float>(output)));
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
|
||||
TF_LITE_ENSURE_MSG(context, false,
|
||||
"Currently, only float32 input type "
|
||||
"is supported.");
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace reduce
|
||||
|
||||
TfLiteRegistration Register_MEAN() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reduce::PrepareMeanOrSum,
|
||||
/*invoke=*/reduce::EvalMean,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
116
components/tflite_micro/tensorflow/lite/micro/kernels/reshape.cc
Normal file
116
components/tflite_micro/tensorflow/lite/micro/kernels/reshape.cc
Normal file
@@ -0,0 +1,116 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace reshape {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
// Tensorflow's Reshape allows one of the shape components to have the
|
||||
// special -1 value, meaning it will be calculated automatically based on the
|
||||
// input. Here we calculate what that dimension should be so that the number
|
||||
// of output elements in the same as the number of input elements.
|
||||
int num_input_elements = NumElements(input);
|
||||
TfLiteIntArray* output_shape = output->dims;
|
||||
|
||||
if (NumInputs(node) == 1 && // Legacy scalar supported with params.
|
||||
output_shape->size == 1 && output_shape->data[0] == 0) {
|
||||
// Legacy tflite models use a shape parameter of [0] to indicate scalars,
|
||||
// so adjust accordingly. TODO(b/111614235): Allow zero-sized buffers during
|
||||
// toco conversion.
|
||||
output_shape->size = 0;
|
||||
}
|
||||
|
||||
int num_output_elements = 1;
|
||||
int stretch_dim = -1;
|
||||
for (int i = 0; i < output_shape->size; ++i) {
|
||||
int value = output_shape->data[i];
|
||||
if (value == -1) {
|
||||
TF_LITE_ENSURE_EQ(context, stretch_dim, -1);
|
||||
stretch_dim = i;
|
||||
} else {
|
||||
num_output_elements *= value;
|
||||
}
|
||||
}
|
||||
if (stretch_dim != -1) {
|
||||
output_shape->data[stretch_dim] = num_input_elements / num_output_elements;
|
||||
num_output_elements *= output_shape->data[stretch_dim];
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, ReshapeOutput(context, node), kTfLiteOk);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// TODO(b/162522304): storing input bytes in OpData increases some models
|
||||
// significantly, possibly due to alignment issues.
|
||||
size_t input_bytes;
|
||||
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
|
||||
input_bytes *= ElementCount(*input->dims);
|
||||
|
||||
// Do nothing for in-place reshape.
|
||||
if (input->data.raw != output->data.raw) {
|
||||
// Otherwise perform reshape with copy.
|
||||
for (size_t i = 0; i < input_bytes; ++i) {
|
||||
output->data.raw[i] = input->data.raw[i];
|
||||
}
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace reshape
|
||||
|
||||
TfLiteRegistration Register_RESHAPE() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reshape::Prepare,
|
||||
/*invoke=*/reshape::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,123 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace resize_nearest_neighbor {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kSizeTensor = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
#if defined(DEBUG)
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
// Our current implementations rely on the input being 4D,
|
||||
// and the size being 1D tensor with exactly 2 elements.
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
|
||||
TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32);
|
||||
TF_LITE_ENSURE_EQ(context, size->dims->data[0], 2);
|
||||
|
||||
output->type = input->type;
|
||||
|
||||
if (!IsConstantTensor(size)) {
|
||||
TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
|
||||
return kTfLiteError;
|
||||
}
|
||||
#endif
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* size =
|
||||
tflite::micro::GetEvalInput(context, node, kSizeTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
tflite::ResizeNearestNeighborParams op_params;
|
||||
op_params.align_corners = params->align_corners;
|
||||
op_params.half_pixel_centers = false;
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
reference_ops::ResizeNearestNeighbor(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int32_t>(input),
|
||||
tflite::micro::GetTensorShape(size),
|
||||
tflite::micro::GetTensorData<int32_t>(size),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
reference_ops::ResizeNearestNeighbor(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(size),
|
||||
tflite::micro::GetTensorData<int32_t>(size),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else if (output->type == kTfLiteInt8) {
|
||||
reference_ops::ResizeNearestNeighbor(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(size),
|
||||
tflite::micro::GetTensorData<int32_t>(size),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Output type is %d, requires float, uint8_t or int8_t.",
|
||||
output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace resize_nearest_neighbor
|
||||
|
||||
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/resize_nearest_neighbor::Prepare,
|
||||
/*invoke=*/resize_nearest_neighbor::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,74 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/round.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace round {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
|
||||
for (int i = 0; i < output->dims->size; ++i) {
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
reference_ops::Round(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace round
|
||||
|
||||
TfLiteRegistration Register_ROUND() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/round::Prepare,
|
||||
/*invoke=*/round::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
169
components/tflite_micro/tensorflow/lite/micro/kernels/softmax.cc
Normal file
169
components/tflite_micro/tensorflow/lite/micro/kernels/softmax.cc
Normal file
@@ -0,0 +1,169 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
|
||||
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
const TfLiteTensor* input,
|
||||
TfLiteTensor* output,
|
||||
const TfLiteSoftmaxParams* params,
|
||||
SoftmaxParams* op_data) {
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
|
||||
// NOTE: Current int16_t softmax output does not require symmetric
|
||||
// scaling
|
||||
// - so no need to verify scale here.
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
|
||||
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
|
||||
}
|
||||
}
|
||||
|
||||
static const int kScaledDiffIntegerBits = 5;
|
||||
|
||||
int input_left_shift;
|
||||
tflite::PreprocessSoftmaxScaling(
|
||||
static_cast<double>(params->beta),
|
||||
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
|
||||
&op_data->input_multiplier, &input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
op_data->diff_min =
|
||||
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
|
||||
op_data->input_left_shift);
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
op_data->beta = static_cast<double>(params->beta);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Takes a tensor and performs softmax along the last dimension.
|
||||
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
if (output->type == kTfLiteInt16) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
} else {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
|
||||
return CalculateSoftmaxParams(context, input, output, params, data);
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
SoftmaxFloat(input, output, *data);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8:
|
||||
case kTfLiteUInt8: {
|
||||
SoftmaxQuantized(input, output, *data);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration Register_SOFTMAX() {
|
||||
return {/*init=*/activations::SoftmaxInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::SoftmaxPrepare,
|
||||
/*invoke=*/activations::SoftmaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
134
components/tflite_micro/tensorflow/lite/micro/kernels/split.cc
Normal file
134
components/tflite_micro/tensorflow/lite/micro/kernels/split.cc
Normal file
@@ -0,0 +1,134 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace split {
|
||||
|
||||
template <typename T>
|
||||
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteEvalTensor* input, int axis_value) {
|
||||
const int output_count = NumOutputs(node);
|
||||
const TfLiteIntArray* input_dims = input->dims;
|
||||
const TfLiteEvalTensor* output0 =
|
||||
tflite::micro::GetEvalOutput(context, node, 0);
|
||||
const TfLiteIntArray* output_dims = output0->dims;
|
||||
|
||||
const int split_dimensions = input_dims->size;
|
||||
int axis = axis_value < 0 ? axis_value + split_dimensions : axis_value;
|
||||
|
||||
TFLITE_DCHECK_LT(axis, split_dimensions);
|
||||
TFLITE_DCHECK_EQ(output_dims->size, split_dimensions);
|
||||
|
||||
int64_t split_size = output_dims->data[axis] * output_count;
|
||||
|
||||
TFLITE_DCHECK_EQ(split_size, input_dims->data[axis]);
|
||||
int64_t outer_size = 1;
|
||||
for (int i = 0; i < axis; ++i) {
|
||||
outer_size *= input_dims->data[i];
|
||||
}
|
||||
|
||||
int64_t base_inner_size = 1;
|
||||
for (int i = axis + 1; i < split_dimensions; ++i) {
|
||||
base_inner_size *= input_dims->data[i];
|
||||
}
|
||||
|
||||
const T* input_ptr = tflite::micro::GetTensorData<T>(input);
|
||||
for (int k = 0; k < outer_size; ++k) {
|
||||
for (int i = 0; i < output_count; ++i) {
|
||||
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
|
||||
T* output_data = tflite::micro::GetTensorData<T>(t);
|
||||
const int copy_size = output_dims->data[axis] * base_inner_size;
|
||||
T* output_ptr = output_data + k * copy_size;
|
||||
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
|
||||
input_ptr += copy_size;
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* axis = GetInput(context, node, 0);
|
||||
|
||||
// Dynamic output tensors are needed if axis tensor is not constant.
|
||||
// But Micro doesn't support dynamic memory allocation, so we only support
|
||||
// constant axis tensor for now.
|
||||
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
|
||||
"Non constant axis tensor not supported");
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1);
|
||||
|
||||
int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
|
||||
if (axis_value < 0) {
|
||||
axis_value += input->dims->size;
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE(context, axis_value >= 0);
|
||||
TF_LITE_ENSURE(context, axis_value < input->dims->size);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
return SplitImpl<float>(context, node, input, axis_value);
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
return SplitImpl<uint8_t>(context, node, input, axis_value);
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
return SplitImpl<int8_t>(context, node, input, axis_value);
|
||||
}
|
||||
case kTfLiteInt16: {
|
||||
return SplitImpl<int16_t>(context, node, input, axis_value);
|
||||
}
|
||||
case kTfLiteInt32: {
|
||||
return SplitImpl<int32_t>(context, node, input, axis_value);
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s currently not supported.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
#undef TF_LITE_SPLIT
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace split
|
||||
|
||||
TfLiteRegistration Register_SPLIT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/split::Prepare,
|
||||
/*invoke=*/split::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
@@ -0,0 +1,192 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace strided_slice {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kBeginTensor = 1;
|
||||
constexpr int kEndTensor = 2;
|
||||
constexpr int kStridesTensor = 3;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct StridedSliceContext {
|
||||
StridedSliceContext(TfLiteContext* context, TfLiteNode* node) {
|
||||
params = reinterpret_cast<TfLiteStridedSliceParams*>(node->builtin_data);
|
||||
input = GetInput(context, node, kInputTensor);
|
||||
begin = GetInput(context, node, kBeginTensor);
|
||||
end = GetInput(context, node, kEndTensor);
|
||||
strides = GetInput(context, node, kStridesTensor);
|
||||
output = GetOutput(context, node, kOutputTensor);
|
||||
dims = NumDimensions(input);
|
||||
}
|
||||
const TfLiteStridedSliceParams* params;
|
||||
const TfLiteTensor* input;
|
||||
const TfLiteTensor* begin;
|
||||
const TfLiteTensor* end;
|
||||
const TfLiteTensor* strides;
|
||||
TfLiteTensor* output;
|
||||
int dims;
|
||||
};
|
||||
|
||||
// This Op only supports 1-4D cases and since we use the reference 4D
|
||||
// implementation, the 1-3D tensors are mapped to 4D.
|
||||
const int kMaxDim = 4;
|
||||
|
||||
tflite::StridedSliceParams BuildStridedSliceParams(
|
||||
StridedSliceContext* op_context) {
|
||||
tflite::StridedSliceParams op_params;
|
||||
op_params.start_indices_count = op_context->dims;
|
||||
op_params.stop_indices_count = op_context->dims;
|
||||
op_params.strides_count = op_context->dims;
|
||||
|
||||
for (int i = 0; i < op_context->dims; ++i) {
|
||||
op_params.start_indices[i] = GetTensorData<int32_t>(op_context->begin)[i];
|
||||
op_params.stop_indices[i] = GetTensorData<int32_t>(op_context->end)[i];
|
||||
op_params.strides[i] = GetTensorData<int32_t>(op_context->strides)[i];
|
||||
}
|
||||
|
||||
op_params.begin_mask = op_context->params->begin_mask;
|
||||
op_params.ellipsis_mask = 0;
|
||||
op_params.end_mask = op_context->params->end_mask;
|
||||
op_params.new_axis_mask = 0;
|
||||
op_params.shrink_axis_mask = op_context->params->shrink_axis_mask;
|
||||
return op_params;
|
||||
}
|
||||
|
||||
// Processes the indexing tensors (begin, end and strides) to resize the
|
||||
// output tensor. This function is callable from both Prepare() and Eval() as
|
||||
// long as the caller ensures the indexing tensors are present.
|
||||
TfLiteStatus CheckOutputSize(TfLiteContext* context,
|
||||
StridedSliceContext* op_context) {
|
||||
using ::tflite::strided_slice::StartForAxis;
|
||||
using ::tflite::strided_slice::StopForAxis;
|
||||
TfLiteIntArray* output_shape = op_context->output->dims;
|
||||
int shape_size = 0;
|
||||
auto op_params = BuildStridedSliceParams(op_context);
|
||||
auto input_shape = GetTensorShape(op_context->input);
|
||||
for (int idx = 0; idx < op_context->dims; ++idx) {
|
||||
int32_t stride = GetTensorData<int32_t>(op_context->strides)[idx];
|
||||
TF_LITE_ENSURE_MSG(context, stride != 0, "stride value has to be non-zero");
|
||||
int32_t begin = StartForAxis(op_params, input_shape, idx);
|
||||
int32_t end = StopForAxis(op_params, input_shape, idx, begin);
|
||||
|
||||
// When shrinking an axis, the end position does not matter (and can be
|
||||
// incorrect when negative indexing is used, see Issue #19260). Always use
|
||||
// begin + 1 to generate a length 1 slice, since begin has
|
||||
// already been adjusted for negative indices by StartForAxis.
|
||||
const bool shrink_axis = op_context->params->shrink_axis_mask & (1 << idx);
|
||||
if (shrink_axis) {
|
||||
end = begin + 1;
|
||||
}
|
||||
|
||||
// This is valid for both positive and negative strides
|
||||
int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
|
||||
dim_shape = dim_shape < 0 ? 0 : dim_shape;
|
||||
if (!shrink_axis) {
|
||||
TF_LITE_ENSURE_EQ(context, output_shape->data[shape_size], dim_shape);
|
||||
shape_size++;
|
||||
}
|
||||
}
|
||||
TF_LITE_ENSURE_EQ(context, output_shape->size, shape_size);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(StridedSliceParams));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
StridedSliceParams* op_params =
|
||||
static_cast<StridedSliceParams*>(node->user_data);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 4);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
StridedSliceContext op_context(context, node);
|
||||
TF_LITE_ENSURE_MSG(context, op_context.dims <= kMaxDim,
|
||||
"input dim should not exceed 4");
|
||||
auto params = BuildStridedSliceParams(&op_context);
|
||||
memcpy(op_params, ¶ms, sizeof(StridedSliceParams));
|
||||
return CheckOutputSize(context, &op_context);
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const StridedSliceParams& op_params =
|
||||
*(static_cast<const StridedSliceParams*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
switch (output->type) {
|
||||
case kTfLiteFloat32:
|
||||
reference_ops::StridedSlice(op_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
reference_ops::StridedSlice(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
reference_ops::StridedSlice(op_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace strided_slice
|
||||
|
||||
TfLiteRegistration Register_STRIDED_SLICE() {
|
||||
return {/*init=*/strided_slice::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/strided_slice::Prepare,
|
||||
/*invoke=*/strided_slice::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
253
components/tflite_micro/tensorflow/lite/micro/kernels/sub.cc
Normal file
253
components/tflite_micro/tensorflow/lite/micro/kernels/sub.cc
Normal file
@@ -0,0 +1,253 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/sub.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace sub {
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
bool requires_broadcast;
|
||||
|
||||
// These fields are used in both the general 8-bit -> 8bit quantized path,
|
||||
// and the special 16-bit -> 16bit quantized path
|
||||
int input1_shift;
|
||||
int input2_shift;
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
// These fields are used only in the general 8-bit -> 8bit quantized path
|
||||
int32_t input1_multiplier;
|
||||
int32_t input2_multiplier;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
int left_shift;
|
||||
int32_t input1_offset;
|
||||
int32_t input2_offset;
|
||||
int32_t output_offset;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2, TfLiteTensor* output,
|
||||
OpData* data) {
|
||||
data->requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
// 8bit -> 8bit general quantized path, with general rescalings
|
||||
data->input1_offset = -input1->params.zero_point;
|
||||
data->input2_offset = -input2->params.zero_point;
|
||||
data->output_offset = output->params.zero_point;
|
||||
data->left_shift = 20;
|
||||
const float twice_max_input_scale =
|
||||
2 * std::max(input1->params.scale, input2->params.scale);
|
||||
const double real_input1_multiplier =
|
||||
static_cast<double>(input1->params.scale / twice_max_input_scale);
|
||||
const double real_input2_multiplier =
|
||||
static_cast<double>(input2->params.scale / twice_max_input_scale);
|
||||
const double real_output_multiplier =
|
||||
static_cast<double>(twice_max_input_scale /
|
||||
((1 << data->left_shift) * output->params.scale));
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_output_multiplier, &data->output_multiplier, &data->output_shift);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, data));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
|
||||
const OpData* data, const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
if (data->requires_broadcast) {
|
||||
tflite::reference_ops::BroadcastSubSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
tflite::reference_ops::SubWithActivation(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteSubParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
op_params.left_shift = data->left_shift;
|
||||
op_params.input1_offset = data->input1_offset;
|
||||
op_params.input1_multiplier = data->input1_multiplier;
|
||||
op_params.input1_shift = data->input1_shift;
|
||||
op_params.input2_offset = data->input2_offset;
|
||||
op_params.input2_multiplier = data->input2_multiplier;
|
||||
op_params.input2_shift = data->input2_shift;
|
||||
op_params.output_offset = data->output_offset;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
tflite::reference_ops::BroadcastSubSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
tflite::reference_ops::Sub(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else {
|
||||
if (need_broadcast) {
|
||||
tflite::reference_ops::BroadcastSubSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
tflite::reference_ops::Sub(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
EvalSub(context, node, params, &data, input1, input2, output);
|
||||
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
|
||||
input1, input2, output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(output->type), output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace sub
|
||||
|
||||
TfLiteRegistration Register_SUB() {
|
||||
return {/*init=*/sub::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/sub::Prepare,
|
||||
/*invoke=*/sub::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
548
components/tflite_micro/tensorflow/lite/micro/kernels/svdf.cc
Normal file
548
components/tflite_micro/tensorflow/lite/micro/kernels/svdf.cc
Normal file
@@ -0,0 +1,548 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/activation_utils.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace svdf {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
int32_t effective_scale_1_a;
|
||||
int32_t effective_scale_2_a;
|
||||
// b versions of each scale are kept at int since the numbers are just the
|
||||
// shift value - typically between [-32, 32].
|
||||
int effective_scale_1_b;
|
||||
int effective_scale_2_b;
|
||||
int scratch_tensor_index;
|
||||
int scratch_output_tensor_index;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int input_zero_point;
|
||||
int output_zero_point;
|
||||
};
|
||||
|
||||
/**
|
||||
* This version of SVDF is specific to TFLite Micro. It contains the following
|
||||
* differences between the TFLite version:
|
||||
*
|
||||
* 1.) Scratch tensor allocation - scratch tensors must be known ahead of time
|
||||
* for the Micro interpreter.
|
||||
* 2.) Output dimensions - the TFLite version determines output size and runtime
|
||||
* and resizes the output tensor. Micro runtime does not support tensor
|
||||
* resizing.
|
||||
*/
|
||||
static inline void ApplyTimeWeightsBiasAndActivation(
|
||||
int batch_size, int memory_size, int num_filters, int num_units, int rank,
|
||||
const float* const __restrict__ weights_time_ptr,
|
||||
const float* const __restrict__ bias_ptr, TfLiteFusedActivation activation,
|
||||
float* const __restrict__ state_ptr, float* const __restrict__ scratch_ptr,
|
||||
float* const __restrict__ output_ptr) {
|
||||
// Compute matmul(activation_state, weights_time).
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
// Perform batched vector dot product:
|
||||
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
|
||||
const float* vector1_ptr = weights_time_ptr;
|
||||
const float* vector2_ptr = state_ptr + b * memory_size * num_filters;
|
||||
for (int i = 0; i < num_filters; ++i) {
|
||||
*scratch_ptr_batch = 0.f;
|
||||
for (int j = 0; j < memory_size; ++j) {
|
||||
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
|
||||
}
|
||||
scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize output with bias if provided.
|
||||
if (bias_ptr) {
|
||||
// VectorBatchVectorAssign
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
float* output_data = output_ptr + i * num_units;
|
||||
const float* bias_data = bias_ptr;
|
||||
for (int j = 0; j < num_units; ++j) {
|
||||
*output_data++ = *bias_data++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
float* output_data = output_ptr;
|
||||
for (int i = 0; i < batch_size * num_units; ++i) {
|
||||
*output_data++ = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
// Reduction sum.
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
float* output_ptr_batch = output_ptr + b * num_units;
|
||||
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
|
||||
|
||||
// Reduction sum vector
|
||||
for (int i = 0; i < num_units; ++i) {
|
||||
for (int j = 0; j < rank; j++) {
|
||||
output_ptr_batch[i] += *scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Apply activation.
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
float* output_ptr_batch = output_ptr + b * num_units;
|
||||
for (int i = 0; i < num_units; ++i) {
|
||||
*output_ptr_batch = ActivationValFloat(activation, *output_ptr_batch);
|
||||
++output_ptr_batch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void EvalFloatSVDF(
|
||||
TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* weights_feature,
|
||||
const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
|
||||
const TfLiteSVDFParams* params, int scratch_tensor_index,
|
||||
TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
|
||||
const int rank = params->rank;
|
||||
const int batch_size = input->dims->data[0];
|
||||
const int input_size = input->dims->data[1];
|
||||
const int num_filters = weights_feature->dims->data[0];
|
||||
const int num_units = num_filters / rank;
|
||||
const int memory_size = weights_time->dims->data[1];
|
||||
|
||||
const float* weights_feature_ptr =
|
||||
tflite::micro::GetTensorData<float>(weights_feature);
|
||||
const float* weights_time_ptr =
|
||||
tflite::micro::GetTensorData<float>(weights_time);
|
||||
const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
|
||||
const float* input_ptr = tflite::micro::GetTensorData<float>(input);
|
||||
|
||||
float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);
|
||||
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
|
||||
|
||||
float* scratch_ptr = static_cast<float*>(
|
||||
context->GetScratchBuffer(context, scratch_tensor_index));
|
||||
|
||||
float* output_ptr = tflite::micro::GetTensorData<float>(output);
|
||||
|
||||
// Left shift the activation_state.
|
||||
{
|
||||
float* new_state_start = state_ptr;
|
||||
const float* old_state_start = state_ptr + 1;
|
||||
const float* old_state_end =
|
||||
state_ptr + batch_size * num_filters * memory_size;
|
||||
while (old_state_start != old_state_end) {
|
||||
*new_state_start++ = *old_state_start++;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: no need to clear the latest activation, matmul is not accumulative.
|
||||
|
||||
// Compute conv1d(inputs, weights_feature).
|
||||
// The activation_state's rightmost column is used to save current cycle
|
||||
// activation. This is achieved by starting at state_ptr[memory_size - 1] and
|
||||
// having the stride equal to memory_size.
|
||||
|
||||
// Perform batched matrix vector multiply operation:
|
||||
{
|
||||
const float* matrix = weights_feature_ptr;
|
||||
const float* vector = input_ptr;
|
||||
float* result = &state_ptr[memory_size - 1];
|
||||
float* result_in_batch = result;
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
const float* matrix_ptr = matrix;
|
||||
for (int j = 0; j < num_filters; ++j) {
|
||||
float dot_prod = 0.0f;
|
||||
const float* vector_in_batch = vector + i * input_size;
|
||||
for (int k = 0; k < input_size; ++k) {
|
||||
dot_prod += *matrix_ptr++ * *vector_in_batch++;
|
||||
}
|
||||
*result_in_batch = dot_prod;
|
||||
result_in_batch += memory_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ApplyTimeWeightsBiasAndActivation(
|
||||
batch_size, memory_size, num_filters, num_units, rank, weights_time_ptr,
|
||||
bias_ptr, params->activation, state_ptr, scratch_ptr, output_ptr);
|
||||
}
|
||||
|
||||
void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteEvalTensor* input_tensor,
|
||||
const TfLiteEvalTensor* weights_feature_tensor,
|
||||
const TfLiteEvalTensor* weights_time_tensor,
|
||||
const TfLiteEvalTensor* bias_tensor,
|
||||
const TfLiteSVDFParams* params,
|
||||
TfLiteEvalTensor* activation_state_tensor,
|
||||
TfLiteEvalTensor* output_tensor, const OpData& data) {
|
||||
const int n_rank = params->rank;
|
||||
const int n_batch = input_tensor->dims->data[0];
|
||||
const int n_input = input_tensor->dims->data[1];
|
||||
const int n_filter = weights_feature_tensor->dims->data[0];
|
||||
const int n_unit = n_filter / n_rank;
|
||||
const int n_memory = weights_time_tensor->dims->data[1];
|
||||
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
|
||||
|
||||
int32_t* scratch_tensor = static_cast<int32_t*>(
|
||||
context->GetScratchBuffer(context, data.scratch_tensor_index));
|
||||
int32_t* scratch_output_tensor = static_cast<int32_t*>(
|
||||
context->GetScratchBuffer(context, data.scratch_output_tensor_index));
|
||||
|
||||
// Shift states.
|
||||
int16_t* const state_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
|
||||
|
||||
// Left shift the activation_state.
|
||||
{
|
||||
int16_t* new_state_start = state_ptr;
|
||||
const int16_t* old_state_start = state_ptr + 1;
|
||||
const int16_t* old_state_end = state_ptr + n_batch * n_filter * n_memory;
|
||||
while (old_state_start != old_state_end) {
|
||||
*new_state_start++ = *old_state_start++;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: no need to clear the latest activation, matmul is not accumulative.
|
||||
|
||||
// Feature matmul.
|
||||
{
|
||||
int16_t* state =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
|
||||
const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
|
||||
const int8_t* weight_feature =
|
||||
tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
|
||||
const int32_t output_max = std::numeric_limits<int16_t>::max();
|
||||
const int32_t output_min = std::numeric_limits<int16_t>::min();
|
||||
int16_t* result_in_batch = state + (n_memory - 1);
|
||||
for (int b = 0; b < n_batch; b++) {
|
||||
const int8_t* matrix_ptr = weight_feature;
|
||||
for (int r = 0; r < n_filter; r++) {
|
||||
int32_t dot_prod = 0;
|
||||
const int8_t* vector_in_batch = input + b * n_input;
|
||||
for (int c = 0; c < n_input; c++) {
|
||||
dot_prod +=
|
||||
*matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
|
||||
}
|
||||
dot_prod = MultiplyByQuantizedMultiplier(
|
||||
dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
|
||||
dot_prod = std::min(std::max(output_min, dot_prod), output_max);
|
||||
// This assumes state is symmetrically quantized. Otherwise last bit of
|
||||
// state should be initialized to its zero point and accumulate the
|
||||
// dot_prod.
|
||||
// Equivalent as the following:
|
||||
// result_in_batch = zero point, which happens to be zero.
|
||||
// result_in_batch += dot_prod_56.
|
||||
*result_in_batch = dot_prod;
|
||||
result_in_batch += n_memory;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Time.
|
||||
{
|
||||
for (int b = 0; b < n_batch; ++b) {
|
||||
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
|
||||
|
||||
// Perform batched vector dot product:
|
||||
const int16_t* vector1_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
|
||||
const int16_t* vector2_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
|
||||
b * n_memory * n_filter;
|
||||
|
||||
for (int i = 0; i < n_filter; i++) {
|
||||
*scratch_ptr_batch = 0;
|
||||
for (int j = 0; j < n_memory; j++) {
|
||||
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
|
||||
}
|
||||
scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reduce, add bias, rescale, activation.
|
||||
{
|
||||
// Add bias.
|
||||
if (bias_tensor) {
|
||||
// Vector batch assign:
|
||||
const int32_t* bias_data =
|
||||
tflite::micro::GetTensorData<int32_t>(bias_tensor);
|
||||
for (int i = 0; i < n_batch; ++i) {
|
||||
int32_t* output_ptr = scratch_output_tensor + i * n_unit;
|
||||
const int32_t* bias_ptr = bias_data;
|
||||
for (int j = 0; j < n_unit; ++j) {
|
||||
*output_ptr++ = *bias_ptr++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int32_t* output_ptr = scratch_output_tensor;
|
||||
for (int i = 0; i < n_batch * n_unit; ++i) {
|
||||
*output_ptr++ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Reduce.
|
||||
for (int b = 0; b < n_batch; ++b) {
|
||||
int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit;
|
||||
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
|
||||
|
||||
// Reduction sum vector
|
||||
for (int i = 0; i < n_unit; ++i) {
|
||||
for (int j = 0; j < n_rank; ++j) {
|
||||
output_temp_ptr[i] += *scratch_ptr_batch++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Rescale.
|
||||
const int32_t output_max = std::numeric_limits<int8_t>::max();
|
||||
const int32_t output_min = std::numeric_limits<int8_t>::min();
|
||||
for (int i = 0; i < n_batch * n_unit; ++i) {
|
||||
int32_t x1 = scratch_output_tensor[i];
|
||||
int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
|
||||
data.effective_scale_2_b);
|
||||
int32_t x3 = x2 + data.output_zero_point;
|
||||
int32_t x4 = std::min(std::max(output_min, x3), output_max);
|
||||
tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
|
||||
static_cast<int8_t>(x4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Input tensors.
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kWeightsFeatureTensor = 1;
|
||||
constexpr int kWeightsTimeTensor = 2;
|
||||
constexpr int kBiasTensor = 3;
|
||||
// This is a variable tensor, and will be modified by this op.
|
||||
constexpr int kInputActivationStateTensor = 4;
|
||||
|
||||
// Output tensor.
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);
|
||||
|
||||
// Validate Tensor Inputs (dtype depends on quantization):
|
||||
// [0] = Input, {2, batch_size, input_size}
|
||||
// [1] = Weights Feature, {2, num_filters, input_size}
|
||||
// [2] = Weights Time, {2, num_filters, memory_size}
|
||||
// [3] = Bias (optional), {1, num_units}
|
||||
// [4] = Activation State (variable),
|
||||
// {2, batch_size, memory_size * num_filters}
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* weights_feature =
|
||||
GetInput(context, node, kWeightsFeatureTensor);
|
||||
const TfLiteTensor* weights_time =
|
||||
GetInput(context, node, kWeightsTimeTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
const TfLiteTensor* activation_state =
|
||||
GetInput(context, node, kInputActivationStateTensor);
|
||||
|
||||
// Define input constants based on input tensor definition above:
|
||||
const int rank = params->rank;
|
||||
const int input_size = input->dims->data[1];
|
||||
const int batch_size = input->dims->data[0];
|
||||
const int num_filters = weights_feature->dims->data[0];
|
||||
TF_LITE_ENSURE_EQ(context, num_filters % rank, 0);
|
||||
const int num_units = num_filters / rank;
|
||||
const int memory_size = weights_time->dims->data[1];
|
||||
|
||||
// Validate Input Tensor:
|
||||
TF_LITE_ENSURE(context,
|
||||
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
|
||||
|
||||
// Validate Tensor Output:
|
||||
// [0] = float/int8_t, {2, batch_size, num_units}
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
|
||||
|
||||
// Validate Weights Feature Input Tensor:
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_feature), 2);
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->dims->data[1], input_size);
|
||||
|
||||
// Validate Weights Time Input Tensor:
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_time), 2);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[0], num_filters);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size);
|
||||
|
||||
// Validate Optional Bias Input Tensor:
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units);
|
||||
}
|
||||
|
||||
// Validate Activation State Input Tensor:
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(activation_state), 2);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
|
||||
memory_size * num_filters);
|
||||
// Since is_variable is not part of TFLiteEvalTensor, check is_variable here.
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16);
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
|
||||
const double effective_scale_1 = static_cast<double>(
|
||||
input->params.scale * weights_feature->params.scale /
|
||||
activation_state->params.scale);
|
||||
const double effective_scale_2 =
|
||||
static_cast<double>(activation_state->params.scale *
|
||||
weights_time->params.scale / output->params.scale);
|
||||
|
||||
// TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
std::abs(static_cast<double>(bias->params.scale) -
|
||||
static_cast<double>(activation_state->params.scale *
|
||||
weights_time->params.scale)) < 1e-5);
|
||||
|
||||
QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
|
||||
&(data->effective_scale_1_b));
|
||||
QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
|
||||
&(data->effective_scale_2_b));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
|
||||
|
||||
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
|
||||
context, batch_size * num_filters * sizeof(int32_t),
|
||||
&(data->scratch_tensor_index));
|
||||
TF_LITE_ENSURE_OK(context, scratch_status);
|
||||
|
||||
const TfLiteStatus scratch_output_status =
|
||||
context->RequestScratchBufferInArena(
|
||||
context, batch_size * num_units * sizeof(int32_t),
|
||||
&(data->scratch_output_tensor_index));
|
||||
TF_LITE_ENSURE_OK(context, scratch_output_status);
|
||||
} else {
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32);
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
|
||||
}
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
|
||||
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
|
||||
context, batch_size * num_filters * sizeof(float),
|
||||
&(data->scratch_tensor_index));
|
||||
TF_LITE_ENSURE_OK(context, scratch_status);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* weights_feature =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsFeatureTensor);
|
||||
const TfLiteEvalTensor* weights_time =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 5)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
|
||||
context, node, kInputActivationStateTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (weights_feature->type) {
|
||||
case kTfLiteFloat32: {
|
||||
EvalFloatSVDF(context, node, input, weights_feature, weights_time, bias,
|
||||
params, data.scratch_tensor_index, activation_state,
|
||||
output);
|
||||
return kTfLiteOk;
|
||||
break;
|
||||
}
|
||||
|
||||
case kTfLiteInt8: {
|
||||
EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
|
||||
params, activation_state, output, data);
|
||||
return kTfLiteOk;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
|
||||
TfLiteTypeGetName(weights_feature->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace svdf
|
||||
|
||||
TfLiteRegistration Register_SVDF() {
|
||||
return {/*init=*/svdf::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/svdf::Prepare,
|
||||
/*invoke=*/svdf::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
154
components/tflite_micro/tensorflow/lite/micro/kernels/tanh.cc
Normal file
154
components/tflite_micro/tensorflow/lite/micro/kernels/tanh.cc
Normal file
@@ -0,0 +1,154 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/tanh.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
int32_t input_zero_point;
|
||||
int32_t input_range_radius;
|
||||
int32_t input_multiplier;
|
||||
int input_left_shift;
|
||||
};
|
||||
|
||||
void* TanhInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
OpData* data) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
|
||||
static constexpr int kInputIntegerBits = 4;
|
||||
const double input_real_multiplier =
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(1 << (31 - kInputIntegerBits));
|
||||
|
||||
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
|
||||
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
|
||||
|
||||
data->input_range_radius =
|
||||
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
return CalculateArithmeticOpData(context, node, data);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
reference_ops::Tanh(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteInt16: {
|
||||
TanhParams params;
|
||||
params.input_left_shift = data.input_left_shift;
|
||||
reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int16_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
TanhParams params;
|
||||
params.input_zero_point = data.input_zero_point;
|
||||
params.input_range_radius = data.input_range_radius;
|
||||
params.input_multiplier = data.input_multiplier;
|
||||
params.input_left_shift = data.input_left_shift;
|
||||
reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
reference_integer_ops::Tanh(
|
||||
data.input_zero_point, data.input_range_radius, data.input_multiplier,
|
||||
data.input_left_shift, NumElements(input->dims),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration Register_TANH() {
|
||||
return {/*init=*/activations::TanhInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::TanhPrepare,
|
||||
/*invoke=*/activations::TanhEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
121
components/tflite_micro/tensorflow/lite/micro/kernels/unpack.cc
Normal file
121
components/tflite_micro/tensorflow/lite/micro/kernels/unpack.cc
Normal file
@@ -0,0 +1,121 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace unpack {
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
|
||||
template <typename T>
|
||||
TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteEvalTensor* input, int output_count,
|
||||
int axis) {
|
||||
const TfLiteEvalTensor* output0 =
|
||||
tflite::micro::GetEvalOutput(context, node, 0);
|
||||
const TfLiteIntArray* input_dims = input->dims;
|
||||
const TfLiteIntArray* output_dims = output0->dims;
|
||||
const int dimensions = input_dims->size;
|
||||
|
||||
if (axis < 0) {
|
||||
axis += input->dims->size;
|
||||
}
|
||||
|
||||
TFLITE_DCHECK_LT(axis, dimensions);
|
||||
|
||||
int outer_size = 1;
|
||||
for (int i = 0; i < axis; ++i) {
|
||||
outer_size *= input_dims->data[i];
|
||||
}
|
||||
int copy_size = 1;
|
||||
for (int i = axis + 1; i < dimensions; ++i) {
|
||||
copy_size *= input_dims->data[i];
|
||||
}
|
||||
int output_size = 1;
|
||||
for (int i = 0; i < output_dims->size; ++i) {
|
||||
output_size *= output_dims->data[i];
|
||||
}
|
||||
TFLITE_DCHECK_EQ(output_size, copy_size * outer_size);
|
||||
|
||||
const T* input_data = tflite::micro::GetTensorData<T>(input);
|
||||
|
||||
for (int i = 0; i < output_count; ++i) {
|
||||
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
|
||||
T* output_data = tflite::micro::GetTensorData<T>(t);
|
||||
for (int k = 0; k < outer_size; ++k) {
|
||||
T* output_ptr = output_data + copy_size * k;
|
||||
int loc = k * output_count * copy_size + i * copy_size;
|
||||
const T* input_ptr = input_data + loc;
|
||||
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteUnpackParams* data =
|
||||
reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
return UnpackImpl<float>(context, node, input, data->num, data->axis);
|
||||
}
|
||||
case kTfLiteInt32: {
|
||||
return UnpackImpl<int32_t>(context, node, input, data->num, data->axis);
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
return UnpackImpl<uint8_t>(context, node, input, data->num, data->axis);
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
return UnpackImpl<int8_t>(context, node, input, data->num, data->axis);
|
||||
}
|
||||
default: {
|
||||
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by unpack.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace
|
||||
} // namespace unpack
|
||||
|
||||
TfLiteRegistration Register_UNPACK() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/unpack::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
Reference in New Issue
Block a user