tflite micro integrate repo

QingChuanWS
2020-12-09 15:06:27 +08:00
parent 200c0ff460
commit d80d0af1a6
406 changed files with 58235 additions and 1908 deletions


@@ -0,0 +1,91 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
namespace tflite {
namespace ops {
namespace micro {
namespace custom {
TfLiteRegistration* Register_ETHOSU();
const char* GetString_ETHOSU();
} // namespace custom
} // namespace micro
} // namespace ops
AllOpsResolver::AllOpsResolver() {
// Please keep this list of Builtin Operators in alphabetical order.
AddAbs();
AddAdd();
AddArgMax();
AddArgMin();
AddAveragePool2D();
AddCeil();
AddConcatenation();
AddConv2D();
AddCos();
AddDepthwiseConv2D();
AddDequantize();
AddEqual();
AddFloor();
AddFullyConnected();
AddGreater();
AddGreaterEqual();
AddHardSwish();
AddL2Normalization();
AddLess();
AddLessEqual();
AddLog();
AddLogicalAnd();
AddLogicalNot();
AddLogicalOr();
AddLogistic();
AddMaximum();
AddMaxPool2D();
AddMean();
AddMinimum();
AddMul();
AddNeg();
AddNotEqual();
AddPack();
AddPad();
AddPadV2();
AddPrelu();
AddQuantize();
AddRelu();
AddRelu6();
AddReshape();
AddResizeNearestNeighbor();
AddRound();
AddRsqrt();
AddSin();
AddSoftmax();
AddSplit();
AddSqrt();
AddSquare();
AddStridedSlice();
AddSub();
AddSvdf();
AddTanh();
AddUnpack();
// TODO(b/159644355): Figure out if custom Ops belong in AllOpsResolver.
TfLiteRegistration* registration =
tflite::ops::micro::custom::Register_ETHOSU();
if (registration) {
AddCustom(tflite::ops::micro::custom::GetString_ETHOSU(), registration);
}
}
} // namespace tflite


@@ -0,0 +1,35 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
namespace tflite {
// The magic number in the template parameter is the maximum number of ops that
// can be added to AllOpsResolver. It can be increased if needed. However, most
// applications that care about the memory footprint will want to use
// MicroMutableOpResolver directly with an application-specific template parameter.
// The examples directory has sample code for this.
class AllOpsResolver : public MicroMutableOpResolver<128> {
public:
AllOpsResolver();
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
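
As a quick illustration of the application-specific alternative the comment above mentions (a sketch, not part of this commit), an application that only needs a handful of kernels can size the resolver to match:

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Four ops are registered, so a template parameter of 4 is enough; grow it if
// more ops are added later.
// NOLINTNEXTLINE(runtime-global-variables)
static tflite::MicroMutableOpResolver<4> app_resolver;

void RegisterAppOps() {
  app_resolver.AddConv2D();
  app_resolver.AddDepthwiseConv2D();
  app_resolver.AddFullyConnected();
  app_resolver.AddSoftmax();
}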


@@ -0,0 +1,22 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
#define TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
extern const unsigned char g_keyword_scrambled_model_data[];
extern const unsigned int g_keyword_scrambled_model_data_length;
#endif // TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_


@@ -0,0 +1,32 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
// C++ will automatically create class-specific delete operators for virtual
// objects, which by default call the global delete function. For embedded
// applications we want to avoid this, and won't be calling new/delete on these
// objects, so we need to override the default implementation with one that does
// nothing to avoid linking in ::delete().
// This macro needs to be included in all subclasses of a virtual base class in
// the private section.
#ifdef TF_LITE_STATIC_MEMORY
#define TF_LITE_REMOVE_VIRTUAL_DELETE \
void operator delete(void* p) {}
#else
#define TF_LITE_REMOVE_VIRTUAL_DELETE
#endif
#endif // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
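
A usage sketch (hypothetical class name, mirroring AllOpsResolver above) showing where the macro goes:

#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Hypothetical application resolver: the macro sits in the private section so
// no class-specific operator delete (and thus no ::delete) gets linked in when
// TF_LITE_STATIC_MEMORY is defined.
class AppOpResolver : public tflite::MicroMutableOpResolver<8> {
 public:
  AppOpResolver() { AddFullyConnected(); }

 private:
  TF_LITE_REMOVE_VIRTUAL_DELETE
};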


@@ -0,0 +1,27 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/debug_log.h"
#include "stdio.h"
#ifdef __cplusplus
extern "C" {
#endif
void DebugLog(const char *s) { printf("%s", s); }
#ifdef __cplusplus
}
#endif


@@ -0,0 +1,23 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
// This function should be implemented by each target platform, and provide a
// way for strings to be output to some text stream. For more information, see
// tensorflow/lite/micro/debug_log.cc.
extern "C" void DebugLog(const char* s);
#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
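
A sketch of a platform-specific override, assuming a hypothetical board-support routine uart_send_byte() (any equivalent output primitive works):

#include "tensorflow/lite/micro/debug_log.h"

// Assumed board-support function; replace with your platform's UART/ITM call.
extern "C" void uart_send_byte(unsigned char byte);

// Streams each character of the log string out over the serial port.
extern "C" void DebugLog(const char* s) {
  while (*s != '\0') {
    uart_send_byte(static_cast<unsigned char>(*s++));
  }
}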


@@ -0,0 +1,25 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/examples/person_detection_experimental/detection_responder.h"
// This dummy implementation writes person and no person scores to the error
// console. Real applications will want to take some custom action instead, and
// should implement their own versions of this function.
void RespondToDetection(tflite::ErrorReporter* error_reporter,
int8_t person_score, int8_t no_person_score) {
TF_LITE_REPORT_ERROR(error_reporter, "person score:%d no person score %d",
person_score, no_person_score);
}


@@ -0,0 +1,34 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Provides an interface to take an action based on the output from the person
// detection model.
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
// Called every time the results of a person detection run are available. The
// `person_score` has the numerical confidence that the captured image contains
// a person, and `no_person_score` has the numerical confidence that the image
// does not contain a person. Typically, if person_score > no_person_score, the
// image is considered to contain a person. This threshold may be adjusted for
// particular applications.
void RespondToDetection(tflite::ErrorReporter* error_reporter,
int8_t person_score, int8_t no_person_score);
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
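
As a sketch of the "custom action" described above, an implementation might drive a board LED in addition to logging; board_led_set() is an assumed HAL call, not part of this commit:

#include "tensorflow/lite/micro/examples/person_detection_experimental/detection_responder.h"

// Assumed HAL call that switches a board LED on or off.
extern void board_led_set(bool on);

void RespondToDetection(tflite::ErrorReporter* error_reporter,
                        int8_t person_score, int8_t no_person_score) {
  // Light the LED whenever the person class wins.
  board_led_set(person_score > no_person_score);
  TF_LITE_REPORT_ERROR(error_reporter, "person score:%d no person score %d",
                       person_score, no_person_score);
}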


@@ -0,0 +1,26 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/examples/person_detection_experimental/image_provider.h"
#include "tensorflow/lite/micro/examples/person_detection_experimental/model_settings.h"
TfLiteStatus GetImage(tflite::ErrorReporter* error_reporter, int image_width,
int image_height, int channels, int8_t* image_data,
uint8_t* hardware_input) {
// Copy the captured frame into the model's input buffer. This assumes the
// bytes in hardware_input are already in the signed-centered form the int8
// model expects; if the camera delivers raw unsigned pixels, subtract 128
// here (see the note in main_functions.cc).
for (int i = 0; i < image_width * image_height * channels; ++i) {
image_data[i] = hardware_input[i];
}
return kTfLiteOk;
}


@@ -0,0 +1,40 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
// This is an abstraction around an image source like a camera, and is
// expected to return 8-bit sample data. The assumption is that this will be
// called in a low duty-cycle fashion in a low-power application. In these
// cases, the imaging sensor need not be run in a streaming mode, but rather can
// be idled in a relatively low-power mode between calls to GetImage(). The
// assumption is that the overhead and time of bringing the low-power sensor out
// of this standby mode is commensurate with the expected duty cycle of the
// application. The underlying sensor may actually be put into a streaming
// configuration, but the image buffer provided to GetImage should not be
// overwritten by the driver code until the next call to GetImage();
//
// In this port the implementation has no platform-specific dependencies: it
// simply copies samples from the caller-supplied hardware_input buffer. Real
// applications should wire this function up to their camera driver.
TfLiteStatus GetImage(tflite::ErrorReporter* error_reporter, int image_width,
int image_height, int channels, int8_t* image_data,
uint8_t* hardware_input);
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_


@@ -0,0 +1,119 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/examples/person_detection_experimental/main_functions.h"
#include "tensorflow/lite/micro/examples/person_detection_experimental/detection_responder.h"
#include "tensorflow/lite/micro/examples/person_detection_experimental/image_provider.h"
#include "tensorflow/lite/micro/examples/person_detection_experimental/model_settings.h"
#include "tensorflow/lite/micro/examples/person_detection_experimental/person_detect_model_data.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
// Globals, used for compatibility with Arduino-style sketches.
namespace {
tflite::ErrorReporter* error_reporter = nullptr;
const tflite::Model* model = nullptr;
tflite::MicroInterpreter* interpreter = nullptr;
TfLiteTensor* input = nullptr;
// In order to use optimized tensorflow lite kernels, a signed int8_t quantized
// model is preferred over the legacy unsigned model format. This means that
// throughout this project, input images must be converted from unsigned to
// signed format. The easiest and quickest way to convert from unsigned to
// signed 8-bit integers is to subtract 128 from the unsigned value to get a
// signed value.
// An area of memory to use for input, output, and intermediate arrays.
constexpr int kTensorArenaSize = 115 * 1024;
static uint8_t tensor_arena[kTensorArenaSize];
} // namespace
// Initialization entry point (setup() in the upstream Arduino-style example).
void person_detect_init() {
// Set up logging. Google style is to avoid globals or statics because of
// lifetime uncertainty, but since this has a trivial destructor it's okay.
// NOLINTNEXTLINE(runtime-global-variables)
static tflite::MicroErrorReporter micro_error_reporter;
error_reporter = &micro_error_reporter;
// Map the model into a usable data structure. This doesn't involve any
// copying or parsing, it's a very lightweight operation.
model = tflite::GetModel(g_person_detect_model_data);
if (model->version() != TFLITE_SCHEMA_VERSION) {
TF_LITE_REPORT_ERROR(error_reporter,
"Model provided is schema version %d not equal "
"to supported version %d.",
model->version(), TFLITE_SCHEMA_VERSION);
return;
}
// Pull in only the operation implementations we need.
// This relies on a complete list of all the ops needed by this graph.
// An easier approach is to just use the AllOpsResolver, but this will
// incur some penalty in code space for op implementations that are not
// needed by this graph.
//
// tflite::AllOpsResolver resolver;
// NOLINTNEXTLINE(runtime-global-variables)
static tflite::MicroMutableOpResolver<5> micro_op_resolver;
micro_op_resolver.AddAveragePool2D();
micro_op_resolver.AddConv2D();
micro_op_resolver.AddDepthwiseConv2D();
micro_op_resolver.AddReshape();
micro_op_resolver.AddSoftmax();
// Build an interpreter to run the model with.
// NOLINTNEXTLINE(runtime-global-variables)
static tflite::MicroInterpreter static_interpreter(
model, micro_op_resolver, tensor_arena, kTensorArenaSize, error_reporter);
interpreter = &static_interpreter;
// Allocate memory from the tensor_arena for the model's tensors.
TfLiteStatus allocate_status = interpreter->AllocateTensors();
if (allocate_status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter, "AllocateTensors() failed");
return;
}
// Get information about the memory area to use for the model's input.
input = interpreter->input(0);
}
// Inference entry point (loop() in the upstream Arduino-style example).
int person_detect(uint8_t* hardware_input) {
// Get image from provider.
if (kTfLiteOk != GetImage(error_reporter, kNumCols, kNumRows, kNumChannels,
input->data.int8, hardware_input)) {
TF_LITE_REPORT_ERROR(error_reporter, "Image capture failed.");
}
// Run the model on this input and make sure it succeeds.
if (kTfLiteOk != interpreter->Invoke()) {
TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed.");
}
TfLiteTensor* output = interpreter->output(0);
// Process the inference results.
int8_t person_score = output->data.int8[kPersonIndex];
int8_t no_person_score = output->data.int8[kNotAPersonIndex];
RespondToDetection(error_reporter, person_score, no_person_score);
// Report a person only when person_score beats no_person_score by a margin
// of 50 in quantized units.
return (person_score >= no_person_score + 50) ? 1 : 0;
}
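
To make the signed/unsigned note near the top of this file concrete, here is a minimal conversion sketch; the function and buffer names are hypothetical, and only kMaxImageSize comes from model_settings.h (it would typically be passed as the count):

#include <cstdint>

// Shift a raw uint8 camera frame into the signed range the int8 model expects.
void ConvertCameraFrameToInt8(const uint8_t* camera_frame, int8_t* model_input,
                              int count) {
  for (int i = 0; i < count; ++i) {
    // Map the unsigned range [0, 255] to the signed range [-128, 127].
    model_input[i] =
        static_cast<int8_t>(static_cast<int>(camera_frame[i]) - 128);
  }
}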


@@ -0,0 +1,30 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
#include "tensorflow/lite/c/common.h"
// Initializes all data needed for the example. In the upstream Arduino-style
// example this is called setup(); here it is exposed as person_detect_init().
extern "C" void person_detect_init();
// Runs one iteration of data gathering and inference on the supplied image
// buffer. In the upstream Arduino-style example this is called loop(); it
// should be invoked repeatedly from the application code.
extern "C" int person_detect(uint8_t* hardware_input);
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_


@@ -0,0 +1,21 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/examples/person_detection_experimental/model_settings.h"
const char* kCategoryLabels[kCategoryCount] = {
"notperson",
"person",
};


@@ -0,0 +1,35 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
// Keeping these as constant expressions allows us to allocate fixed-sized
// arrays on the stack for our working memory.
// All of these values are derived from the values used during model training;
// if you change your model you'll need to update these constants.
constexpr int kNumCols = 96;
constexpr int kNumRows = 96;
constexpr int kNumChannels = 1;
constexpr int kMaxImageSize = kNumCols * kNumRows * kNumChannels;
constexpr int kCategoryCount = 2;
constexpr int kPersonIndex = 1;
constexpr int kNotAPersonIndex = 0;
extern const char* kCategoryLabels[kCategoryCount];
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
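
Because the values above are constexpr, a working buffer can be a plain fixed-size array; a small sketch (variable name is just for illustration):

#include <cstdint>

#include "tensorflow/lite/micro/examples/person_detection_experimental/model_settings.h"

// Compile-time-sized image buffer derived from the model settings above.
int8_t frame_buffer[kMaxImageSize];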


@@ -0,0 +1,27 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This is a standard TensorFlow Lite model file that has been converted into a
// C data array, so it can be easily compiled into a binary for devices that
// don't have a file system. It was created using the command:
// xxd -i person_detect.tflite > person_detect_model_data.cc
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_
extern const unsigned char g_person_detect_model_data[];
extern const int g_person_detect_model_data_len;
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_


@@ -0,0 +1,57 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
#include <algorithm>
#include <cmath>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"
namespace tflite {
namespace ops {
namespace micro {
// Returns the floating point value for a fused activation:
inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
switch (act) {
case kTfLiteActNone:
return a;
case kTfLiteActRelu:
return TfLiteMax(0.0f, a);
case kTfLiteActReluN1To1:
return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
case kTfLiteActRelu6:
return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
case kTfLiteActTanh:
return std::tanh(a);
case kTfLiteActSignBit:
return std::signbit(a);
case kTfLiteActSigmoid:
return 1.0f / (1.0f + std::exp(-a));
}
return 0.0f; // To indicate an unsupported activation (i.e. when a new fused
// activation is added to the enum and not handled here).
}
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_


@@ -0,0 +1,285 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
uint8_t six_uint8;
uint8_t zero_uint8;
};
} // namespace
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
template <typename T>
inline void ReluQuantized(const ReluOpData& data,
const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const T* input_data,
T* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<T>(clamped);
}
}
template <typename T>
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
inline void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
template <typename Q>
inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
const Q* input_data,
const RuntimeShape& output_shape, Q* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const Q val = input_data[i];
const Q clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
} else if (input->type == kTfLiteUInt8) {
CalculateReluOpData<uint8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
ReluFloat(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
}
void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToAsymmetricQuantizedInt8(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
} else if (input->type == kTfLiteUInt8) {
data->six_uint8 = FloatToAsymmetricQuantizedUInt8(6.0f, input->params.scale,
input->params.zero_point);
data->zero_uint8 = input->params.zero_point;
}
return kTfLiteOk;
}
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
Relu6Float(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
}
} // namespace activations
TfLiteRegistration Register_RELU() {
return {/*init=*/activations::ReluInit,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_RELU6() {
return {/*init=*/activations::Relu6Init,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
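
The quantized Relu path above folds the clamp at zero into a requantization (MultiplyByQuantizedMultiplier plus the quantized activation bounds). As a float-reference sketch of what that approximates for int8 (rounding details differ from the fixed-point kernel):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Float reference for ReluQuantized<int8_t>: dequantize, clamp at zero,
// requantize into the output's scale/zero-point.
int8_t ReluInt8Reference(int8_t x, float input_scale, int input_zero_point,
                         float output_scale, int output_zero_point) {
  const float real = input_scale * (x - input_zero_point);
  const float clamped = std::max(0.0f, real);
  const int32_t q = output_zero_point +
                    static_cast<int32_t>(std::round(clamped / output_scale));
  return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, q)));
}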


@@ -0,0 +1,378 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "mcu_init.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/testing/micro_test.h"
#include "tensorflow/lite/micro/testing/test_utils.h"
namespace tflite {
namespace testing {
namespace {
void TestReluFloat(const int* input_dims_data, const float* input_data,
const int* output_dims_data, const float* golden,
float* output_data) {
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
const int output_elements_count = ElementCount(*output_dims);
constexpr int inputs_size = 1;
constexpr int outputs_size = 1;
constexpr int tensors_size = inputs_size + outputs_size;
TfLiteTensor tensors[tensors_size] = {
CreateFloatTensor(input_data, input_dims),
CreateFloatTensor(output_data, output_dims),
};
int inputs_array_data[] = {1, 0};
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
int outputs_array_data[] = {1, 1};
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
const TfLiteRegistration registration = ops::micro::Register_RELU();
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
outputs_array,
/*builtin_data=*/nullptr, micro_test::reporter);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
for (int i = 0; i < output_elements_count; ++i) {
TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], 1e-5f);
}
}
void TestRelu6Float(const int* input_dims_data, const float* input_data,
const int* output_dims_data, const float* golden,
float* output_data) {
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
const int output_elements_count = ElementCount(*output_dims);
constexpr int inputs_size = 1;
constexpr int outputs_size = 1;
constexpr int tensors_size = inputs_size + outputs_size;
TfLiteTensor tensors[tensors_size] = {
CreateFloatTensor(input_data, input_dims),
CreateFloatTensor(output_data, output_dims),
};
int inputs_array_data[] = {1, 0};
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
int outputs_array_data[] = {1, 1};
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
const TfLiteRegistration registration = ops::micro::Register_RELU6();
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
outputs_array,
/*builtin_data=*/nullptr, micro_test::reporter);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
for (int i = 0; i < output_elements_count; ++i) {
TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], 1e-5f);
}
}
void TestReluUint8(const int* input_dims_data, const float* input_data,
uint8_t* input_data_quantized, const float input_scale,
const int input_zero_point, const float* golden,
uint8_t* golden_quantized, const int* output_dims_data,
const float output_scale, const int output_zero_point,
uint8_t* output_data) {
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
const int output_elements_count = ElementCount(*output_dims);
constexpr int inputs_size = 1;
constexpr int outputs_size = 1;
constexpr int tensors_size = inputs_size + outputs_size;
TfLiteTensor tensors[tensors_size] = {
CreateQuantizedTensor(input_data, input_data_quantized, input_dims,
input_scale, input_zero_point),
CreateQuantizedTensor(output_data, output_dims, output_scale,
output_zero_point),
};
int inputs_array_data[] = {1, 0};
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
int outputs_array_data[] = {1, 1};
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
const TfLiteRegistration registration = ops::micro::Register_RELU();
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
outputs_array,
/*builtin_data=*/nullptr, micro_test::reporter);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
AsymmetricQuantize(golden, golden_quantized, output_elements_count,
output_scale, output_zero_point);
for (int i = 0; i < output_elements_count; ++i) {
TF_LITE_MICRO_EXPECT_EQ(golden_quantized[i], output_data[i]);
}
}
void TestRelu6Uint8(const int* input_dims_data, const float* input_data,
uint8_t* input_data_quantized, const float input_scale,
const int input_zero_point, const float* golden,
uint8_t* golden_quantized, const int* output_dims_data,
const float output_scale, const int output_zero_point,
uint8_t* output_data) {
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
const int output_elements_count = ElementCount(*output_dims);
constexpr int inputs_size = 1;
constexpr int outputs_size = 1;
constexpr int tensors_size = inputs_size + outputs_size;
TfLiteTensor tensors[tensors_size] = {
CreateQuantizedTensor(input_data, input_data_quantized, input_dims,
input_scale, input_zero_point),
CreateQuantizedTensor(output_data, output_dims, output_scale,
output_zero_point),
};
int inputs_array_data[] = {1, 0};
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
int outputs_array_data[] = {1, 1};
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
const TfLiteRegistration registration = ops::micro::Register_RELU6();
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
outputs_array,
/*builtin_data=*/nullptr, micro_test::reporter);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
AsymmetricQuantize(golden, golden_quantized, output_elements_count,
output_scale, output_zero_point);
for (int i = 0; i < output_elements_count; ++i) {
TF_LITE_MICRO_EXPECT_EQ(golden_quantized[i], output_data[i]);
}
}
void TestReluInt8(const int* input_dims_data, const float* input_data,
int8_t* input_data_quantized, const float input_scale,
const int input_zero_point, const float* golden,
int8_t* golden_quantized, const int* output_dims_data,
const float output_scale, const int output_zero_point,
int8_t* output_data) {
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
const int output_elements_count = ElementCount(*output_dims);
constexpr int inputs_size = 1;
constexpr int outputs_size = 1;
constexpr int tensors_size = inputs_size + outputs_size;
TfLiteTensor tensors[tensors_size] = {
CreateQuantizedTensor(input_data, input_data_quantized, input_dims,
input_scale, input_zero_point),
CreateQuantizedTensor(output_data, output_dims, output_scale,
output_zero_point),
};
int inputs_array_data[] = {1, 0};
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
int outputs_array_data[] = {1, 1};
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
const TfLiteRegistration registration = ops::micro::Register_RELU();
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
outputs_array,
/*builtin_data=*/nullptr, micro_test::reporter);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
AsymmetricQuantize(golden, golden_quantized, output_elements_count,
output_scale, output_zero_point);
for (int i = 0; i < output_elements_count; ++i) {
TF_LITE_MICRO_EXPECT_EQ(golden_quantized[i], output_data[i]);
}
}
void TestRelu6Int8(const int* input_dims_data, const float* input_data,
int8_t* input_data_quantized, const float input_scale,
const int input_zero_point, const float* golden,
int8_t* golden_quantized, const int* output_dims_data,
const float output_scale, const int output_zero_point,
int8_t* output_data) {
TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
const int output_elements_count = ElementCount(*output_dims);
constexpr int inputs_size = 1;
constexpr int outputs_size = 1;
constexpr int tensors_size = inputs_size + outputs_size;
TfLiteTensor tensors[tensors_size] = {
CreateQuantizedTensor(input_data, input_data_quantized, input_dims,
input_scale, input_zero_point),
CreateQuantizedTensor(output_data, output_dims, output_scale,
output_zero_point),
};
int inputs_array_data[] = {1, 0};
TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
int outputs_array_data[] = {1, 1};
TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
const TfLiteRegistration registration = ops::micro::Register_RELU6();
micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
outputs_array,
/*builtin_data=*/nullptr, micro_test::reporter);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
AsymmetricQuantize(golden, golden_quantized, output_elements_count,
output_scale, output_zero_point);
for (int i = 0; i < output_elements_count; ++i) {
TF_LITE_MICRO_EXPECT_EQ(golden_quantized[i], output_data[i]);
}
}
} // namespace
} // namespace testing
} // namespace tflite
TF_LITE_MICRO_TESTS_BEGIN
TF_LITE_MICRO_TEST(SimpleReluTestFloat) {
const int output_elements_count = 10;
const int input_shape[] = {2, 1, 5};
const float input_data[] = {
1.0, 2.0, 3.0, 4.0, 5.0, -1.0, -2.0, -3.0, -4.0, -5.0,
};
const float golden[] = {1.0, 2.0, 3.0, 4.0, 5.0, 0, 0, 0, 0, 0};
const int output_shape[] = {2, 1, 5};
float output_data[output_elements_count];
tflite::testing::TestReluFloat(input_shape, input_data, output_shape, golden,
output_data);
}
TF_LITE_MICRO_TEST(SimpleRelu6TestFloat) {
const int output_elements_count = 10;
float output_data[output_elements_count];
const int input_shape[] = {2, 1, 5};
const float input_data[] = {4.0, 5.0, 6.0, 7.0, 8.0,
-4.0, -5.0, -6.0, -7.0, -8.0};
const int output_shape[] = {2, 1, 5};
const float golden[] = {
4.0, 5.0, 6.0, 6.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0,
};
tflite::testing::TestRelu6Float(input_shape, input_data, output_shape, golden,
output_data);
}
TF_LITE_MICRO_TEST(SimpleReluTestUint8) {
const int elements_count = 10;
const int input_shape[] = {2, 1, 5};
const float input_data[] = {1, 2, 3, 4, 5, -1, -2, -3, -4, -5};
uint8_t input_quantized[elements_count];
const int output_shape[] = {2, 1, 5};
const float golden[] = {1, 2, 3, 4, 5, 0, 0, 0, 0, 0};
uint8_t golden_quantized[elements_count];
uint8_t output_data[elements_count];
const float input_scale = 0.5f;
const int input_zero_point = 127;
const float output_scale = 0.5f;
const int output_zero_point = 127;
tflite::testing::TestReluUint8(input_shape, input_data, input_quantized,
input_scale, input_zero_point, golden,
golden_quantized, output_shape, output_scale,
output_zero_point, output_data);
}
TF_LITE_MICRO_TEST(SimpleRelu6TestUint8) {
const int elements_count = 10;
const int input_shape[] = {2, 1, 5};
const float input_data[] = {4, 5, 6, 7, 8, -1, -2, -3, -4, -5};
uint8_t input_quantized[elements_count];
const int output_shape[] = {2, 1, 5};
const float golden[] = {4, 5, 6, 6, 6, 0, 0, 0, 0, 0};
uint8_t golden_quantized[elements_count];
uint8_t output_data[elements_count];
const float input_scale = 0.5f;
const int input_zero_point = 127;
const float output_scale = 0.5f;
const int output_zero_point = 127;
tflite::testing::TestRelu6Uint8(input_shape, input_data, input_quantized,
input_scale, input_zero_point, golden,
golden_quantized, output_shape, output_scale,
output_zero_point, output_data);
}
TF_LITE_MICRO_TEST(SimpleReluTestInt8) {
const int elements_count = 10;
const int input_shape[] = {2, 1, 5};
const float input_data[] = {1, 2, 3, 4, 5, -1, -2, -3, -4, -5};
int8_t input_quantized[elements_count];
const int output_shape[] = {2, 1, 5};
const float golden[] = {1, 2, 3, 4, 5, 0, 0, 0, 0, 0};
int8_t golden_quantized[elements_count];
int8_t output_data[elements_count];
const float input_scale = 0.5f;
const int input_zero_point = 0;
const float output_scale = 0.5f;
const int output_zero_point = 0;
tflite::testing::TestReluInt8(input_shape, input_data, input_quantized,
input_scale, input_zero_point, golden,
golden_quantized, output_shape, output_scale,
output_zero_point, output_data);
}
TF_LITE_MICRO_TEST(SimpleRelu6TestInt8) {
const int elements_count = 10;
const int input_shape[] = {2, 1, 5};
const float input_data[] = {4, 5, 6, 7, 8, -1, -2, -3, -4, -5};
int8_t input_quantized[elements_count];
const int output_shape[] = {2, 1, 5};
const float golden[] = {4, 5, 6, 6, 6, 0, 0, 0, 0, 0};
int8_t golden_quantized[elements_count];
int8_t output_data[elements_count];
const float input_scale = 0.5f;
const int input_zero_point = 127;
const float output_scale = 0.5f;
const int output_zero_point = 127;
tflite::testing::TestRelu6Int8(input_shape, input_data, input_quantized,
input_scale, input_zero_point, golden,
golden_quantized, output_shape, output_scale,
output_zero_point, output_data);
}
TF_LITE_MICRO_TESTS_END


@@ -0,0 +1,240 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
namespace ops {
namespace micro {
namespace add {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpData {
bool requires_broadcast;
// These fields are used in both the general 8-bit -> 8-bit quantized path
// and the special 16-bit -> 16-bit quantized path.
int input1_shift;
int input2_shift;
int32_t output_activation_min;
int32_t output_activation_max;
// These fields are used only in the general 8-bit -> 8-bit quantized path.
int32_t input1_multiplier;
int32_t input2_multiplier;
int32_t output_multiplier;
int output_shift;
int left_shift;
int32_t input1_offset;
int32_t input2_offset;
int32_t output_offset;
// Used only for float evals:
float output_activation_min_f32;
float output_activation_max_f32;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
const double twice_max_input_scale =
2 * static_cast<double>(
std::max(input1->params.scale, input2->params.scale));
const double real_input1_multiplier =
static_cast<double>(input1->params.scale) / twice_max_input_scale;
const double real_input2_multiplier =
static_cast<double>(input2->params.scale) / twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
QuantizeMultiplierSmallerThanOneExp(
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
} else if (output->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation,
&data->output_activation_min_f32,
&data->output_activation_max_f32);
}
return kTfLiteOk;
}
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
SetActivationParams(data->output_activation_min_f32,
data->output_activation_max_f32, &op_params);
#define TF_LITE_ADD(opname) \
reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<float>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<float>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<float>(output))
if (data->requires_broadcast) {
TF_LITE_ADD(BroadcastAdd4DSlow);
} else {
TF_LITE_ADD(Add);
}
#undef TF_LITE_ADD
}
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteAddParams* params, const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
#define TF_LITE_ADD(type, opname, dtype) \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<dtype>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<dtype>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<dtype>(output));
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
} else {
TF_LITE_ADD(reference_integer_ops, Add, int8_t);
}
} else {
if (need_broadcast) {
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
} else {
TF_LITE_ADD(reference_ops, Add, uint8_t);
}
}
#undef TF_LITE_ADD
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
OpData* data = static_cast<OpData*>(node->user_data);
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, data));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace add
TfLiteRegistration Register_ADD() {
return {/*init=*/add::Init,
/*free=*/nullptr,
/*prepare=*/add::Prepare,
/*invoke=*/add::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,133 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace arg_min_max {
constexpr int kInputTensor = 0;
constexpr int kAxis = 1;
constexpr int kOutputTensor = 0;
template <typename T1, typename T2, typename T3>
inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
const T1* input1_data, const T3* input2_data,
const RuntimeShape& output_shape, T2* output_data,
bool is_arg_max) {
if (is_arg_max) {
reference_ops::ArgMinMax(input1_shape, input1_data, input2_data,
output_shape, output_data, micro::Greater());
} else {
reference_ops::ArgMinMax(input1_shape, input1_data, input2_data,
output_shape, output_data, micro::Less());
}
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* axis =
tflite::micro::GetEvalInput(context, node, kAxis);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
ArgMinMaxHelper(tflite::micro::GetTensorShape(input), \
tflite::micro::GetTensorData<data_type>(input), \
tflite::micro::GetTensorData<axis_type>(axis), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<output_type>(output), \
is_arg_max)
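// The macro below is expanded once per supported (input, axis, output) type
// combination; the axis and output types are currently restricted to int32_t.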
if (axis->type == kTfLiteInt32) {
if (output->type == kTfLiteInt32) {
switch (input->type) {
case kTfLiteFloat32:
TF_LITE_ARG_MIN_MAX(float, int32_t, int32_t);
break;
case kTfLiteUInt8:
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
break;
case kTfLiteInt8:
TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int32_t);
break;
default:
TF_LITE_KERNEL_LOG(context,
"Only float32, uint8_t and int8_t are "
"supported currently, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context,
"Only int32_t are supported currently, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Only int32_t are supported currently, got %s.",
TfLiteTypeGetName(axis->type));
return kTfLiteError;
}
#undef TF_LITE_ARG_MIN_MAX
return kTfLiteOk;
}
TfLiteStatus ArgMinEval(TfLiteContext* context, TfLiteNode* node) {
return Eval(context, node, false);
}
TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {
return Eval(context, node, true);
}
} // namespace arg_min_max
TfLiteRegistration Register_ARG_MAX() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_ARG_MIN() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,74 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/ceil.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace ceil {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Ceil(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace ceil
TfLiteRegistration Register_CEIL() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/ceil::Prepare,
/*invoke=*/ceil::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,178 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
/*
* The circular buffer custom operator is used to implement strided streaming
* convolutions on TFLite Micro. Each time this operator is invoked, it checks
* whether or not to run, based on a predetermined stride in time. If the op
* runs, it inserts the input into the end of the output buffer and shifts the
* output values towards the start of the buffer. It discards the oldest value
* in the output buffer.
*
* Input: [<input N+1>]
* Before shifting:
* Output: [<input 1>, <input 2>, <input ...>, <input N>]
*
* After shifting:
* Output: [<input 2>, <input 3>, <input ...>, <input N+1>]
*
* We make some assumptions in this custom operator:
* - Input shape must be [1, 1, 1, depth]
* - Output shape must be [1, num_slots, 1, depth]
* - Input and output types must match.
* - Input and output quantization params must be identical.
*/
namespace tflite {
namespace ops {
namespace micro {
namespace circular_buffer {
namespace {
// The CircularBuffer op has one input and one output tensor.
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
// TODO(b/149795762): Add this to TfLiteStatus enum.
constexpr int kTfLiteAbort = -9;
// These fields control the stride period of a strided streaming model. This op
// returns kTfLiteAbort until cycles_until_run counts down to zero, at which
// point cycles_until_run is reset to cycles_max.
struct OpData {
int cycles_until_run;
int cycles_max;
};
// These are constants specific to the music detection model.
// They exist until (b/132070898) is fixed.
constexpr int kMaxOpDataSize = 7;
int op_data_counter = 0;
OpData op_data_array[kMaxOpDataSize];
} // namespace
void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; }
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, 1, output->dims->data[0]);
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[0]);
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]);
TF_LITE_ENSURE_EQ(context, 1, output->dims->data[2]);
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
// The circular buffer custom operator currently only supports int8_t.
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
// TODO(b/132070898): Use statically slotted OpData structures until a
// scratch memory API is ready.
TFLITE_DCHECK_LE(op_data_counter, kMaxOpDataSize);
OpData* op_data = &op_data_array[op_data_counter++];
// The last circular buffer layer (length 5) simply accumulates outputs, and
// does not run periodically.
// TODO(b/150001379): Move this special case logic to the tflite flatbuffer.
if (output->dims->data[1] == 5) {
op_data->cycles_max = 1;
} else {
op_data->cycles_max = 2;
}
op_data->cycles_until_run = op_data->cycles_max;
node->user_data = op_data;
return kTfLiteOk;
}
// Shifts the buffer over by the output depth, and writes the new input to the end of the buffer.
// num_slots is the number of samples stored in the output buffer.
// depth is the size of each sample.
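// A small worked example (values are illustrative only): with num_slots = 3
// and depth = 2, an output buffer [a0, a1, b0, b1, c0, c1] plus a new input
// [d0, d1] becomes [b0, b1, c0, c1, d0, d1] after the shift and copy below.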
void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
memmove(output, &output[depth], (num_slots - 1) * depth);
memcpy(&output[(num_slots - 1) * depth], input, depth);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
OpData* data = reinterpret_cast<OpData*>(node->user_data);
int num_slots = output->dims->data[1];
int depth = output->dims->data[3];
if (input->type == kTfLiteInt8) {
EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
if (--data->cycles_until_run != 0) {
// Signal the interpreter to end current run if the delay before op invoke
// has not been reached.
// TODO(b/149795762): Add kTfLiteAbort to TfLiteStatus enum.
return static_cast<TfLiteStatus>(kTfLiteAbort);
}
// If Prepare is ever called more than once (for example, when testing the
// ambient model the interpreter is created a few times), this op data counter
// needs to be reset so that future instances do not overrun the op data
// array.
op_data_counter = 0;
data->cycles_until_run = data->cycles_max;
return kTfLiteOk;
}
} // namespace circular_buffer
TfLiteRegistration* Register_CIRCULAR_BUFFER() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/circular_buffer::Free,
/*prepare=*/circular_buffer::Prepare,
/*invoke=*/circular_buffer::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,246 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
namespace ops {
namespace micro {
namespace add {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpData {
bool requires_broadcast;
// These fields are used in both the general 8-bit -> 8-bit quantized path
// and the special 16-bit -> 16-bit quantized path.
int input1_shift;
int input2_shift;
int32_t output_activation_min;
int32_t output_activation_max;
// These fields are used only in the general 8-bit -> 8-bit quantized path.
int32_t input1_multiplier;
int32_t input2_multiplier;
int32_t output_multiplier;
int output_shift;
int left_shift;
int32_t input1_offset;
int32_t input2_offset;
int32_t output_offset;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
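// Roughly speaking: both inputs are first shifted left by left_shift bits for
// precision, then rescaled onto a common scale (twice the larger input scale)
// by the per-input multipliers computed below; the sum is rescaled to the
// output scale by output_multiplier, which folds the left shift back out.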
const double twice_max_input_scale =
2 * static_cast<double>(
std::max(input1->params.scale, input2->params.scale));
const double real_input1_multiplier =
static_cast<double>(input1->params.scale) / twice_max_input_scale;
const double real_input2_multiplier =
static_cast<double>(input2->params.scale) / twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
QuantizeMultiplierSmallerThanOneExp(
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
}
return kTfLiteOk;
}
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
#define TF_LITE_ADD(opname) \
reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<float>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<float>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<float>(output))
if (data->requires_broadcast) {
TF_LITE_ADD(BroadcastAdd4DSlow);
} else {
TF_LITE_ADD(Add);
}
#undef TF_LITE_ADD
}
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteAddParams* params, const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
#define TF_LITE_ADD(type, opname, dtype) \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<dtype>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<dtype>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<dtype>(output));
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
} else {
arm_elementwise_add_s8(
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorData<int8_t>(input2),
op_params.input1_offset, op_params.input1_multiplier,
op_params.input1_shift, op_params.input2_offset,
op_params.input2_multiplier, op_params.input2_shift,
op_params.left_shift, tflite::micro::GetTensorData<int8_t>(output),
op_params.output_offset, op_params.output_multiplier,
op_params.output_shift, op_params.quantized_activation_min,
op_params.quantized_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output)));
}
} else {
if (need_broadcast) {
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
} else {
TF_LITE_ADD(reference_ops, Add, uint8_t);
}
}
#undef TF_LITE_ADD
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
OpData* data = static_cast<OpData*>(node->user_data);
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, data));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace add
TfLiteRegistration Register_ADD() {
return {/*init=*/add::Init,
/*free=*/nullptr,
/*prepare=*/add::Prepare,
/*invoke=*/add::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,455 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "cmsis/CMSIS/NN/Include/arm_nn_types.h"
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace conv {
constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
constexpr int kMaxChannels = 256;
// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kConvQuantizedDimension = 0;
struct OpData {
TfLitePaddingValues padding;
// Cached tensor zero point values for quantized operations.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
// TODO(b/141139247): Allocate these dynamically when possible.
int32_t per_channel_output_multiplier[kMaxChannels];
int32_t per_channel_output_shift[kMaxChannels];
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
// Index to buffer for optimizations if applicable.
int buffer_idx;
};
inline PaddingType RuntimePaddingType(TfLitePadding padding) {
switch (padding) {
case TfLitePadding::kTfLitePaddingSame:
return PaddingType::kSame;
case TfLitePadding::kTfLitePaddingValid:
return PaddingType::kValid;
case TfLitePadding::kTfLitePaddingUnknown:
default:
return PaddingType::kNone;
}
}
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, int width, int height,
int filter_width, int filter_height, int out_width,
int out_height, const TfLiteType data_type,
OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Matching GetWindowedOutputSize in TensorFlow.
auto padding = params->padding;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
params->dilation_height_factor, params->dilation_width_factor, height,
width, filter_height, filter_width, padding, &out_height, &out_width);
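// For reference, the output size computed here roughly follows
// effective_filter = (filter - 1) * dilation + 1;
// SAME: out = ceil(in / stride);
// VALID: out = ceil((in - effective_filter + 1) / stride),
// with the padding chosen to realize that output size.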
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
int num_channels = filter->dims->data[kConvQuantizedDimension];
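// PopulateConvolutionQuantizationParams fills in the per-tensor multiplier and
// shift, one (multiplier, shift) pair per output channel for per-channel
// quantized filters, and the fused activation range.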
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params->activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels));
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_FEATURE_MVE)
int32_t buf_size = 0;
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
auto* data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
RuntimeShape input_shape = GetTensorShape(input);
RuntimeShape output_shape = GetTensorShape(output);
// Initialize cmsis-nn input dimensions
cmsis_nn_dims input_dims;
input_dims.n = MatchingDim(input_shape, 0, output_shape, 0);
input_dims.h = input->dims->data[1];
input_dims.w = input->dims->data[2];
input_dims.c = input_shape.Dims(3);
// Initialize cmsis-nn filter dimensions
cmsis_nn_dims filter_dims;
filter_dims.n = output_shape.Dims(3);
filter_dims.h = filter->dims->data[1];
filter_dims.w = filter->dims->data[2];
filter_dims.c = input_dims.c;
// Initialize cmsis-nn output dimensions
cmsis_nn_dims output_dims;
output_dims.n = input_dims.n;
output_dims.h = output->dims->data[1];
output_dims.w = output->dims->data[2];
output_dims.c = output_shape.Dims(3);
TF_LITE_ENSURE_STATUS(CalculateOpData(
context, node, params, input_dims.w, input_dims.h, filter_dims.w,
filter_dims.h, output_dims.w, output_dims.h, input->type, data));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
if (input->type == kTfLiteInt8) {
// Initialize cmsis-nn convolution parameters
cmsis_nn_conv_params conv_params;
conv_params.input_offset = -input->params.zero_point;
conv_params.output_offset = output->params.zero_point;
conv_params.stride.h = params->stride_height;
conv_params.stride.w = params->stride_width;
conv_params.dilation.h = params->dilation_height_factor;
conv_params.dilation.w = params->dilation_width_factor;
conv_params.padding.h = data->padding.height;
conv_params.padding.w = data->padding.width;
conv_params.activation.min = data->output_activation_min;
conv_params.activation.max = data->output_activation_max;
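// Query the scratch buffer size required by the CMSIS-NN convolution wrapper
// here in Prepare so that it can be requested from the arena below instead of
// being allocated at Eval time.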
buf_size = arm_convolve_wrapper_s8_get_buffer_size(
&conv_params, &input_dims, &filter_dims, &output_dims);
}
if (buf_size > 0) {
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, buf_size, &data->buffer_idx));
} else {
data->buffer_idx = -1;
}
#endif
return kTfLiteOk;
}
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* im2col,
TfLiteEvalTensor* hwcn_weights,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data.output_multiplier;
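// Legacy uint8_t kernels stored output_shift as a right shift; the reference
// op expects positive-means-left, hence the negation below.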
op_params.output_shift = -data.output_shift;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output),
tflite::micro::GetTensorShape(im2col),
tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
return kTfLiteOk;
}
TfLiteStatus EvalQuantizedPerChannel(
TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params,
const OpData& data, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output, TfLiteEvalTensor* im2col) {
// Initialize cmsis-nn convolution parameters
cmsis_nn_conv_params conv_params;
conv_params.input_offset = -data.input_zero_point;
conv_params.output_offset = data.output_zero_point;
conv_params.stride.h = params->stride_height;
conv_params.stride.w = params->stride_width;
conv_params.dilation.h = params->dilation_height_factor;
conv_params.dilation.w = params->dilation_width_factor;
conv_params.padding.h = data.padding.height;
conv_params.padding.w = data.padding.width;
conv_params.activation.min = data.output_activation_min;
conv_params.activation.max = data.output_activation_max;
// Initialize cmsis-nn per channel quantization parameters
cmsis_nn_per_channel_quant_params quant_params;
quant_params.multiplier =
const_cast<int32_t*>(data.per_channel_output_multiplier);
quant_params.shift = const_cast<int32_t*>(data.per_channel_output_shift);
#if defined(__ARM_FEATURE_DSP) || defined(__ARM_FEATURE_MVE)
RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
// Consistency check.
TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (tflite::micro::GetTensorData<int8_t>(bias)) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
// Initialize cmsis-nn dimensions
// Input
cmsis_nn_dims input_dims;
input_dims.n = batch_size;
input_dims.h = input_shape.Dims(1);
input_dims.w = input_shape.Dims(2);
input_dims.c = input_depth;
// Filter
cmsis_nn_dims filter_dims;
filter_dims.n = output_depth;
filter_dims.h = filter_shape.Dims(1);
filter_dims.w = filter_shape.Dims(2);
filter_dims.c = input_depth;
// Bias
cmsis_nn_dims bias_dims;
bias_dims.n = 1;
bias_dims.h = 1;
bias_dims.w = 1;
bias_dims.c = output_depth;
// Output
cmsis_nn_dims output_dims;
output_dims.n = batch_size;
output_dims.h = output_shape.Dims(1);
output_dims.w = output_shape.Dims(2);
output_dims.c = output_depth;
// Initialize cmsis-nn context
cmsis_nn_context ctx;
ctx.buf = nullptr;
ctx.size = 0;
if (data.buffer_idx > -1) {
ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
// Note: ctx.size is currently not used in cmsis-nn.
// The buffer should be allocated in the Prepare function through
// arm_convolve_wrapper_s8_get_buffer_size
}
// arm_convolve_wrapper_s8 dispatches to the appropriate optimized kernel
// based on the parameters passed.
arm_status status = arm_convolve_wrapper_s8(
&ctx, &conv_params, &quant_params, &input_dims,
tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
tflite::micro::GetTensorData<int8_t>(output));
if (status == ARM_MATH_SUCCESS) {
return kTfLiteOk;
} else {
return kTfLiteError;
}
#else
#pragma message( \
"CMSIS-NN optimization for conv not available for this target. Using reference kernel.")
ConvParams op_params;
op_params.input_offset = -data.input_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.padding_values.height = data.padding.height;
op_params.padding_values.width = data.padding.width;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
reference_integer_ops::ConvPerChannel(
op_params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
return kTfLiteOk;
}
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
TfLiteEvalTensor* hwcn_weights,
TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
// TODO(b/154032858): Investigate removing extra copies.
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
tflite::micro::GetTensorShape(im2col),
tflite::micro::GetTensorData<float>(im2col));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFilterTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
EvalFloat(context, node, params, data, input, filter, bias, nullptr,
nullptr, output);
break;
case kTfLiteInt8:
return EvalQuantizedPerChannel(context, node, params, data, input, filter,
bias, output, nullptr);
break;
case kTfLiteUInt8:
return EvalQuantized(context, node, params, data, input, filter, bias,
nullptr, nullptr, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace conv
TfLiteRegistration Register_CONV_2D() {
return {/*init=*/conv::Init,
/*free=*/nullptr,
/*prepare=*/conv::Prepare,
/*invoke=*/conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,478 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace depthwise_conv {
namespace {
constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
constexpr int kMaxChannels = 256;
// Depthwise conv is quantized along dimension 3:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kDepthwiseConvQuantizedDimension = 3;
struct OpData {
TfLitePaddingValues padding;
// Cached tensor zero point values for quantized operations.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
// TODO(b/158779832): Allocate these dynamically when possible.
int32_t per_channel_output_multiplier[kMaxChannels];
int32_t per_channel_output_shift[kMaxChannels];
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
// Index to buffer for optimizations if applicable.
int buffer_idx;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, int width,
int height, int filter_width, int filter_height,
const TfLiteType data_type, OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
int unused_output_height, unused_output_width;
// Set buffer index to a reset value
data->buffer_idx = -1;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width, 1, 1, height, width,
filter_height, filter_width, params->padding, &unused_output_height,
&unused_output_width);
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
return tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params->activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
}
return kTfLiteOk;
}
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
auto* params =
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteType data_type = input->type;
int width = SizeOfDimension(input, 2);
int height = SizeOfDimension(input, 1);
int filter_width = SizeOfDimension(filter, 2);
int filter_height = SizeOfDimension(filter, 1);
if (input->type == kTfLiteInt8) {
// Allocate memory for per-channel quantization parameters
const int num_channels =
filter->dims->data[kDepthwiseConvQuantizedDimension];
TFLITE_DCHECK_LE(num_channels, kMaxChannels);
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
// All per-channel quantized tensors need valid zero point and scale arrays.
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(
filter->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->zero_point);
TF_LITE_ENSURE(
context, affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kDepthwiseConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
filter_width, filter_height, data_type,
data));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
if (input->type == kTfLiteInt8) {
RuntimeShape input_shape = GetTensorShape(input);
RuntimeShape output_shape = GetTensorShape(output);
RuntimeShape filter_shape = GetTensorShape(filter);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(output_shape, 3, filter_shape, 3);
TFLITE_DCHECK_EQ(batch_size, 1); /* Only batch = 1 is supported */
cmsis_nn_dims input_dims;
input_dims.n = batch_size;
input_dims.h = height;
input_dims.w = width;
input_dims.c = input_shape.Dims(3);
cmsis_nn_dims filter_dims;
filter_dims.n = 1;
filter_dims.h = filter_height;
filter_dims.w = filter_width;
filter_dims.c = output_depth;
cmsis_nn_dims output_dims;
output_dims.n = batch_size;
output_dims.h = output_shape.Dims(1);
output_dims.w = output_shape.Dims(2);
output_dims.c = output_depth;
cmsis_nn_dw_conv_params dw_conv_params;
dw_conv_params.padding.h = data->padding.height;
dw_conv_params.padding.w = data->padding.width;
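// As in the conv kernel, the CMSIS-NN wrapper's scratch buffer requirement is
// queried here in Prepare and requested from the arena below.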
const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
&dw_conv_params, &input_dims, &filter_dims, &output_dims);
if (buf_size > 0) {
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, buf_size, &data->buffer_idx));
} else {
data->buffer_idx = -1;
}
}
return kTfLiteOk;
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, const OpData* data,
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::DepthwiseConv(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, OpData* data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
cmsis_nn_dw_conv_params dw_conv_params;
dw_conv_params.dilation.h = params->dilation_height_factor;
dw_conv_params.dilation.w = params->dilation_width_factor;
// The call to the reference implementation can be removed once dilation is
// supported by the optimized implementations.
if (1 == dw_conv_params.dilation.h && 1 == dw_conv_params.dilation.w) {
dw_conv_params.input_offset = -data->input_zero_point;
dw_conv_params.output_offset = data->output_zero_point;
dw_conv_params.stride.h = params->stride_height;
dw_conv_params.stride.w = params->stride_width;
dw_conv_params.padding.h = data->padding.height;
dw_conv_params.padding.w = data->padding.width;
// TODO(b/130439627): Use calculated value for clamping.
dw_conv_params.activation.min = std::numeric_limits<int8_t>::min();
dw_conv_params.activation.max = std::numeric_limits<int8_t>::max();
dw_conv_params.ch_mult = params->depth_multiplier;
cmsis_nn_per_channel_quant_params quant_params;
quant_params.multiplier = data->per_channel_output_multiplier;
quant_params.shift = data->per_channel_output_shift;
RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
TFLITE_DCHECK_LE(dw_conv_params.activation.min,
dw_conv_params.activation.max);
const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
if (tflite::micro::GetTensorData<int8_t>(bias)) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
cmsis_nn_dims input_dims;
input_dims.n = batch_size;
input_dims.h = input_shape.Dims(1);
input_dims.w = input_shape.Dims(2);
input_dims.c = input_shape.Dims(3);
cmsis_nn_dims filter_dims;
filter_dims.n = filter_shape.Dims(0);
filter_dims.h = filter_shape.Dims(1);
filter_dims.w = filter_shape.Dims(2);
filter_dims.c = output_depth;
cmsis_nn_dims bias_dims;
bias_dims.n = 1;
bias_dims.h = 1;
bias_dims.w = 1;
bias_dims.c = output_depth;
cmsis_nn_dims output_dims;
output_dims.n = batch_size;
output_dims.h = output_shape.Dims(1);
output_dims.w = output_shape.Dims(2);
output_dims.c = output_depth;
cmsis_nn_context ctx;
ctx.buf = nullptr;
/* 'size' is unused */
ctx.size = 0;
if (data->buffer_idx > -1) {
ctx.buf = context->GetScratchBuffer(context, data->buffer_idx);
}
TFLITE_DCHECK_EQ(
arm_depthwise_conv_wrapper_s8(
&ctx, &dw_conv_params, &quant_params, &input_dims,
tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
tflite::micro::GetTensorData<int8_t>(output)),
ARM_MATH_SUCCESS);
} else {
DepthwiseParams op_params;
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.input_offset = -data->input_zero_point;
op_params.weights_offset = 0;
op_params.output_offset = data->output_zero_point;
// TODO(b/130439627): Use calculated value for clamping.
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
reference_integer_ops::DepthwiseConvPerChannel(
op_params, data->per_channel_output_multiplier,
data->per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, const OpData* data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data->input_zero_point;
const int32_t filter_offset = -data->filter_zero_point;
const int32_t output_offset = data->output_zero_point;
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data->output_multiplier;
// Legacy ops used a mix of left and right shifts. Now all shifts are positive-means-left.
op_params.output_shift = -data->output_shift;
if (1 == op_params.dilation_width_factor &&
1 == op_params.dilation_height_factor) {
RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
arm_depthwise_conv_u8_basic_ver1(
tflite::micro::GetTensorData<uint8_t>(input), input_width, input_height,
input_depth, tflite::micro::GetTensorData<uint8_t>(filter),
filter_width, filter_height, op_params.depth_multiplier,
op_params.padding_values.width, op_params.padding_values.height,
op_params.stride_width, op_params.stride_height,
op_params.dilation_width_factor, op_params.dilation_height_factor,
tflite::micro::GetTensorData<int32_t>(bias), op_params.input_offset,
op_params.weights_offset, op_params.output_offset,
tflite::micro::GetTensorData<uint8_t>(output), output_width,
output_height, op_params.quantized_activation_min,
op_params.quantized_activation_max, op_params.output_shift,
op_params.output_multiplier);
} else {
tflite::reference_ops::DepthwiseConv(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params =
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
OpData& data = *(static_cast<OpData*>(node->user_data));
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFilterTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
// TODO(aselle): Consider whether float conv and quantized conv should be
// separate ops to avoid dispatch overhead here.
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
EvalFloat(context, node, params, &data, input, filter, bias, output);
break;
case kTfLiteInt8:
EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
output);
break;
case kTfLiteUInt8:
EvalQuantized(context, node, params, &data, input, filter, bias, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace depthwise_conv
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
return {/*init=*/depthwise_conv::Init,
/*free=*/nullptr,
/*prepare=*/depthwise_conv::Prepare,
/*invoke=*/depthwise_conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
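The sign flip on output_shift above (the op data stores the negated exponent, and the eval path negates it again before calling the reference op) follows from how the real multiplier is decomposed into a Q31 fixed-point multiplier and a base-2 exponent. Below is a minimal standalone sketch of that decomposition; DecomposeMultiplier is an invented stand-in for the library's QuantizeMultiplier, and the 0.0072 example value is arbitrary.

// Simplified stand-in for QuantizeMultiplier: split a real multiplier into a
// Q31 significand and a base-2 exponent. Illustrative only.
#include <cmath>
#include <cstdint>
#include <cstdio>

void DecomposeMultiplier(double real_multiplier, int32_t* quantized_multiplier,
                         int* exponent) {
  const double significand = std::frexp(real_multiplier, exponent);
  int64_t q = static_cast<int64_t>(std::round(significand * (1ll << 31)));
  if (q == (1ll << 31)) {  // Rounding can push the significand up to exactly 1.0.
    q /= 2;
    ++*exponent;
  }
  *quantized_multiplier = static_cast<int32_t>(q);
}

int main() {
  int32_t multiplier;
  int exponent;
  DecomposeMultiplier(0.0072, &multiplier, &exponent);  // exponent is negative here
  // The op data stores output_shift = -exponent; the eval path then passes
  // -output_shift, i.e. the exponent itself, since positive now means left shift.
  std::printf("multiplier=%d exponent=%d stored_shift=%d passed_shift=%d\n",
              static_cast<int>(multiplier), exponent, -exponent, exponent);
  return 0;
}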

View File

@@ -0,0 +1,350 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace fully_connected {
namespace {
struct OpData {
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
// The index of the temporary tensor where the quantized inputs are cached.
int input_quantized_index;
// Index to buffer for optimizations if applicable.
int buffer_idx;
// Cached tensor zero point values for quantized operations.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
};
constexpr int kInputTensor = 0;
constexpr int kWeightsTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context,
TfLiteFusedActivation activation,
TfLiteType data_type, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
OpData* data) {
TfLiteStatus status = kTfLiteOk;
// Set buffer index to a reset value
data->buffer_idx = -1;
if (data_type != kTfLiteFloat32) {
double real_multiplier = 0.0;
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
context, input, filter, bias, output, &real_multiplier));
int exponent;
QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
data->output_shift = -exponent;
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, activation, output, &data->output_activation_min,
&data->output_activation_max));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
}
return status;
}
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const auto params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params->activation,
input->type, input, filter, bias,
output, data));
if (input->type == kTfLiteInt8 && nullptr != GetTensorData<int32_t>(bias)) {
RuntimeShape filter_shape = GetTensorShape(filter);
RuntimeShape output_shape = GetTensorShape(output);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
const int filter_dim_count = filter_shape.DimensionsCount();
cmsis_nn_dims filter_dims;
filter_dims.n = filter_shape.Dims(filter_dim_count - 1);
filter_dims.h = 1;
filter_dims.w = 1;
filter_dims.c = output_shape.Dims(1);
const int32_t buf_size =
arm_fully_connected_s8_get_buffer_size(&filter_dims);
if (buf_size > 0) {
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, buf_size, &data->buffer_idx));
} else {
data->buffer_idx = -1;
}
}
return kTfLiteOk;
}
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
// The 'if' condition can be removed when null handling of bias is added to
// arm_fully_connected_s8
if (nullptr != tflite::micro::GetTensorData<int32_t>(bias)) {
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
const int batches = output_shape.Dims(0);
const int output_depth = output_shape.Dims(1);
const RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
const int filter_dim_count = filter_shape.DimensionsCount();
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
const RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
cmsis_nn_fc_params fc_params;
fc_params.input_offset = -data.input_zero_point;
fc_params.output_offset = data.output_zero_point;
fc_params.activation.min = data.output_activation_min;
fc_params.activation.max = data.output_activation_max;
cmsis_nn_per_tensor_quant_params quant_params;
quant_params.multiplier = data.output_multiplier;
// TODO(b/138810107): Figure out whether output shift should be inverted
quant_params.shift = -data.output_shift;
cmsis_nn_dims input_dims;
input_dims.n = batches;
input_dims.h = input_shape.Dims(1);
input_dims.w = input_shape.Dims(2);
input_dims.c = input_shape.Dims(3);
cmsis_nn_dims filter_dims;
filter_dims.n = accum_depth;
filter_dims.h = 1;
filter_dims.w = 1;
filter_dims.c = output_depth;
cmsis_nn_dims bias_dims;
bias_dims.n = 1;
bias_dims.h = 1;
bias_dims.w = 1;
bias_dims.c = output_depth;
cmsis_nn_dims output_dims;
output_dims.n = batches;
output_dims.h = 1;
output_dims.w = 1;
output_dims.c = output_depth;
cmsis_nn_context ctx;
ctx.buf = nullptr;
ctx.size = 0;
if (data.buffer_idx > -1) {
ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
}
TF_LITE_ENSURE_EQ(
context,
arm_fully_connected_s8(
&ctx, &fc_params, &quant_params, &input_dims,
tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
tflite::micro::GetTensorData<int8_t>(output)),
ARM_MATH_SUCCESS);
} else {
tflite::FullyConnectedParams op_params;
op_params.input_offset = -data.input_zero_point;
op_params.weights_offset = -data.filter_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.output_multiplier = data.output_multiplier;
// TODO(b/138810107): Figure out whether output shift should be inverted
op_params.output_shift = -data.output_shift;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
reference_integer_ops::FullyConnected(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
return kTfLiteOk;
}
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
const OpData& data, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
tflite::FullyConnectedParams op_params;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data.output_multiplier;
// Legacy ops used mixed left and right shifts. Now a positive shift always means a left shift.
op_params.output_shift = -data.output_shift;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
reference_ops::FullyConnected( \
op_params, tflite::micro::GetTensorShape(input), \
tflite::micro::GetTensorData<uint8_t>(input), \
tflite::micro::GetTensorShape(filter), \
tflite::micro::GetTensorData<uint8_t>(filter), \
tflite::micro::GetTensorShape(bias), \
tflite::micro::GetTensorData<int32_t>(bias), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<output_data_type>(output))
switch (output->type) {
case kTfLiteUInt8:
TF_LITE_FULLY_CONNECTED(uint8_t);
break;
case kTfLiteInt16:
TF_LITE_FULLY_CONNECTED(int16_t);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteFusedActivation activation,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(activation, &output_activation_min,
&output_activation_max);
tflite::FullyConnectedParams op_params;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::FullyConnected(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto* params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kWeightsTensor);
const TfLiteEvalTensor* bias =
tflite::micro::GetEvalInput(context, node, kBiasTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
// Checks in Prepare ensure input, output and filter types are all the same.
switch (input->type) {
case kTfLiteFloat32:
return EvalFloat(context, node, params->activation, input, filter, bias,
output);
case kTfLiteInt8:
return EvalQuantizedInt8(context, node, data, input, filter, bias,
output);
case kTfLiteUInt8:
return EvalQuantized(context, node, data, input, filter, bias, output);
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace fully_connected
TfLiteRegistration Register_FULLY_CONNECTED() {
return {/*init=*/fully_connected::Init,
/*free=*/nullptr,
/*prepare=*/fully_connected::Prepare,
/*invoke=*/fully_connected::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
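For the integer paths above, each output element is an int32 accumulation of offset-adjusted products plus the bias, requantized with the multiplier/shift pair computed in CalculateOpData. A toy, self-contained sketch of that arithmetic for a single output neuron follows; the sizes, offsets, and the real-valued multiplier are invented, and a double is used here for readability where the kernels use the fixed-point multiplier/shift pair.

// Toy single-neuron version of the quantized fully-connected arithmetic.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const int accum_depth = 4;
  const int8_t input[accum_depth] = {12, -3, 7, 20};
  const int8_t filter[accum_depth] = {5, 9, -2, 1};
  const int32_t bias = 150;
  const int32_t input_offset = 1;    // -input_zero_point
  const int32_t filter_offset = 0;   // -filter_zero_point (0 for symmetric int8 weights)
  const int32_t output_offset = -128;
  const double real_multiplier = 0.004;  // input_scale * filter_scale / output_scale
  int32_t acc = bias;
  for (int i = 0; i < accum_depth; ++i) {
    acc += (input[i] + input_offset) * (filter[i] + filter_offset);
  }
  int32_t out =
      static_cast<int32_t>(std::lround(acc * real_multiplier)) + output_offset;
  out = std::min<int32_t>(127, std::max<int32_t>(-128, out));  // fused activation clamp
  std::printf("acc=%d out=%d\n", static_cast<int>(acc), static_cast<int>(out));
  return 0;
}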

View File

@@ -0,0 +1,218 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
namespace ops {
namespace micro {
namespace mul {
constexpr int kInput1Tensor = 0;
constexpr int kInput2Tensor = 1;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t output_activation_min;
int32_t output_activation_max;
int32_t output_multiplier;
int output_shift;
// Cached tensor zero point values for quantized operations.
int32_t input1_zero_point;
int32_t input2_zero_point;
int32_t output_zero_point;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpData* data) {
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
double real_multiplier =
input1->params.scale * input2->params.scale / output->params.scale;
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
if (output->dims->size == 0) {
return AllocateOutputDimensionsFromInput(context, input1, input2, output);
}
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
data->input1_zero_point = input1->params.zero_point;
data->input2_zero_point = input2->params.zero_point;
data->output_zero_point = output->params.zero_point;
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, data));
return kTfLiteOk;
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, const OpData& data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
tflite::ArithmeticParams op_params;
SetActivationParams(data.output_activation_min, data.output_activation_max,
&op_params);
op_params.input1_offset = -data.input1_zero_point;
op_params.input2_offset = -data.input2_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.output_multiplier = data.output_multiplier;
op_params.output_shift = data.output_shift;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
#define TF_LITE_MUL(type, opname, dtype) \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<dtype>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<dtype>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<dtype>(output));
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
} else {
arm_elementwise_mul_s8(
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorData<int8_t>(input2),
op_params.input1_offset, op_params.input2_offset,
tflite::micro::GetTensorData<int8_t>(output),
op_params.output_offset, op_params.output_multiplier,
op_params.output_shift, op_params.quantized_activation_min,
op_params.quantized_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output)));
}
} else if (output->type == kTfLiteUInt8) {
if (need_broadcast) {
TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
} else {
TF_LITE_MUL(reference_ops, Mul, uint8_t);
}
}
#undef TF_LITE_MUL
}
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
#define TF_LITE_MUL(opname) \
reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<float>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<float>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<float>(output));
if (need_broadcast) {
TF_LITE_MUL(BroadcastMul4DSlow);
} else {
TF_LITE_MUL(Mul);
}
#undef TF_LITE_MUL
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInput1Tensor);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInput2Tensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
switch (input1->type) {
case kTfLiteUInt8:
case kTfLiteInt8:
EvalQuantized(context, node, params, data, input1, input2, output);
break;
case kTfLiteFloat32:
EvalFloat(context, node, params, input1, input2, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace mul
TfLiteRegistration Register_MUL() {
return {/*init=*/mul::Init,
/*free=*/nullptr,
/*prepare=*/mul::Prepare,
/*invoke=*/mul::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
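CalculateOpData above derives the MUL requantization scale as input1_scale * input2_scale / output_scale. The small self-contained check below, with invented scales and zero points, shows why: requantizing the product of offset-adjusted inputs with that multiplier dequantizes back to the real-valued product.

// Scale algebra behind CalculateOpData for quantized MUL (invented parameters).
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const double s1 = 0.5, s2 = 0.25, s_out = 0.1;  // tensor scales
  const int32_t z1 = 2, z2 = -1, z_out = 0;       // zero points
  const int32_t q1 = 10, q2 = 7;                  // quantized inputs
  const double real_multiplier = s1 * s2 / s_out;
  const int32_t q_out = z_out + static_cast<int32_t>(std::lround(
                                    (q1 - z1) * (q2 - z2) * real_multiplier));
  // Dequantizing the output reproduces the real-valued product of the inputs.
  std::printf("real product=%.2f  dequantized output=%.2f\n",
              s1 * (q1 - z1) * s2 * (q2 - z2), s_out * (q_out - z_out));
  return 0;
}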

View File

@@ -0,0 +1,397 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
#include "flatbuffers/base.h" // from @flatbuffers
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pooling {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
TfLitePaddingValues padding;
// Index to buffer for optimizations if applicable.
int buffer_idx;
int32_t activation_min;
int32_t activation_max;
};
TfLiteStatus CalculateOpData(TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input, TfLiteTensor* output,
OpData* data) {
// input: batch, height, width, channel
int height = SizeOfDimension(input, 1);
int width = SizeOfDimension(input, 2);
int out_height, out_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
/*dilation_rate_height=*/1,
/*dilation_rate_width=*/1, height, width, params->filter_height,
params->filter_width, params->padding, &out_height, &out_width);
if (input->type != kTfLiteFloat32) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->activation_min,
&data->activation_max));
TFLITE_DCHECK_LE(data->activation_min, data->activation_max);
}
// Set buffer index to a reset value
data->buffer_idx = -1;
return kTfLiteOk;
}
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData& data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
float activation_min, activation_max;
CalculateActivationRange(params->activation, &activation_min,
&activation_max);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data.padding.height;
op_params.padding_values.width = data.padding.width;
op_params.float_activation_min = activation_min;
op_params.float_activation_max = activation_max;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData& data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data.padding.height;
op_params.padding_values.width = data.padding.width;
op_params.quantized_activation_min = data.activation_min;
op_params.quantized_activation_max = data.activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
cmsis_nn_dims input_dims;
input_dims.n = 1;
input_dims.h = input_shape.Dims(1);
input_dims.w = input_shape.Dims(2);
input_dims.c = depth;
cmsis_nn_dims output_dims;
output_dims.n = 1;
output_dims.h = output_shape.Dims(1);
output_dims.w = output_shape.Dims(2);
output_dims.c = depth;
cmsis_nn_pool_params pool_params;
pool_params.stride.h = params->stride_height;
pool_params.stride.w = params->stride_width;
pool_params.padding.h = data.padding.height;
pool_params.padding.w = data.padding.width;
pool_params.activation.min = data.activation_min;
pool_params.activation.max = data.activation_max;
cmsis_nn_dims filter_dims;
filter_dims.n = 1;
filter_dims.h = params->filter_height;
filter_dims.w = params->filter_width;
filter_dims.c = 1;
cmsis_nn_context ctx;
ctx.buf = nullptr;
ctx.size = 0;
if (data.buffer_idx > -1) {
ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
}
TFLITE_DCHECK_EQ(
arm_avgpool_s8(&ctx, &pool_params, &input_dims,
tflite::micro::GetTensorData<int8_t>(input),
&filter_dims, &output_dims,
tflite::micro::GetTensorData<int8_t>(output)),
ARM_MATH_SUCCESS);
}
}
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpData& data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
float activation_min, activation_max;
CalculateActivationRange(params->activation, &activation_min,
&activation_max);
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data.padding.height;
op_params.padding_values.width = data.padding.width;
op_params.float_activation_min = activation_min;
op_params.float_activation_max = activation_max;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpData& data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data.padding.height;
op_params.padding_values.width = data.padding.width;
op_params.quantized_activation_min = data.activation_min;
op_params.quantized_activation_max = data.activation_max;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData& data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
cmsis_nn_dims input_dims;
input_dims.n = 1;
input_dims.h = input_shape.Dims(1);
input_dims.w = input_shape.Dims(2);
input_dims.c = depth;
cmsis_nn_dims output_dims;
output_dims.n = 1;
output_dims.h = output_shape.Dims(1);
output_dims.w = output_shape.Dims(2);
output_dims.c = depth;
cmsis_nn_pool_params pool_params;
pool_params.stride.h = params->stride_height;
pool_params.stride.w = params->stride_width;
pool_params.padding.h = data.padding.height;
pool_params.padding.w = data.padding.width;
pool_params.activation.min = data.activation_min;
pool_params.activation.max = data.activation_max;
cmsis_nn_dims filter_dims;
filter_dims.n = 1;
filter_dims.h = params->filter_height;
filter_dims.w = params->filter_width;
filter_dims.c = 1;
cmsis_nn_context ctx;
ctx.buf = nullptr;
ctx.size = 0;
if (data.buffer_idx > -1) {
ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
}
TFLITE_DCHECK_EQ(
arm_max_pool_s8(&ctx, &pool_params, &input_dims,
tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
&output_dims,
tflite::micro::GetTensorData<int8_t>(output)),
ARM_MATH_SUCCESS);
return kTfLiteOk;
}
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus MaxPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
return kTfLiteOk;
}
TfLiteStatus AveragePrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
if (input->type == kTfLiteInt8) {
RuntimeShape input_shape = GetTensorShape(input);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
RuntimeShape output_shape = GetTensorShape(output);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int output_width = output_shape.Dims(2);
const int32_t buffer_size =
arm_avgpool_s8_get_buffer_size(output_width, depth);
if (buffer_size > 0) {
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, buffer_size, &data->buffer_idx));
} else {
data->buffer_idx = -1;
}
}
return kTfLiteOk;
}
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AverageEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
AverageEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32:
MaxEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
MaxEvalQuantizedUInt8(context, node, params, data, input, output);
break;
case kTfLiteInt8:
return MaxEvalInt8(context, node, params, data, input, output);
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace pooling
TfLiteRegistration Register_AVERAGE_POOL_2D() {
return {/*init=*/pooling::Init,
/*free=*/nullptr,
/*prepare=*/pooling::AveragePrepare,
/*invoke=*/pooling::AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_MAX_POOL_2D() {
return {/*init=*/pooling::Init,
/*free=*/nullptr,
/*prepare=*/pooling::MaxPrepare,
/*invoke=*/pooling::MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
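The int8 average-pool path above uses the standard TFLM scratch-buffer pattern: query the CMSIS-NN buffer size and reserve arena space in Prepare, then fetch the raw pointer in Eval. A stripped-down skeleton of that pattern for a hypothetical kernel follows; ExampleOpData and the Example* functions are invented names, and the fixed 256-byte request stands in for a *_get_buffer_size() query.

// Skeleton of the Prepare/Eval scratch-buffer handshake used by the pooling kernel.
#include <cstddef>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace {

struct ExampleOpData {
  int buffer_idx;  // -1 means no scratch buffer was requested.
};

void* ExampleInit(TfLiteContext* context, const char* buffer, size_t length) {
  return context->AllocatePersistentBuffer(context, sizeof(ExampleOpData));
}

TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<ExampleOpData*>(node->user_data);
  const int buf_size = 256;  // would normally come from the CMSIS-NN helper
  if (buf_size > 0) {
    TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
        context, buf_size, &data->buffer_idx));
  } else {
    data->buffer_idx = -1;
  }
  return kTfLiteOk;
}

TfLiteStatus ExampleEval(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<ExampleOpData*>(node->user_data);
  void* scratch = nullptr;
  if (data->buffer_idx > -1) {
    scratch = context->GetScratchBuffer(context, data->buffer_idx);
  }
  (void)scratch;  // an optimized routine would consume this working memory
  return kTfLiteOk;
}

}  // namespace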

View File

@@ -0,0 +1,175 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
#include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
const TfLiteSoftmaxParams* params,
SoftmaxParams* op_data) {
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
if (input->type == kTfLiteUInt8) {
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
} else {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
// NOTE: Current int16_t softmax output does not require symmetric
// scaling, so there is no need to verify the scale here.
} else {
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
}
}
static const int kScaledDiffIntegerBits = 5;
int input_left_shift;
tflite::PreprocessSoftmaxScaling(
static_cast<double>(params->beta),
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
&op_data->input_multiplier, &input_left_shift);
op_data->input_left_shift = input_left_shift;
op_data->diff_min =
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
op_data->input_left_shift);
} else {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
op_data->beta = static_cast<double>(params->beta);
}
return kTfLiteOk;
}
} // namespace
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
}
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, 0);
TFLITE_DCHECK(node->user_data != nullptr);
SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
return CalculateSoftmaxParams(context, input, output, params, data);
}
// Takes a tensor and performs softmax along the last dimension.
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
const SoftmaxParams& op_data) {
tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
const SoftmaxParams& op_data) {
const auto input_shape = tflite::micro::GetTensorShape(input);
const auto output_shape = tflite::micro::GetTensorShape(output);
if (input->type == kTfLiteUInt8) {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
if (output->type == kTfLiteInt16) {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
arm_softmax_s8(tflite::micro::GetTensorData<int8_t>(input), outer_size,
depth, op_data.input_multiplier, op_data.input_left_shift,
op_data.diff_min,
tflite::micro::GetTensorData<int8_t>(output));
}
}
}
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TFLITE_DCHECK(node->user_data != nullptr);
const SoftmaxParams& data =
*(static_cast<const SoftmaxParams*>(node->user_data));
switch (input->type) {
case kTfLiteFloat32: {
SoftmaxFloat(input, output, data);
return kTfLiteOk;
}
case kTfLiteInt8:
case kTfLiteUInt8: {
SoftmaxQuantized(input, output, data);
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
}
} // namespace activations
TfLiteRegistration Register_SOFTMAX() {
return {/*init=*/activations::SoftmaxInit,
/*free=*/nullptr,
/*prepare=*/activations::SoftmaxPrepare,
/*invoke=*/activations::SoftmaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
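SoftmaxFloat above delegates to the reference op, which applies softmax along the last dimension with the logits scaled by beta. Below is a minimal standalone float version of that computation over one row, with the usual max-subtraction for numerical stability; the logits and beta are arbitrary example values.

// Standalone float softmax over one row, mirroring the last-dimension behavior.
#include <cmath>
#include <cstdio>

int main() {
  const int depth = 4;
  const float beta = 1.0f;
  const float logits[depth] = {1.0f, 2.0f, 0.5f, -1.0f};
  float probs[depth];
  // Subtract the row maximum before exponentiating for numerical stability.
  float max_logit = logits[0];
  for (int i = 1; i < depth; ++i) max_logit = std::fmax(max_logit, logits[i]);
  float sum = 0.0f;
  for (int i = 0; i < depth; ++i) {
    probs[i] = std::exp(beta * (logits[i] - max_logit));
    sum += probs[i];
  }
  for (int i = 0; i < depth; ++i) {
    probs[i] /= sum;
    std::printf("p[%d]=%.4f\n", i, probs[i]);
  }
  return 0;
}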

View File

@@ -0,0 +1,722 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace comparisons {
namespace {
struct OpData {
ComparisonParams params;
};
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteBool:
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data);
break;
case kTfLiteFloat32:
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::EqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::EqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
// TODO(renjieliu): Refactor the logic to avoid duplication.
TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteBool:
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data);
break;
case kTfLiteFloat32:
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowLessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::LessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowLessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::LessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {
auto input1_offset = -input1->params.zero_point;
auto input2_offset = -input2->params.zero_point;
const int kLeftShift = 8;
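    // To compare tensors quantized with different scales, each input is offset
    // by its zero point, shifted left by kLeftShift for extra precision, and
    // rescaled with a per-input fixed-point multiplier/shift so that both
    // operands land in a common domain before the raw comparison.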
int32_t input1_multiplier;
int input1_shift;
QuantizeMultiplierSmallerThanOneExp(
static_cast<double>(input1->params.scale), &input1_multiplier,
&input1_shift);
int32_t input2_multiplier;
int input2_shift;
QuantizeMultiplierSmallerThanOneExp(
static_cast<double>(input2->params.scale), &input2_multiplier,
&input2_shift);
data->params.left_shift = kLeftShift;
data->params.input1_offset = input1_offset;
data->params.input1_multiplier = input1_multiplier;
data->params.input1_shift = input1_shift;
data->params.input2_offset = input2_offset;
data->params.input2_multiplier = input2_multiplier;
data->params.input2_shift = input2_shift;
}
return kTfLiteOk;
}
} // namespace comparisons
TfLiteRegistration Register_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::EqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_NOT_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::NotEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_GREATER() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::GreaterEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_GREATER_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::GreaterEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_LESS() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::LessEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_LESS_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::LessEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,264 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace concatenation {
constexpr int kMaxInputNum = 10; // Maximum number of input tensors
constexpr int kOutputTensor = 0;
struct OpData {
ConcatenationParams params;
};
// Handles negative axis index, coerces to positive index value.
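// For example, axis == -1 on a 4-D output tensor is coerced to axis 3.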
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
if (axis >= 0) {
return axis;
} else {
return NumDimensions(output_tensor) + axis;
}
}
// The following functions are helpers to get tensor data in the format that the
// reference op implementation expects. They provide the same functionality as
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
// Gets shapes from a list of tensors.
inline void GetAllInputTensorShapes(const TfLiteContext* context,
const TfLiteNode* node,
RuntimeShape all_shapes[kMaxInputNum]) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
RuntimeShape shape = tflite::micro::GetTensorShape(t);
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
}
}
// Get shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
const RuntimeShape* pointers[]) {
for (size_t i = 0; i < num; ++i) {
pointers[i] = &shapes[i];
}
}
// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllInputTensorData(const TfLiteContext* context,
const TfLiteNode* node,
T* all_data[kMaxInputNum]) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
all_data[i] = tflite::micro::GetTensorData<T>(t);
}
}
template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointers of the input tensors.
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const data_type* inputs_data[kMaxInputNum];
GetAllInputTensorShapes(context, node, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllInputTensorData(context, node, inputs_data);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<data_type>(output));
}
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointers of the input tensors.
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const uint8_t* inputs_data[kMaxInputNum];
GetAllInputTensorShapes(context, node, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllInputTensorData(context, node, inputs_data);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
reference_ops::ConcatenationWithScaling(
data->params, inputs_shape_ptr, inputs_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// This function only checks the types. Additional shape validations are
// performed in the reference implementation called during Eval().
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
TfLiteType input_type = GetInput(context, node, 0)->type;
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
// Check activation and input type
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
TF_LITE_ENSURE(context,
input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
input_type == kTfLiteInt8 || input_type == kTfLiteInt32 ||
input_type == kTfLiteInt64);
// Output type must match input type
TF_LITE_ENSURE_EQ(context, output_type, input_type);
  // This implementation does not support a large number of input tensors.
const int num_inputs = NumInputs(node);
TF_LITE_ENSURE(context, num_inputs <= kMaxInputNum);
// Shapes with dimensions >4 are not yet supported with static allocation.
for (int i = 0; i < num_inputs; ++i) {
const TfLiteTensor* input = GetInput(context, node, i);
int num_dimensions = NumDimensions(input);
if (num_dimensions > 4) {
TF_LITE_KERNEL_LOG(context,
"Op Concatenation does not currently support num dimensions >4 "
"Tensor has %d dimensions.",
num_dimensions);
return kTfLiteError;
}
}
// Calculate OpData.
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  switch (output_type) {  // Already know in/out types are the same.
case kTfLiteFloat32:
case kTfLiteInt32:
case kTfLiteInt64: {
data->params.axis = CalculatePositiveAxis(params->axis, output);
data->params.inputs_count = node->inputs->size;
break;
}
case kTfLiteUInt8:
case kTfLiteInt8: {
data->params.axis = CalculatePositiveAxis(params->axis, output);
data->params.inputs_count = node->inputs->size;
float* input_scales =
reinterpret_cast<float*>(context->AllocatePersistentBuffer(
context, node->inputs->size * sizeof(float)));
int32_t* input_zero_points =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, node->inputs->size * sizeof(int32_t)));
      // Store each input's scale and zero point in the persistent buffers
      // allocated above:
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteTensor* t = GetInput(context, node, i);
input_scales[i] = t->params.scale;
input_zero_points[i] = t->params.zero_point;
}
data->params.input_scale = input_scales;
data->params.input_zeropoint = input_zero_points;
data->params.output_zeropoint = output->params.zero_point;
data->params.output_scale = output->params.scale;
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Op Concatenation does not currently support Type '%s'.",
TfLiteTypeGetName(output_type));
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
  switch (output_type) {  // Already know in/out types are the same.
case kTfLiteFloat32:
EvalUnquantized<float>(context, node);
break;
case kTfLiteInt32:
EvalUnquantized<int32_t>(context, node);
break;
case kTfLiteUInt8:
EvalQuantizedUInt8(context, node);
break;
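    // Note: the int8 path below reuses the unquantized kernel, copying raw
    // values without rescaling, so it effectively relies on all inputs and the
    // output sharing the same quantization parameters.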
case kTfLiteInt8:
EvalUnquantized<int8_t>(context, node);
break;
case kTfLiteInt64:
EvalUnquantized<int64_t>(context, node);
break;
default:
TF_LITE_KERNEL_LOG(context, "Op Concatenation does not currently support Type '%s'.",
TfLiteTypeGetName(output_type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace concatenation
TfLiteRegistration Register_CONCATENATION() {
return {/*init=*/concatenation::Init,
/*free=*/nullptr,
/*prepare=*/concatenation::Prepare,
/*invoke=*/concatenation::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,334 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace conv {
constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kConvQuantizedDimension = 0;
// This file contains reference implementations of Conv: float, uint8
// (per-tensor quantized) and int8 (per-channel quantized).
struct OpData {
TfLitePaddingValues padding;
// Cached tensor zero point values for quantized operations.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
int32_t* per_channel_output_multiplier;
int32_t* per_channel_output_shift;
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
};
inline PaddingType RuntimePaddingType(TfLitePadding padding) {
switch (padding) {
case TfLitePadding::kTfLitePaddingSame:
return PaddingType::kSame;
case TfLitePadding::kTfLitePaddingValid:
return PaddingType::kValid;
case TfLitePadding::kTfLitePaddingUnknown:
default:
return PaddingType::kNone;
}
}
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
const TfLiteConvParams* params, int width,
int height, int filter_width, int filter_height,
int out_width, int out_height,
const TfLiteType data_type, OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Matching GetWindowedOutputSize in TensorFlow.
auto padding = params->padding;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
params->dilation_height_factor, params->dilation_width_factor, height,
width, filter_height, filter_width, padding, &out_height, &out_width);
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
int output_channels = filter->dims->data[kConvQuantizedDimension];
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params->activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
output_channels));
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
int input_width = input->dims->data[2];
int input_height = input->dims->data[1];
int filter_width = filter->dims->data[2];
int filter_height = filter->dims->data[1];
int output_width = output->dims->data[2];
int output_height = output->dims->data[1];
  // Dynamically allocate per-channel quantization parameters.
const int num_channels = filter->dims->data[kConvQuantizedDimension];
data->per_channel_output_multiplier =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->per_channel_output_shift =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->zero_point);
TF_LITE_ENSURE(context,
affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpData(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
return kTfLiteOk;
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
// TODO(b/154032858): Investigate removing extra copies.
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data.output_multiplier;
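  // As in the depthwise kernel, the cached shift uses the legacy sign
  // convention and is negated here into the positive-means-left form expected
  // by the reference op.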
op_params.output_shift = -data.output_shift;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output),
tflite::micro::GetTensorShape(im2col),
tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output,
TfLiteEvalTensor* im2col) {
// TODO(b/154032858): Investigate removing extra copies.
ConvParams op_params;
op_params.input_offset = -data.input_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.padding_values.height = data.padding.height;
op_params.padding_values.width = data.padding.width;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
reference_integer_ops::ConvPerChannel(
op_params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
// TODO(b/154032858): Investigate removing extra copies.
ConvParams op_params;
op_params.padding_type = RuntimePaddingType(params->padding);
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
tflite::micro::GetTensorShape(im2col),
tflite::micro::GetTensorData<float>(im2col));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFilterTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
EvalFloat(context, node, params, data, input, filter, bias, nullptr,
nullptr, output);
break;
case kTfLiteInt8:
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
output, nullptr);
break;
case kTfLiteUInt8:
EvalQuantized(context, node, params, data, input, filter, bias, nullptr,
nullptr, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace conv
TfLiteRegistration Register_CONV_2D() {
return {/*init=*/conv::Init,
/*free=*/nullptr,
/*prepare=*/conv::Prepare,
/*invoke=*/conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,327 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace depthwise_conv {
namespace {
constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Depthwise conv is quantized along dimension 3:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kDepthwiseConvQuantizedDimension = 3;
struct OpData {
TfLitePaddingValues padding;
// Cached tensor zero point values for quantized operations.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
int32_t* per_channel_output_multiplier;
int32_t* per_channel_output_shift;
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, int width,
int height, int filter_width, int filter_height,
const TfLiteType data_type, OpData* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
int unused_output_height, unused_output_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width, 1, 1, height, width,
filter_height, filter_width, params->padding, &unused_output_height,
&unused_output_width);
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
return tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params->activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
}
return kTfLiteOk;
}
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params =
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
OpData* data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteType data_type = input->type;
int width = SizeOfDimension(input, 2);
int height = SizeOfDimension(input, 1);
int filter_width = SizeOfDimension(filter, 2);
int filter_height = SizeOfDimension(filter, 1);
// Per channel quantization is only needed for int8_t inference. For other
// quantized types, only a single scale and zero point is needed.
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
  // Dynamically allocate per-channel quantization parameters.
data->per_channel_output_multiplier =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->per_channel_output_shift =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(
filter->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->zero_point);
TF_LITE_ENSURE(
context, affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kDepthwiseConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
filter_width, filter_height, data_type,
data));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
return kTfLiteOk;
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, const OpData& data,
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::DepthwiseConv(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params,
const OpData& data, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
DepthwiseParams op_params;
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.input_offset = -data.input_zero_point;
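  // Per-channel int8 filters are symmetrically quantized (zero point of 0), so
  // no weights offset is applied.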
op_params.weights_offset = 0;
op_params.output_offset = data.output_zero_point;
// TODO(b/130439627): Use calculated value for clamping.
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
reference_integer_ops::DepthwiseConvPerChannel(
op_params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data.output_multiplier;
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
op_params.output_shift = -data.output_shift;
tflite::reference_ops::DepthwiseConv(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params =
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFilterTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
// TODO(aselle): Consider whether float conv and quantized conv should be
// separate ops to avoid dispatch overhead here.
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
EvalFloat(context, node, params, data, input, filter, bias, output);
break;
case kTfLiteInt8:
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
output);
break;
case kTfLiteUInt8:
EvalQuantized(context, node, params, data, input, filter, bias, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace depthwise_conv
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
return {/*init=*/depthwise_conv::Init,
/*free=*/nullptr,
/*prepare=*/depthwise_conv::Prepare,
/*invoke=*/depthwise_conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,164 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace dequantize {
struct OpData {
tflite::DequantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
int32_t output_zero_point;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
// TODO(b/140515557): Add cached dequant to improve hybrid model performance.
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
input->type == kTfLiteInt8 ||
input->type == kTfLiteInt16);
TF_LITE_ENSURE(
context, output->type == kTfLiteFloat32 || output->type == kTfLiteInt32);
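  // For int32 outputs this op requantizes rather than dequantizes: the input
  // and output scales are folded into one fixed-point multiplier so that
  // output = (input - input_zero_point) * (input_scale / output_scale)
  //          + output_zero_point, evaluated in integer arithmetic.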
if (output->type == kTfLiteInt32) {
const double effective_output_scale =
static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
&data->output_shift);
}
data->quantization_params.zero_point = input->params.zero_point;
data->quantization_params.scale = static_cast<double>(input->params.scale);
data->output_zero_point = output->params.zero_point;
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (output->type == kTfLiteFloat32) {
switch (input->type) {
case kTfLiteUInt8:
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt16:
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (output->type == kTfLiteInt32) {
int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output));
switch (input->type) {
case kTfLiteInt16: {
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), flat_size,
data->output_multiplier, data->output_shift,
data->quantization_params.zero_point, data->output_zero_point,
tflite::micro::GetTensorData<int32_t>(output));
break;
}
case kTfLiteInt8: {
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), flat_size,
data->output_multiplier, data->output_shift,
data->quantization_params.zero_point, data->output_zero_point,
tflite::micro::GetTensorData<int32_t>(output));
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace dequantize
TfLiteRegistration Register_DEQUANTIZE() {
return {/*init=*/dequantize::Init,
/*free=*/nullptr,
/*prepare=*/dequantize::Prepare,
/*invoke=*/dequantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,212 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cmath>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace elementwise {
namespace {
bool IsNumericSupportedType(const TfLiteType type) {
return type == kTfLiteFloat32;
}
bool IsLogicalSupportedType(const TfLiteType type) {
return type == kTfLiteBool;
}
typedef bool (*IsSupportedType)(TfLiteType);
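// GenericPrepare is parameterized on a type predicate so that each
// registration below picks the appropriate check (numeric ops accept float32,
// logical ops accept bool) without duplicating the prepare logic.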
template <IsSupportedType>
TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (!IsSupportedType(input->type)) {
TF_LITE_KERNEL_LOG(context, "Input data type %s (%d) is not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
template <typename T>
inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node,
T func(T), TfLiteType expected_type) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type);
const size_t num_elements = ElementCount(*input->dims);
const T* in_data = tflite::micro::GetTensorData<T>(input);
T* out_data = tflite::micro::GetTensorData<T>(output);
for (size_t i = 0; i < num_elements; ++i) {
out_data[i] = func(in_data[i]);
}
return kTfLiteOk;
}
inline TfLiteStatus EvalNumeric(TfLiteContext* context, TfLiteNode* node,
float float_func(float)) {
return EvalImpl<float>(context, node, float_func, kTfLiteFloat32);
}
inline TfLiteStatus EvalLogical(TfLiteContext* context, TfLiteNode* node,
bool bool_func(bool)) {
return EvalImpl<bool>(context, node, bool_func, kTfLiteBool);
}
TfLiteStatus AbsEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::abs);
}
TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::sin);
}
TfLiteStatus CosEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::cos);
}
TfLiteStatus LogEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::log);
}
TfLiteStatus SqrtEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::sqrt);
}
TfLiteStatus RsqrtEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, [](float f) { return 1.f / std::sqrt(f); });
}
TfLiteStatus SquareEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, [](float f) { return f * f; });
}
TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
return EvalLogical(context, node, [](bool v) { return !v; });
}
} // namespace
} // namespace elementwise
TfLiteRegistration Register_ABS() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::AbsEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_SIN() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_COS() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::CosEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_LOG() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::LogEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_SQRT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_RSQRT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::RsqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_SQUARE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SquareEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_LOGICAL_NOT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
/*invoke=*/elementwise::LogicalNotEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,32 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
//
// This is a stub file for platforms without Ethos-U support.
//
#include "tensorflow/lite/c/common.h"
namespace tflite {
namespace ops {
namespace micro {
namespace custom {
TfLiteRegistration* Register_ETHOSU() { return nullptr; }
const char* GetString_ETHOSU() { return ""; }
} // namespace custom
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,57 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace floor {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Floor(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace floor
TfLiteRegistration Register_FLOOR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/floor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,256 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace fully_connected {
namespace {
struct OpData {
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
// The index of the temporary tensor where the quantized inputs are cached.
int input_quantized_index;
// Cached zero point values of tensors.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
};
constexpr int kInputTensor = 0;
constexpr int kWeightsTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context,
TfLiteFusedActivation activation,
TfLiteType data_type, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
OpData* data) {
TfLiteStatus status = kTfLiteOk;
if (data_type != kTfLiteFloat32) {
double real_multiplier = 0.0;
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
context, input, filter, bias, output, &real_multiplier));
int exponent;
QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
data->output_shift = -exponent;
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, activation, output, &data->output_activation_min,
&data->output_activation_max));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
}
return status;
}
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const auto params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
return CalculateOpData(context, params->activation, input->type, input,
filter, bias, output, data);
}
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
tflite::FullyConnectedParams op_params;
op_params.input_offset = -data.input_zero_point;
op_params.weights_offset = -data.filter_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.output_multiplier = data.output_multiplier;
// TODO(b/138810107): Figure out whether output shift should be inverted
op_params.output_shift = -data.output_shift;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
reference_integer_ops::FullyConnected(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
const OpData& data, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
tflite::FullyConnectedParams op_params;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data.output_multiplier;
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
op_params.output_shift = -data.output_shift;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
reference_ops::FullyConnected( \
op_params, tflite::micro::GetTensorShape(input), \
tflite::micro::GetTensorData<uint8_t>(input), \
tflite::micro::GetTensorShape(filter), \
tflite::micro::GetTensorData<uint8_t>(filter), \
tflite::micro::GetTensorShape(bias), \
tflite::micro::GetTensorData<int32_t>(bias), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<output_data_type>(output))
switch (output->type) {
case kTfLiteUInt8:
TF_LITE_FULLY_CONNECTED(uint8_t);
break;
case kTfLiteInt16:
TF_LITE_FULLY_CONNECTED(int16_t);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteFusedActivation activation,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(activation, &output_activation_min,
&output_activation_max);
tflite::FullyConnectedParams op_params;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::FullyConnected(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto* params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kWeightsTensor);
const TfLiteEvalTensor* bias =
tflite::micro::GetEvalInput(context, node, kBiasTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
// Checks in Prepare ensure input, output and filter types are all the same.
switch (input->type) {
case kTfLiteFloat32:
return EvalFloat(context, node, params->activation, input, filter, bias,
output);
case kTfLiteInt8:
return EvalQuantizedInt8(context, node, data, input, filter, bias,
output);
case kTfLiteUInt8:
return EvalQuantized(context, node, data, input, filter, bias, output);
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace fully_connected
TfLiteRegistration Register_FULLY_CONNECTED() {
return {/*init=*/fully_connected::Init,
/*free=*/nullptr,
/*prepare=*/fully_connected::Prepare,
/*invoke=*/fully_connected::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
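For reference, CalculateOpData() above folds input_scale * filter_scale / output_scale into a single Q31 multiplier plus shift. The standalone sketch below is illustrative only: it mirrors the mantissa/exponent split that QuantizeMultiplier performs rather than reproducing the library routine, and the example scales are assumptions.
// Illustrative sketch only; not part of this commit. Mimics the idea of
// QuantizeMultiplier(): split a real multiplier into a Q31 mantissa and a
// power-of-two exponent.
#include <cmath>
#include <cstdint>
#include <cstdio>

void QuantizeRealMultiplier(double real, int32_t* quantized_multiplier,
                            int* exponent) {
  // frexp returns a mantissa in [0.5, 1) such that real = mantissa * 2^exp.
  const double mantissa = std::frexp(real, exponent);
  *quantized_multiplier =
      static_cast<int32_t>(std::round(mantissa * (1ll << 31)));
}

int main() {
  // Assumed example scales for an int8 fully connected layer.
  const double input_scale = 0.5 / 128.0;
  const double filter_scale = 0.25 / 128.0;
  const double output_scale = 1.0 / 128.0;
  const double real_multiplier = input_scale * filter_scale / output_scale;

  int32_t output_multiplier;
  int exponent;
  QuantizeRealMultiplier(real_multiplier, &output_multiplier, &exponent);
  // The kernel above stores output_shift = -exponent, then negates it again
  // when filling FullyConnectedParams (positive means a left shift there).
  std::printf("multiplier=%ld exponent=%d\n",
              static_cast<long>(output_multiplier), exponent);
  return 0;
}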

View File

@@ -0,0 +1,140 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace hard_swish {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
}
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
params->input_zero_point = input->params.zero_point;
params->output_zero_point = output->params.zero_point;
const float input_scale = input->params.scale;
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
const float reluish_scale = 3.0f / 32768.0f;
const float output_scale = output->params.scale;
const double output_multiplier =
static_cast<double>(hires_input_scale / output_scale);
int32_t output_multiplier_fixedpoint_int32;
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
&params->output_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
output_multiplier_fixedpoint_int32,
&params->output_multiplier_fixedpoint_int16);
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
const double reluish_multiplier =
static_cast<double>(hires_input_scale / reluish_scale);
int32_t reluish_multiplier_fixedpoint_int32;
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_fixedpoint_int16);
}
return kTfLiteOk;
}
TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
switch (input->type) {
case kTfLiteFloat32: {
tflite::reference_ops::HardSwish<float>(
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} break;
case kTfLiteUInt8: {
tflite::reference_ops::HardSwish<uint8_t>(
*params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} break;
case kTfLiteInt8: {
tflite::reference_ops::HardSwish<int8_t>(
*params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} break;
default: {
TF_LITE_KERNEL_LOG(
context,
"Only float32/int8_t/uint8_t are supported currently, got %s",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
return kTfLiteOk;
}
} // namespace hard_swish
TfLiteRegistration Register_HARD_SWISH() {
return {/*init=*/hard_swish::HardSwishInit,
/*free=*/nullptr,
/*prepare=*/hard_swish::HardSwishPrepare,
/*invoke=*/hard_swish::HardSwishEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
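As a rough aid to reading HardSwishPrepare(), the sketch below just evaluates the two scale ratios it quantizes; the input/output scales are assumed example values, not taken from a real model.
// Illustrative sketch only; assumed example scales.
#include <cstdio>

int main() {
  const float input_scale = 6.0f / 256.0f;   // assumed int8 input scale
  const float output_scale = 6.0f / 256.0f;  // assumed int8 output scale

  // The input is re-expressed on a 128x finer intermediate scale.
  const float hires_input_scale = (1.0f / 128.0f) * input_scale;
  // Fixed internal scale used for the relu-like intermediate (see Prepare).
  const float reluish_scale = 3.0f / 32768.0f;

  // These are the two ratios handed to QuantizeMultiplier in Prepare.
  const float output_multiplier = hires_input_scale / output_scale;
  const float reluish_multiplier = hires_input_scale / reluish_scale;
  std::printf("output_multiplier=%f reluish_multiplier=%f\n",
              output_multiplier, reluish_multiplier);
  return 0;
}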

View File

@@ -0,0 +1,165 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
namespace tflite {
namespace micro {
namespace {
constexpr size_t kBufferAlignment = 16;
} // namespace
// TODO(b/161841696): Consider moving away from global arena buffers:
constexpr int KernelRunner::kNumScratchBuffers_;
constexpr int KernelRunner::kKernelRunnerBufferSize_;
uint8_t KernelRunner::kKernelRunnerBuffer_[];
KernelRunner::KernelRunner(const TfLiteRegistration& registration,
TfLiteTensor* tensors, int tensors_size,
TfLiteIntArray* inputs, TfLiteIntArray* outputs,
void* builtin_data, ErrorReporter* error_reporter)
: allocator_(SimpleMemoryAllocator::Create(
error_reporter, kKernelRunnerBuffer_, kKernelRunnerBufferSize_)),
registration_(registration),
tensors_(tensors),
error_reporter_(error_reporter) {
// Prepare TfLiteContext:
context_.impl_ = static_cast<void*>(this);
context_.ReportError = ReportOpError;
context_.recommended_num_threads = 1;
context_.GetTensor = GetTensor;
context_.GetEvalTensor = GetEvalTensor;
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
context_.GetScratchBuffer = GetScratchBuffer;
// Prepare TfLiteNode:
node_.inputs = inputs;
node_.outputs = outputs;
node_.builtin_data = builtin_data;
}
TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data) {
if (registration_.init) {
node_.user_data = registration_.init(&context_, init_data, /*length=*/0);
}
if (registration_.prepare) {
TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_));
}
return kTfLiteOk;
}
TfLiteStatus KernelRunner::Invoke() {
if (registration_.invoke == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"TfLiteRegistration missing invoke function pointer!");
return kTfLiteError;
}
return registration_.invoke(&context_, &node_);
}
TfLiteTensor* KernelRunner::GetTensor(const struct TfLiteContext* context,
int tensor_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
return &runner->tensors_[tensor_index];
}
TfLiteEvalTensor* KernelRunner::GetEvalTensor(
const struct TfLiteContext* context, int tensor_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
TfLiteEvalTensor* eval_tensor =
reinterpret_cast<TfLiteEvalTensor*>(runner->allocator_->AllocateTemp(
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
TFLITE_DCHECK(eval_tensor != nullptr);
// In unit tests, the TfLiteTensor pointer contains the source of truth for
// buffers and values:
eval_tensor->data = runner->tensors_[tensor_index].data;
eval_tensor->dims = runner->tensors_[tensor_index].dims;
eval_tensor->type = runner->tensors_[tensor_index].type;
return eval_tensor;
}
void* KernelRunner::AllocatePersistentBuffer(TfLiteContext* context,
size_t bytes) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
return runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
}
TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context,
size_t bytes,
int* buffer_index) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(buffer_index != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
if (runner->scratch_buffer_count_ == kNumScratchBuffers_) {
TF_LITE_REPORT_ERROR(
runner->error_reporter_,
"Exceeded the maximum number of scratch tensors allowed (%d).",
kNumScratchBuffers_);
return kTfLiteError;
}
// For tests, we allocate scratch buffers from the tail and keep them around
// for the lifetime of the model. This means that the arena size in the tests will
// be more than what we would have if the scratch buffers could share memory.
runner->scratch_buffers_[runner->scratch_buffer_count_] =
runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
TFLITE_DCHECK(runner->scratch_buffers_[runner->scratch_buffer_count_] !=
nullptr);
*buffer_index = runner->scratch_buffer_count_++;
return kTfLiteOk;
}
void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
TFLITE_DCHECK(runner->scratch_buffer_count_ <= kNumScratchBuffers_);
if (buffer_index >= runner->scratch_buffer_count_) {
return nullptr;
}
return runner->scratch_buffers_[buffer_index];
}
void KernelRunner::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(runner->error_reporter_, format, args);
va_end(args);
}
} // namespace micro
} // namespace tflite

View File

@@ -0,0 +1,83 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
namespace micro {
// Helper class to perform a simulated kernel (i.e. TfLiteRegistration)
// lifecycle (init, prepare, invoke). All internal allocations are handled by
// this class. Simply pass in the registration, the list of required tensors,
// the inputs array, the outputs array, and any builtin data. Calling Invoke()
// walks the kernel, and the outputs are then available in the TfLiteTensor
// output provided during construction.
class KernelRunner {
public:
KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
int tensors_size, TfLiteIntArray* inputs,
TfLiteIntArray* outputs, void* builtin_data,
ErrorReporter* error_reporter);
// Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any
// exceptions will be reported through the error_reporter and returned as a
// status code here.
TfLiteStatus InitAndPrepare(const char* init_data = nullptr);
// Calls init, prepare, and invoke on a given TfLiteRegistration pointer.
// After successful invoke, results will be available in the output tensor as
// passed into the constructor of this class.
TfLiteStatus Invoke();
protected:
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_index);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_index);
static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
size_t bytes,
int* buffer_index);
static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
private:
static constexpr int kNumScratchBuffers_ = 5;
static constexpr int kKernelRunnerBufferSize_ = 10000;
static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];
SimpleMemoryAllocator* allocator_ = nullptr;
const TfLiteRegistration& registration_;
TfLiteTensor* tensors_ = nullptr;
ErrorReporter* error_reporter_ = nullptr;
TfLiteContext context_ = {};
TfLiteNode node_ = {};
int scratch_buffer_count_ = 0;
uint8_t* scratch_buffers_[kNumScratchBuffers_];
};
} // namespace micro
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
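A rough usage sketch of KernelRunner in a kernel unit test follows. The tensor-creation helpers (IntArrayFromInts, CreateFloatTensor) are assumed to come from tensorflow/lite/micro/test_helpers.h; their exact signatures are an assumption here, not something this commit defines.
// Usage sketch only. The tflite::testing helpers below are assumptions.
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/test_helpers.h"

TfLiteStatus RunFloorOnce() {
  float input_data[4] = {1.2f, -0.7f, 3.9f, 0.0f};
  float output_data[4] = {};
  int dims_data[] = {1, 4};  // One dimension of size 4.
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);

  TfLiteTensor tensors[2] = {
      tflite::testing::CreateFloatTensor(input_data, dims),   // assumed helper
      tflite::testing::CreateFloatTensor(output_data, dims),  // assumed helper
  };
  int inputs_data[] = {1, 0};   // One input: tensor index 0.
  int outputs_data[] = {1, 1};  // One output: tensor index 1.

  // Keep the registration in a local so the reference stored by KernelRunner
  // stays valid for the runner's lifetime.
  const TfLiteRegistration registration = tflite::ops::micro::Register_FLOOR();
  tflite::MicroErrorReporter error_reporter;
  tflite::micro::KernelRunner runner(
      registration, tensors, /*tensors_size=*/2,
      tflite::testing::IntArrayFromInts(inputs_data),
      tflite::testing::IntArrayFromInts(outputs_data),
      /*builtin_data=*/nullptr, &error_reporter);
  if (runner.InitAndPrepare() != kTfLiteOk) return kTfLiteError;
  return runner.Invoke();
}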

View File

@@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
namespace micro {
bool HaveSameShapes(const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2) {
TFLITE_DCHECK(input1 != nullptr);
TFLITE_DCHECK(input2 != nullptr);
return TfLiteIntArrayEqual(input1->dims, input2->dims);
}
} // namespace micro
} // namespace tflite

View File

@@ -0,0 +1,83 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace micro {
// Returns a mutable tensor for a given input index. is_variable must be checked
// during prepare when the full TfLiteTensor is available.
inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
const TfLiteNode* node,
int index) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
return context->GetEvalTensor(context, node->inputs->data[index]);
}
// Returns the TfLiteEvalTensor struct for a given input index in a node.
inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
const TfLiteNode* node, int index) {
return GetMutableEvalInput(context, node, index);
}
// Returns the TfLiteEvalTensor struct for a given output index in a node.
inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
const TfLiteNode* node, int index) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
return context->GetEvalTensor(context, node->outputs->data[index]);
}
// Returns data for a TfLiteEvalTensor struct.
template <typename T>
T* GetTensorData(TfLiteEvalTensor* tensor) {
return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
}
// Returns const data for a TfLiteEvalTensor struct.
template <typename T>
const T* GetTensorData(const TfLiteEvalTensor* tensor) {
TFLITE_DCHECK(tensor != nullptr);
return reinterpret_cast<const T*>(tensor->data.raw);
}
// Returns the shape of a TfLiteEvalTensor struct.
inline const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) {
if (tensor == nullptr) {
return RuntimeShape();
}
TfLiteIntArray* dims = tensor->dims;
const int dims_size = dims->size;
const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
return RuntimeShape(dims_size, dims_data);
}
// Return true if the given tensors have the same shape.
bool HaveSameShapes(const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2);
} // namespace micro
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_

View File

@@ -0,0 +1,155 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h"
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace l2norm {
namespace {
// This file has two implementations of L2Norm.
enum KernelType {
kReference,
kGenericOptimized,
};
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
} // namespace
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
L2NormalizationParams* data =
static_cast<L2NormalizationParams*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
output->type == kTfLiteUInt8 ||
output->type == kTfLiteInt8);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
data->input_zero_point = input->params.zero_point;
} else if (output->type == kTfLiteFloat32) {
data->input_zero_point = 0;
}
// TODO(ahentz): For some reason our implementations don't support
// activations.
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context,
sizeof(L2NormalizationParams));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const L2NormalizationParams& data =
*(static_cast<const L2NormalizationParams*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// TODO(b/143912164): instead of hardcoding the epsilon here, we should read
// it from tensorflow, i.e., by adding a param.
// We don't compute epsilon for the quantized kernel:
//
//   epsilon_float = (epsilon_quant - zp) * scale
// so
//   epsilon_quant = epsilon_float / scale + zp
// We know epsilon_float is just a very small number used to avoid division
// by zero, and scale is > 1, so the integer value of epsilon for the quantized
// case is dominated by the zero point.
// Also, GetInvSqrtQuantizedMultiplierExp already handles the case where the
// sum of the squared input values is zero.
// So we don't even need to handle the epsilon for the quantized kernel.
const float epsilon = 1e-6f;
if (output->type == kTfLiteFloat32) {
reference_ops::L2Normalization(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
epsilon);
} else if (output->type == kTfLiteUInt8) {
reference_ops::L2Normalization(
data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else if (output->type == kTfLiteInt8) {
const auto input_shape = tflite::micro::GetTensorShape(input);
const auto output_shape = tflite::micro::GetTensorShape(output);
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
reference_integer_ops::L2Normalization(
data.input_zero_point, outer_size, depth,
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace l2norm
TfLiteRegistration Register_L2NORM_REF() {
return {/*init=*/l2norm::Init,
/*free=*/nullptr,
/*prepare=*/l2norm::Prepare,
/*invoke=*/l2norm::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); }
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,105 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace logical {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (tflite::micro::HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
}
return kTfLiteOk;
}
bool LogicalOr(bool x, bool y) { return x || y; }
TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalOr);
}
bool LogicalAnd(bool x, bool y) { return x && y; }
TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalAnd);
}
} // namespace
} // namespace logical
TfLiteRegistration Register_LOGICAL_OR() {
// Init, Free, Prepare, and Eval satisfy the interface required by
// TfLiteRegistration.
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalOrEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_LOGICAL_AND() {
// Init, Free, Prepare, and Eval satisfy the interface required by
// TfLiteRegistration.
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalAndEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,148 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
OpData* data) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
data->input_zero_point = input->params.zero_point;
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
return kTfLiteOk;
}
} // namespace
void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
return CalculateArithmeticOpData(context, node, data);
}
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteFloat32: {
reference_ops::Logistic(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
switch (output->type) {
case kTfLiteInt8: {
reference_integer_ops::Logistic(
data->input_zero_point, data->input_range_radius,
data->input_multiplier, data->input_left_shift,
NumElements(input->dims),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
// TODO(b/141211002): Also support other data types once we have supported
// temporary tensors in TFLM.
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace activations
TfLiteRegistration Register_LOGISTIC() {
return {/*init=*/activations::LogisticInit,
/*free=*/nullptr,
/*prepare=*/activations::LogisticPrepare,
/*invoke=*/activations::LogisticEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,148 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace maximum_minimum {
namespace {
// This file has a reference implementation of TFMaximum/TFMinimum.
enum KernelType {
kReference,
};
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpContext {
OpContext(TfLiteContext* context, TfLiteNode* node) {
input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
}
const TfLiteEvalTensor* input1;
const TfLiteEvalTensor* input2;
TfLiteEvalTensor* output;
};
struct MaximumOp {
template <typename data_type>
static data_type op(data_type el1, data_type el2) {
return el1 > el2 ? el1 : el2;
}
};
struct MinimumOp {
template <typename data_type>
static data_type op(data_type el1, data_type el2) {
return el1 < el2 ? el1 : el2;
}
};
} // namespace
template <typename data_type, typename op_type>
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
const OpContext& op_context) {
reference_ops::MaximumMinimumBroadcastSlow(
tflite::micro::GetTensorShape(op_context.input1),
tflite::micro::GetTensorData<data_type>(op_context.input1),
tflite::micro::GetTensorShape(op_context.input2),
tflite::micro::GetTensorData<data_type>(op_context.input2),
tflite::micro::GetTensorShape(op_context.output),
tflite::micro::GetTensorData<data_type>(op_context.output),
op_type::template op<data_type>);
}
template <KernelType kernel_type, typename OpType>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
OpContext op_context(context, node);
if (kernel_type == kReference) {
switch (op_context.output->type) {
case kTfLiteFloat32:
TFLiteOperation<float, OpType>(context, node, op_context);
break;
case kTfLiteUInt8:
TFLiteOperation<uint8_t, OpType>(context, node, op_context);
break;
case kTfLiteInt8:
TFLiteOperation<int8_t, OpType>(context, node, op_context);
break;
case kTfLiteInt32:
TFLiteOperation<int32_t, OpType>(context, node, op_context);
break;
case kTfLiteInt64:
TFLiteOperation<int64_t, OpType>(context, node, op_context);
break;
default:
TF_LITE_KERNEL_LOG(context,
"Type %s (%d) is not supported by Maximum/Minimum.",
TfLiteTypeGetName(op_context.output->type),
op_context.output->type);
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context,
"Kernel type not supported by Maximum/Minimum.");
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace maximum_minimum
TfLiteRegistration Register_MAXIMUM() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MaximumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_MINIMUM() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MinimumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,92 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
#include "tensorflow/lite/c/common.h"
namespace tflite {
namespace ops {
namespace micro {
// Forward declaration of all micro op kernel registration methods. These
// registrations are included with the standard `BuiltinOpResolver`.
//
// This header is particularly useful in cases where only a subset of ops are
// needed. In such cases, the client can selectively add only the registrations
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
// registration in turn allows the linker to strip unused kernels.
TfLiteRegistration Register_ABS();
TfLiteRegistration Register_ADD();
TfLiteRegistration Register_ARG_MAX();
TfLiteRegistration Register_ARG_MIN();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_CEIL();
// TODO(b/160234179): Change custom OPs to also return by value.
TfLiteRegistration* Register_CIRCULAR_BUFFER();
TfLiteRegistration Register_CONV_2D();
TfLiteRegistration Register_CONCATENATION();
TfLiteRegistration Register_COS();
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
TfLiteRegistration Register_DEQUANTIZE();
TfLiteRegistration Register_EQUAL();
TfLiteRegistration Register_FLOOR();
TfLiteRegistration Register_FULLY_CONNECTED();
TfLiteRegistration Register_GREATER();
TfLiteRegistration Register_GREATER_EQUAL();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_LESS();
TfLiteRegistration Register_LESS_EQUAL();
TfLiteRegistration Register_LOG();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_NOT();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAXIMUM();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_MEAN();
TfLiteRegistration Register_MINIMUM();
TfLiteRegistration Register_MUL();
TfLiteRegistration Register_NEG();
TfLiteRegistration Register_NOT_EQUAL();
TfLiteRegistration Register_PACK();
TfLiteRegistration Register_PAD();
TfLiteRegistration Register_PADV2();
TfLiteRegistration Register_PRELU();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESHAPE();
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration Register_ROUND();
TfLiteRegistration Register_RSQRT();
TfLiteRegistration Register_SIN();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SPLIT();
TfLiteRegistration Register_SQRT();
TfLiteRegistration Register_SQUARE();
TfLiteRegistration Register_STRIDED_SLICE();
TfLiteRegistration Register_SUB();
TfLiteRegistration Register_SVDF();
TfLiteRegistration Register_UNPACK();
TfLiteRegistration Register_L2_NORMALIZATION();
TfLiteRegistration Register_TANH();
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
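The selective-registration pattern described in the header comment looks roughly like the sketch below; the three ops chosen are arbitrary examples.
// Minimal sketch of selective registration; the op choices are examples only.
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

void RegisterNeededOps(tflite::MicroMutableOpResolver<3>& resolver) {
  // The template argument caps how many ops the resolver can hold; each Add*
  // call returns a TfLiteStatus (an error once the resolver is full).
  resolver.AddFullyConnected();
  resolver.AddSoftmax();
  resolver.AddReshape();
}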

View File

@@ -0,0 +1,37 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
namespace tflite {
namespace ops {
namespace micro {
// Same as gtl::Greater but defined here to reduce dependencies and
// binary size for the micro environment.
struct Greater {
template <typename T>
bool operator()(const T& x, const T& y) const {
return x > y;
}
};
struct Less {
template <typename T>
bool operator()(const T& x, const T& y) const {
return x < y;
}
};
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_

View File

@@ -0,0 +1,233 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
namespace ops {
namespace micro {
namespace mul {
namespace {
constexpr int kInput1Tensor = 0;
constexpr int kInput2Tensor = 1;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t input1_zero_point;
int32_t input2_zero_point;
int32_t output_activation_min;
int32_t output_activation_max;
int32_t output_zero_point;
int32_t output_multiplier;
int output_shift;
float output_activation_min_f32;
float output_activation_max_f32;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpData* data) {
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
double real_multiplier = static_cast<double>(input1->params.scale) *
static_cast<double>(input2->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
data->input1_zero_point = input1->params.zero_point;
data->input2_zero_point = input2->params.zero_point;
data->output_zero_point = output->params.zero_point;
} else {
CalculateActivationRange(params->activation,
&data->output_activation_min_f32,
&data->output_activation_max_f32);
}
return kTfLiteOk;
}
} // namespace
void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.float_activation_max = data->output_activation_max_f32;
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else if (output->type == kTfLiteUInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
}
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, const OpData* data,
const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.float_activation_min = data->output_activation_min_f32;
op_params.float_activation_max = data->output_activation_max_f32;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (need_broadcast) {
reference_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
return CalculateOpData(context, node, params, data);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInput1Tensor);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInput2Tensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input1->type) {
case kTfLiteUInt8:
case kTfLiteInt8:
EvalQuantized(context, node, data, input1, input2, output);
break;
case kTfLiteFloat32:
EvalFloat(context, node, params, data, input1, input2, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace mul
TfLiteRegistration Register_MUL() {
return {/*init=*/mul::Init,
/*free=*/nullptr,
/*prepare=*/mul::Prepare,
/*invoke=*/mul::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite
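In float terms, the quantized Mul path above computes q_out = (q1 - z1) * (q2 - z2) * s1 * s2 / s_out + z_out, clamped to the fused activation range; the sketch below (assumed example scales and zero points) walks that arithmetic once.
// Illustrative float-reference sketch of the quantized rescaling; all
// quantization parameters here are assumed example values.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const float s1 = 0.02f, s2 = 0.05f, s_out = 0.01f;  // assumed scales
  const int32_t z1 = -3, z2 = 4, z_out = 0;           // assumed zero points
  const int8_t q1 = 57, q2 = 21;                      // assumed input values

  // This is the real multiplier CalculateOpData() feeds to QuantizeMultiplier.
  const float real_multiplier = (s1 * s2) / s_out;

  const int32_t acc = (q1 - z1) * (q2 - z2);
  const int32_t unclamped =
      static_cast<int32_t>(std::lround(acc * real_multiplier)) + z_out;
  // The fixed-point kernel clamps to the fused activation range; here we just
  // clamp to the int8 range.
  const int32_t q_out = std::min(127, std::max(-128, unclamped));
  std::printf("q_out=%d\n", q_out);
  return 0;
}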

View File

@@ -0,0 +1,66 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/neg.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace neg {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
// TODO(wangtz): handle for kTfLiteInt8
case kTfLiteFloat32:
reference_ops::Negate(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace neg
TfLiteRegistration Register_NEG() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/neg::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,127 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pack {
namespace {
constexpr int kOutputTensor = 0;
template <typename T>
TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output, int values_count, int axis) {
const TfLiteEvalTensor* input0 =
tflite::micro::GetEvalInput(context, node, 0);
const int dimensions = output->dims->size;
const TfLiteIntArray* input_dims = input0->dims;
const TfLiteIntArray* output_dims = output->dims;
if (axis < 0) {
axis += dimensions;
}
int outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= output_dims->data[i];
}
int copy_size = 1;
for (int i = axis + 1; i < dimensions; ++i) {
copy_size *= output_dims->data[i];
}
int input_size = 1;
for (int i = 0; i < input_dims->size; ++i) {
input_size *= input_dims->data[i];
}
TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);
T* output_data = tflite::micro::GetTensorData<T>(output);
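  // Each input contributes one slice per outer step: input i's k-th run of
  // copy_size elements lands at output offset
  // k * values_count * copy_size + i * copy_size. For example, packing three
  // {2, 3} inputs along axis 0 yields a {3, 2, 3} output in which input i
  // occupies the contiguous block starting at offset i * 6.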
for (int i = 0; i < values_count; ++i) {
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
const T* input_data = tflite::micro::GetTensorData<T>(t);
for (int k = 0; k < outer_size; ++k) {
const T* input_ptr = input_data + copy_size * k;
int loc = k * values_count * copy_size + i * copy_size;
T* output_ptr = output_data + loc;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
}
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLitePackParams* data =
reinterpret_cast<TfLitePackParams*>(node->builtin_data);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (output->type) {
case kTfLiteFloat32: {
return PackImpl<float>(context, node, output, data->values_count,
data->axis);
}
case kTfLiteUInt8: {
return PackImpl<uint8_t>(context, node, output, data->values_count,
data->axis);
}
case kTfLiteInt8: {
return PackImpl<int8_t>(context, node, output, data->values_count,
data->axis);
}
case kTfLiteInt32: {
return PackImpl<int32_t>(context, node, output, data->values_count,
data->axis);
}
case kTfLiteInt64: {
return PackImpl<int64_t>(context, node, output, data->values_count,
data->axis);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by pack.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
}
return kTfLiteOk;
}
} // namespace
} // namespace pack
TfLiteRegistration Register_PACK() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,251 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pad.h"
#include <string.h>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pad {
namespace {
struct OpData {
PadParams params;
int32_t output_zero_point;
};
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
const TfLiteTensor* constant_values =
NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
// Current implementations rely on the inputs being <= 4D.
TF_LITE_ENSURE(context, NumDimensions(input) <=
reference_ops::PadKernelMaxDimensionCount());
if (constant_values != nullptr) {
TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
// Ensure that constant_values is a scalar.
TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
}
// There must be a pair of paddings for each output dimension.
TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
output->dims->size * 2);
// On Micro, outputs must be properly sized by the converter.
// NOTE: This data is only available because the paddings buffer is stored in
// the flatbuffer:
TF_LITE_ENSURE(context, IsConstantTensor(paddings));
const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
for (int i = 0; i < output->dims->size; i++) {
int output_dim = output->dims->data[i];
int expected_dim =
input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
}
// Calculate OpData:
data->params.resizing_category = ResizingCategory::kGenericResize;
const int paddings_total = GetTensorShape(paddings).FlatSize();
if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
data->params.resizing_category = ResizingCategory::kImageStyle;
}
const int num_input_dimensions = NumDimensions(input);
data->params.left_padding_count = num_input_dimensions;
data->params.right_padding_count = num_input_dimensions;
for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
data->params.left_padding[idx] = paddings_data[idx * 2];
data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
}
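  // For example, a {1, 4, 4, 1} input with paddings {{0, 0}, {1, 1}, {1, 1}, {0, 0}}
  // yields left_padding = right_padding = {0, 1, 1, 0}, a {1, 6, 6, 1} output, and
  // the kImageStyle fast path, since only the two spatial dimensions are padded.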
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
if (constant_values == nullptr) {
// Quantized Pad requires that 0 is represented in the quantized
// range.
if (input->type == kTfLiteUInt8) {
TF_LITE_ENSURE(context, output->params.zero_point >=
std::numeric_limits<uint8_t>::min());
TF_LITE_ENSURE(context, output->params.zero_point <=
std::numeric_limits<uint8_t>::max());
} else {
TF_LITE_ENSURE(context, output->params.zero_point >=
std::numeric_limits<int8_t>::min());
TF_LITE_ENSURE(context, output->params.zero_point <=
std::numeric_limits<int8_t>::max());
}
} else {
// Quantized Pad requires that 'constant_values' is represented in the
// same quantized range as the input and output tensors.
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
constant_values->params.zero_point);
TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
static_cast<double>(constant_values->params.scale));
}
data->output_zero_point = output->params.zero_point;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, /*index=*/0);
const TfLiteEvalTensor* constant_values =
NumInputs(node) == 3
? tflite::micro::GetEvalInput(context, node, /*index=*/2)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, /*index=*/0);
switch (input->type) {
case kTfLiteFloat32: {
float pad_value =
constant_values == nullptr
? 0.f
: *tflite::micro::GetTensorData<float>(constant_values);
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
reference_ops::PadImageStyle(
data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input), &pad_value,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
} break;
case kTfLiteUInt8: {
uint8_t pad_value;
if (constant_values == nullptr) {
pad_value = static_cast<uint8_t>(data->output_zero_point);
} else {
pad_value = *tflite::micro::GetTensorData<uint8_t>(constant_values);
}
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
reference_ops::PadImageStyle(
data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input), &pad_value,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
} break;
case kTfLiteInt8: {
int8_t pad_value;
if (constant_values == nullptr) {
        pad_value = static_cast<int8_t>(data->output_zero_point);
} else {
pad_value = *tflite::micro::GetTensorData<int8_t>(constant_values);
}
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
reference_ops::PadImageStyle(
data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input), &pad_value,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} break;
case kTfLiteInt32: {
int32_t pad_value =
constant_values == nullptr
? 0
: *tflite::micro::GetTensorData<int32_t>(constant_values);
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int32_t>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int32_t>(output));
} break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
#undef TF_LITE_PAD
return kTfLiteOk;
}
} // namespace pad
TfLiteRegistration Register_PAD() {
return {/*init=*/pad::Init,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
// Also register Pad as PadV2.
TfLiteRegistration Register_PADV2() {
return {/*init=*/pad::Init,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,267 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pooling {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
TfLitePaddingValues padding;
int32_t activation_min;
int32_t activation_max;
float activation_min_f32;
float activation_max_f32;
};
TfLiteStatus CalculateOpData(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output, OpData* data) {
// input: batch, height, width, channel
int height = SizeOfDimension(input, 1);
int width = SizeOfDimension(input, 2);
int out_height, out_width;
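  // For example, a 4x4 input with a 2x2 filter, stride 2 and SAME padding
  // produces out_height = out_width = 2 with zero padding on both sides.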
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
/*dilation_rate_height=*/1,
/*dilation_rate_width=*/1, height, width, params->filter_height,
params->filter_width, params->padding, &out_height, &out_width);
return kTfLiteOk;
}
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::AveragePool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::MaxPool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
} // namespace
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AverageEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
AverageEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32:
MaxEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
MaxEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
if (input->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation, &data->activation_min_f32,
&data->activation_max_f32);
} else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
CalculateActivationRangeQuantized(context, params->activation, output,
&data->activation_min,
&data->activation_max);
}
return kTfLiteOk;
}
} // namespace pooling
TfLiteRegistration Register_AVERAGE_POOL_2D() {
return {/*init=*/pooling::Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_MAX_POOL_2D() {
return {/*init=*/pooling::Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,166 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
const TfLiteTensor* alpha,
TfLiteTensor* output, PreluParams* params) {
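  // The positive branch (x >= 0) is rescaled by input_scale / output_scale and
  // the negative branch (x < 0) by input_scale * alpha_scale / output_scale;
  // each ratio is stored as a fixed-point multiplier plus shift for the
  // quantized kernels.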
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 ||
output->type == kTfLiteInt16) {
double real_multiplier_1 = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
double real_multiplier_2 = static_cast<double>(input->params.scale) *
static_cast<double>(alpha->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier_1, &params->output_multiplier_1,
&params->output_shift_1);
QuantizeMultiplier(real_multiplier_2, &params->output_multiplier_2,
&params->output_shift_2);
params->input_offset = -input->params.zero_point;
params->alpha_offset = -alpha->params.zero_point;
params->output_offset = output->params.zero_point;
}
return kTfLiteOk;
}
} // namespace
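// Float PReLU with alpha broadcast against the input:
// out = x when x >= 0, otherwise out = alpha * x.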
inline void BroadcastPrelu4DSlowFloat(
const RuntimeShape& unextended_input1_shape, const float* input1_data,
const RuntimeShape& unextended_input2_shape, const float* input2_data,
const RuntimeShape& unextended_output_shape, float* output_data) {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
for (int b = 0; b < output_shape.Dims(0); ++b) {
for (int y = 0; y < output_shape.Dims(1); ++y) {
for (int x = 0; x < output_shape.Dims(2); ++x) {
for (int c = 0; c < output_shape.Dims(3); ++c) {
auto out_idx = Offset(output_shape, b, y, x, c);
auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
auto in1_val = input1_data[in1_idx];
auto in2_val = input2_data[in2_idx];
output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val;
}
}
}
}
}
void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(PreluParams));
}
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
PreluParams* params = static_cast<PreluParams*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, 0);
const TfLiteTensor* alpha = GetInput(context, node, 1);
TfLiteTensor* output = GetOutput(context, node, 0);
return CalculatePreluParams(input, alpha, output, params);
}
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const PreluParams& params =
*(static_cast<const PreluParams*>(node->user_data));
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* alpha = tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
switch (input->type) {
case kTfLiteFloat32: {
BroadcastPrelu4DSlowFloat(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(alpha),
tflite::micro::GetTensorData<float>(alpha),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteUInt8: {
reference_ops::BroadcastPrelu4DSlow(
params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(alpha),
tflite::micro::GetTensorData<uint8_t>(alpha),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
} break;
case kTfLiteInt8: {
reference_ops::BroadcastPrelu4DSlow(
params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(alpha),
tflite::micro::GetTensorData<int8_t>(alpha),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
} break;
default:
      TF_LITE_KERNEL_LOG(
          context,
          "Only float32, uint8_t and int8_t are supported currently, got %s.",
          TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
} // namespace activations
TfLiteRegistration Register_PRELU() {
return {/*init=*/activations::PreluInit,
/*free=*/nullptr,
/*prepare=*/activations::PreluPrepare,
/*invoke=*/activations::PreluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,178 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace quantize {
struct OpData {
tflite::QuantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
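  // For example, an effective scale of 0.5 is stored as
  // output_multiplier = 1 << 30 (Q31 format) with output_shift = 0, since
  // 0.5 == (1 << 30) / 2^31 * 2^0.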
int32_t output_multiplier;
int output_shift;
int32_t input_zero_point;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
  // Currently this only supports affine per-layer quantization.
TF_LITE_ENSURE_EQ(context, output->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(output->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
input->type == kTfLiteInt16 ||
input->type == kTfLiteInt8);
TF_LITE_ENSURE(context,
output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
if ((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) &&
output->type == kTfLiteInt8) {
    double effective_scale = static_cast<double>(input->params.scale) /
                             static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &data->output_multiplier,
&data->output_shift);
}
data->quantization_params.zero_point = output->params.zero_point;
data->quantization_params.scale = static_cast<double>(output->params.scale);
data->input_zero_point = input->params.zero_point;
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt16) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
// Int8 to Int8 requantization, required if the input and output tensors
// have different scales and/or zero points.
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace quantize
// This Op (QUANTIZE) quantizes the input and produces quantized output.
// AffineQuantize takes scale and zero point and quantizes the float value to
// quantized output, in int8_t or uint8_t format.
TfLiteRegistration Register_QUANTIZE() {
return {/*init=*/quantize::Init,
/*free=*/nullptr,
/*prepare=*/quantize::Prepare,
/*invoke=*/quantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,139 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/reduce.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace reduce {
constexpr int kMaxNumberOfAxis = 4;
constexpr int kMaxNumberOfReducedAxis = 2;
TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
// Inputs Tensor (dtype depends on quantization):
// [0] = Input
// [1] = Axis
// Outputs Tensor (dtype depends on quantization):
// [0] = Output
// Validate number of inputs and outputs
TF_LITE_ENSURE_EQ(context, node->inputs->size, 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Validate axis type
const TfLiteTensor* axis = GetInput(context, node, 1);
TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
return kTfLiteOk;
}
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
return kTfLiteOk;
}
void ResolveAxis(const int* axis_data, int axis_count,
tflite::MeanParams* op_params) {
int i = 0;
for (; i < axis_count; ++i) {
op_params->axis[i] = static_cast<int16_t>(axis_data[i]);
}
for (; i < 4; ++i) {
op_params->axis[i] = 1;
}
op_params->axis_count = axis_count;
}
TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TfLiteReducerParams* params =
reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
int num_axis = static_cast<int>(ElementCount(*axis->dims));
int temp_index[kMaxNumberOfAxis];
int resolved_axis[kMaxNumberOfReducedAxis];
switch (input->type) {
case kTfLiteFloat32: {
tflite::MeanParams op_params;
ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis,
&op_params);
// TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
// scratch tensor allocation has been implemented in (b/132070898)
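      // For example, a {1, 8, 8, 4} input averaged over axes {1, 2} produces a
      // {1, 1, 1, 4} output with keep_dims and a {1, 4} output otherwise.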
bool is_valid_inputs =
(input->dims->size == 4 && op_params.axis_count == 2 &&
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1)));
TF_LITE_ENSURE_MSG(
context, is_valid_inputs == true,
"Number of Input "
"dimensions != 4 OR the Axis is not either [1, 2] or [2, 1]");
// TODO(b/139102329): Handle the below special case in the combined
// reference method.
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims) {
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
TF_LITE_ENSURE(
context,
reference_ops::Mean(
tflite::micro::GetTensorData<float>(input), input->dims->data,
input->dims->size, tflite::micro::GetTensorData<float>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_index, resolved_axis,
tflite::micro::GetTensorData<float>(output)));
}
} break;
default:
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
TF_LITE_ENSURE_MSG(context, false,
"Currently, only float32 input type "
"is supported.");
}
return kTfLiteOk;
}
} // namespace reduce
TfLiteRegistration Register_MEAN() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reduce::PrepareMeanOrSum,
/*invoke=*/reduce::EvalMean,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,116 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace reshape {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
// Tensorflow's Reshape allows one of the shape components to have the
// special -1 value, meaning it will be calculated automatically based on the
// input. Here we calculate what that dimension should be so that the number
  // of output elements is the same as the number of input elements.
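  // For example, an input holding 24 elements reshaped to {-1, 6} resolves the
  // -1 (stretch) dimension to 24 / 6 = 4, giving an output shape of {4, 6}.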
int num_input_elements = NumElements(input);
TfLiteIntArray* output_shape = output->dims;
if (NumInputs(node) == 1 && // Legacy scalar supported with params.
output_shape->size == 1 && output_shape->data[0] == 0) {
// Legacy tflite models use a shape parameter of [0] to indicate scalars,
// so adjust accordingly. TODO(b/111614235): Allow zero-sized buffers during
// toco conversion.
output_shape->size = 0;
}
int num_output_elements = 1;
int stretch_dim = -1;
for (int i = 0; i < output_shape->size; ++i) {
int value = output_shape->data[i];
if (value == -1) {
TF_LITE_ENSURE_EQ(context, stretch_dim, -1);
stretch_dim = i;
} else {
num_output_elements *= value;
}
}
if (stretch_dim != -1) {
output_shape->data[stretch_dim] = num_input_elements / num_output_elements;
num_output_elements *= output_shape->data[stretch_dim];
}
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, ReshapeOutput(context, node), kTfLiteOk);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// TODO(b/162522304): storing input bytes in OpData increases some models
// significantly, possibly due to alignment issues.
size_t input_bytes;
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
input_bytes *= ElementCount(*input->dims);
// Do nothing for in-place reshape.
if (input->data.raw != output->data.raw) {
// Otherwise perform reshape with copy.
for (size_t i = 0; i < input_bytes; ++i) {
output->data.raw[i] = input->data.raw[i];
}
}
return kTfLiteOk;
}
} // namespace reshape
TfLiteRegistration Register_RESHAPE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reshape::Prepare,
/*invoke=*/reshape::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,123 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace resize_nearest_neighbor {
constexpr int kInputTensor = 0;
constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(DEBUG)
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
// Our current implementations rely on the input being 4D,
  // and the size being a 1D tensor with exactly 2 elements.
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32);
TF_LITE_ENSURE_EQ(context, size->dims->data[0], 2);
output->type = input->type;
if (!IsConstantTensor(size)) {
TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
return kTfLiteError;
}
#endif
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* size =
tflite::micro::GetEvalInput(context, node, kSizeTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::ResizeNearestNeighborParams op_params;
op_params.align_corners = params->align_corners;
op_params.half_pixel_centers = false;
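  // Nearest-neighbor resize only copies element values, so the float32 case
  // below can reuse the int32_t instantiation of the reference kernel (both
  // are 4-byte element types).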
if (output->type == kTfLiteFloat32) {
reference_ops::ResizeNearestNeighbor(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int32_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int32_t>(output));
} else if (output->type == kTfLiteUInt8) {
reference_ops::ResizeNearestNeighbor(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else if (output->type == kTfLiteInt8) {
reference_ops::ResizeNearestNeighbor(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context,
"Output type is %d, requires float, uint8_t or int8_t.",
output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace resize_nearest_neighbor
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/resize_nearest_neighbor::Prepare,
/*invoke=*/resize_nearest_neighbor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,74 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/round.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace round {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Round(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace round
TfLiteRegistration Register_ROUND() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/round::Prepare,
/*invoke=*/round::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,169 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
const TfLiteSoftmaxParams* params,
SoftmaxParams* op_data) {
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
if (input->type == kTfLiteUInt8) {
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
} else {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
// NOTE: Current int16_t softmax output does not require symmetric
// scaling
// - so no need to verify scale here.
} else {
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
}
}
static const int kScaledDiffIntegerBits = 5;
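    // PreprocessSoftmaxScaling folds beta * input_scale into a fixed-point
    // input_multiplier / input_left_shift pair, and diff_min bounds how far
    // below the row maximum an input may fall before its contribution is
    // treated as zero.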
int input_left_shift;
tflite::PreprocessSoftmaxScaling(
static_cast<double>(params->beta),
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
&op_data->input_multiplier, &input_left_shift);
op_data->input_left_shift = input_left_shift;
op_data->diff_min =
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
op_data->input_left_shift);
} else {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
op_data->beta = static_cast<double>(params->beta);
}
return kTfLiteOk;
}
} // namespace
// Takes a tensor and performs softmax along the last dimension.
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
const SoftmaxParams& op_data) {
tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
const SoftmaxParams& op_data) {
if (input->type == kTfLiteUInt8) {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
if (output->type == kTfLiteInt16) {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
}
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
}
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, 0);
TFLITE_DCHECK(node->user_data != nullptr);
SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
return CalculateSoftmaxParams(context, input, output, params, data);
}
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TFLITE_DCHECK(node->user_data != nullptr);
SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
switch (input->type) {
case kTfLiteFloat32: {
SoftmaxFloat(input, output, *data);
return kTfLiteOk;
}
case kTfLiteInt8:
case kTfLiteUInt8: {
SoftmaxQuantized(input, output, *data);
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
}
} // namespace activations
TfLiteRegistration Register_SOFTMAX() {
return {/*init=*/activations::SoftmaxInit,
/*free=*/nullptr,
/*prepare=*/activations::SoftmaxPrepare,
/*invoke=*/activations::SoftmaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite


@@ -0,0 +1,134 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace split {
template <typename T>
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input, int axis_value) {
const int output_count = NumOutputs(node);
const TfLiteIntArray* input_dims = input->dims;
const TfLiteEvalTensor* output0 =
tflite::micro::GetEvalOutput(context, node, 0);
const TfLiteIntArray* output_dims = output0->dims;
const int split_dimensions = input_dims->size;
int axis = axis_value < 0 ? axis_value + split_dimensions : axis_value;
TFLITE_DCHECK_LT(axis, split_dimensions);
TFLITE_DCHECK_EQ(output_dims->size, split_dimensions);
int64_t split_size = output_dims->data[axis] * output_count;
TFLITE_DCHECK_EQ(split_size, input_dims->data[axis]);
int64_t outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= input_dims->data[i];
}
int64_t base_inner_size = 1;
for (int i = axis + 1; i < split_dimensions; ++i) {
base_inner_size *= input_dims->data[i];
}
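// Worked example (illustrative): splitting a [2, 6, 3] input along axis 1
// into 3 outputs of shape [2, 2, 3] gives outer_size = 2,
// base_inner_size = 3 and copy_size = 2 * 3 = 6, so each of the 2 outer
// slices contributes 6 contiguous elements to each output in turn.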
const T* input_ptr = tflite::micro::GetTensorData<T>(input);
for (int k = 0; k < outer_size; ++k) {
for (int i = 0; i < output_count; ++i) {
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
T* output_data = tflite::micro::GetTensorData<T>(t);
const int copy_size = output_dims->data[axis] * base_inner_size;
T* output_ptr = output_data + k * copy_size;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
input_ptr += copy_size;
}
}
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* axis = GetInput(context, node, 0);
// Dynamic output tensors are needed if the axis tensor is not constant.
// But Micro doesn't support dynamic memory allocation, so we only support
// a constant axis tensor for now.
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
"Non constant axis tensor not supported");
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1);
int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
if (axis_value < 0) {
axis_value += input->dims->size;
}
TF_LITE_ENSURE(context, axis_value >= 0);
TF_LITE_ENSURE(context, axis_value < input->dims->size);
switch (input->type) {
case kTfLiteFloat32: {
return SplitImpl<float>(context, node, input, axis_value);
}
case kTfLiteUInt8: {
return SplitImpl<uint8_t>(context, node, input, axis_value);
}
case kTfLiteInt8: {
return SplitImpl<int8_t>(context, node, input, axis_value);
}
case kTfLiteInt16: {
return SplitImpl<int16_t>(context, node, input, axis_value);
}
case kTfLiteInt32: {
return SplitImpl<int32_t>(context, node, input, axis_value);
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s currently not supported.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace split
TfLiteRegistration Register_SPLIT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/split::Prepare,
/*invoke=*/split::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,192 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
#include <cmath>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace strided_slice {
constexpr int kInputTensor = 0;
constexpr int kBeginTensor = 1;
constexpr int kEndTensor = 2;
constexpr int kStridesTensor = 3;
constexpr int kOutputTensor = 0;
struct StridedSliceContext {
StridedSliceContext(TfLiteContext* context, TfLiteNode* node) {
params = reinterpret_cast<TfLiteStridedSliceParams*>(node->builtin_data);
input = GetInput(context, node, kInputTensor);
begin = GetInput(context, node, kBeginTensor);
end = GetInput(context, node, kEndTensor);
strides = GetInput(context, node, kStridesTensor);
output = GetOutput(context, node, kOutputTensor);
dims = NumDimensions(input);
}
const TfLiteStridedSliceParams* params;
const TfLiteTensor* input;
const TfLiteTensor* begin;
const TfLiteTensor* end;
const TfLiteTensor* strides;
TfLiteTensor* output;
int dims;
};
// This Op only supports 1-4D cases and since we use the reference 4D
// implementation, the 1-3D tensors are mapped to 4D.
const int kMaxDim = 4;
tflite::StridedSliceParams BuildStridedSliceParams(
StridedSliceContext* op_context) {
tflite::StridedSliceParams op_params;
op_params.start_indices_count = op_context->dims;
op_params.stop_indices_count = op_context->dims;
op_params.strides_count = op_context->dims;
for (int i = 0; i < op_context->dims; ++i) {
op_params.start_indices[i] = GetTensorData<int32_t>(op_context->begin)[i];
op_params.stop_indices[i] = GetTensorData<int32_t>(op_context->end)[i];
op_params.strides[i] = GetTensorData<int32_t>(op_context->strides)[i];
}
op_params.begin_mask = op_context->params->begin_mask;
op_params.ellipsis_mask = 0;
op_params.end_mask = op_context->params->end_mask;
op_params.new_axis_mask = 0;
op_params.shrink_axis_mask = op_context->params->shrink_axis_mask;
return op_params;
}
// Processes the indexing tensors (begin, end and strides) to check that the
// output tensor's shape matches the requested slice. This function is
// callable from both Prepare() and Eval() as long as the caller ensures the
// indexing tensors are present.
TfLiteStatus CheckOutputSize(TfLiteContext* context,
StridedSliceContext* op_context) {
using ::tflite::strided_slice::StartForAxis;
using ::tflite::strided_slice::StopForAxis;
TfLiteIntArray* output_shape = op_context->output->dims;
int shape_size = 0;
auto op_params = BuildStridedSliceParams(op_context);
auto input_shape = GetTensorShape(op_context->input);
for (int idx = 0; idx < op_context->dims; ++idx) {
int32_t stride = GetTensorData<int32_t>(op_context->strides)[idx];
TF_LITE_ENSURE_MSG(context, stride != 0, "stride value has to be non-zero");
int32_t begin = StartForAxis(op_params, input_shape, idx);
int32_t end = StopForAxis(op_params, input_shape, idx, begin);
// When shrinking an axis, the end position does not matter (and can be
// incorrect when negative indexing is used, see Issue #19260). Always use
// begin + 1 to generate a length 1 slice, since begin has
// already been adjusted for negative indices by StartForAxis.
const bool shrink_axis = op_context->params->shrink_axis_mask & (1 << idx);
if (shrink_axis) {
end = begin + 1;
}
// This is valid for both positive and negative strides
int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
dim_shape = dim_shape < 0 ? 0 : dim_shape;
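// Worked example (illustrative): begin = 1, end = 7, stride = 2 yields
// ceil((7 - 1) / 2.0) = 3 output elements (indices 1, 3, 5); with a
// negative stride, begin = 5, end = 0, stride = -2 yields
// ceil((0 - 5) / -2.0) = 3 elements (indices 5, 3, 1).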
if (!shrink_axis) {
TF_LITE_ENSURE_EQ(context, output_shape->data[shape_size], dim_shape);
shape_size++;
}
}
TF_LITE_ENSURE_EQ(context, output_shape->size, shape_size);
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(StridedSliceParams));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
StridedSliceParams* op_params =
static_cast<StridedSliceParams*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 4);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
StridedSliceContext op_context(context, node);
TF_LITE_ENSURE_MSG(context, op_context.dims <= kMaxDim,
"input dim should not exceed 4");
auto params = BuildStridedSliceParams(&op_context);
memcpy(op_params, &params, sizeof(StridedSliceParams));
return CheckOutputSize(context, &op_context);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const StridedSliceParams& op_params =
*(static_cast<const StridedSliceParams*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (output->type) {
case kTfLiteFloat32:
reference_ops::StridedSlice(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteUInt8:
reference_ops::StridedSlice(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt8:
reference_ops::StridedSlice(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace strided_slice
TfLiteRegistration Register_STRIDED_SLICE() {
return {/*init=*/strided_slice::Init,
/*free=*/nullptr,
/*prepare=*/strided_slice::Prepare,
/*invoke=*/strided_slice::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,253 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/sub.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace sub {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpData {
bool requires_broadcast;
// These fields are used in both the general 8-bit -> 8bit quantized path,
// and the special 16-bit -> 16bit quantized path
int input1_shift;
int input2_shift;
int32_t output_activation_min;
int32_t output_activation_max;
// These fields are used only in the general 8-bit -> 8bit quantized path
int32_t input1_multiplier;
int32_t input2_multiplier;
int32_t output_multiplier;
int output_shift;
int left_shift;
int32_t input1_offset;
int32_t input2_offset;
int32_t output_offset;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
const float twice_max_input_scale =
2 * std::max(input1->params.scale, input2->params.scale);
const double real_input1_multiplier =
static_cast<double>(input1->params.scale / twice_max_input_scale);
const double real_input2_multiplier =
static_cast<double>(input2->params.scale / twice_max_input_scale);
const double real_output_multiplier =
static_cast<double>(twice_max_input_scale /
((1 << data->left_shift) * output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
QuantizeMultiplierSmallerThanOneExp(
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
}
return kTfLiteOk;
}
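// Worked example of the rescaling above (illustrative): with input scales
// 0.5 and 0.25, twice_max_input_scale = 1.0, so real_input1_multiplier = 0.5
// and real_input2_multiplier = 0.25. Each real multiplier is converted to a
// fixed-point multiplier plus shift by QuantizeMultiplierSmallerThanOneExp,
// and the reference kernel pre-scales the inputs by 1 << left_shift (here 20)
// to preserve precision before the subtraction.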
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, data));
return kTfLiteOk;
}
void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
if (data->requires_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
tflite::reference_ops::SubWithActivation(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
}
TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteSubParams* params, const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
if (need_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
}
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
if (output->type == kTfLiteFloat32) {
EvalSub(context, node, params, &data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace sub
TfLiteRegistration Register_SUB() {
return {/*init=*/sub::Init,
/*free=*/nullptr,
/*prepare=*/sub::Prepare,
/*invoke=*/sub::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,548 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <math.h>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace svdf {
namespace {
struct OpData {
int32_t effective_scale_1_a;
int32_t effective_scale_2_a;
// b versions of each scale are kept as int since the numbers are just the
// shift value, typically in the range [-32, 32].
int effective_scale_1_b;
int effective_scale_2_b;
int scratch_tensor_index;
int scratch_output_tensor_index;
// Cached tensor zero point values for quantized operations.
int input_zero_point;
int output_zero_point;
};
/**
* This version of SVDF is specific to TFLite Micro. It contains the following
* differences from the TFLite version:
*
* 1.) Scratch tensor allocation - scratch tensors must be known ahead of time
* for the Micro interpreter.
* 2.) Output dimensions - the TFLite version determines the output size at
* runtime and resizes the output tensor. The Micro runtime does not support
* tensor resizing.
*/
static inline void ApplyTimeWeightsBiasAndActivation(
int batch_size, int memory_size, int num_filters, int num_units, int rank,
const float* const __restrict__ weights_time_ptr,
const float* const __restrict__ bias_ptr, TfLiteFusedActivation activation,
float* const __restrict__ state_ptr, float* const __restrict__ scratch_ptr,
float* const __restrict__ output_ptr) {
// Compute matmul(activation_state, weights_time).
for (int b = 0; b < batch_size; ++b) {
// Perform batched vector dot product:
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
const float* vector1_ptr = weights_time_ptr;
const float* vector2_ptr = state_ptr + b * memory_size * num_filters;
for (int i = 0; i < num_filters; ++i) {
*scratch_ptr_batch = 0.f;
for (int j = 0; j < memory_size; ++j) {
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
}
scratch_ptr_batch++;
}
}
// Initialize output with bias if provided.
if (bias_ptr) {
// VectorBatchVectorAssign
for (int i = 0; i < batch_size; ++i) {
float* output_data = output_ptr + i * num_units;
const float* bias_data = bias_ptr;
for (int j = 0; j < num_units; ++j) {
*output_data++ = *bias_data++;
}
}
} else {
float* output_data = output_ptr;
for (int i = 0; i < batch_size * num_units; ++i) {
*output_data++ = 0.0f;
}
}
// Reduction sum.
for (int b = 0; b < batch_size; ++b) {
float* output_ptr_batch = output_ptr + b * num_units;
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
// Reduction sum vector
for (int i = 0; i < num_units; ++i) {
for (int j = 0; j < rank; j++) {
output_ptr_batch[i] += *scratch_ptr_batch++;
}
}
}
// Apply activation.
for (int b = 0; b < batch_size; ++b) {
float* output_ptr_batch = output_ptr + b * num_units;
for (int i = 0; i < num_units; ++i) {
*output_ptr_batch = ActivationValFloat(activation, *output_ptr_batch);
++output_ptr_batch;
}
}
}
inline void EvalFloatSVDF(
TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* weights_feature,
const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
const TfLiteSVDFParams* params, int scratch_tensor_index,
TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
const int rank = params->rank;
const int batch_size = input->dims->data[0];
const int input_size = input->dims->data[1];
const int num_filters = weights_feature->dims->data[0];
const int num_units = num_filters / rank;
const int memory_size = weights_time->dims->data[1];
const float* weights_feature_ptr =
tflite::micro::GetTensorData<float>(weights_feature);
const float* weights_time_ptr =
tflite::micro::GetTensorData<float>(weights_time);
const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
const float* input_ptr = tflite::micro::GetTensorData<float>(input);
float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
float* scratch_ptr = static_cast<float*>(
context->GetScratchBuffer(context, scratch_tensor_index));
float* output_ptr = tflite::micro::GetTensorData<float>(output);
// Left shift the activation_state.
{
float* new_state_start = state_ptr;
const float* old_state_start = state_ptr + 1;
const float* old_state_end =
state_ptr + batch_size * num_filters * memory_size;
while (old_state_start != old_state_end) {
*new_state_start++ = *old_state_start++;
}
}
// Note: no need to clear the latest activation, matmul is not accumulative.
// Compute conv1d(inputs, weights_feature).
// The activation_state's rightmost column is used to save current cycle
// activation. This is achieved by starting at state_ptr[memory_size - 1] and
// having the stride equal to memory_size.
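// For example, with memory_size = 4 and num_filters = 2, each filter keeps a
// row of 4 past activations; after the left shift above, the new dot
// products are written at flat indices 3 and 7, i.e. column
// memory_size - 1 of each row.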
// Perform batched matrix vector multiply operation:
{
const float* matrix = weights_feature_ptr;
const float* vector = input_ptr;
float* result = &state_ptr[memory_size - 1];
float* result_in_batch = result;
for (int i = 0; i < batch_size; ++i) {
const float* matrix_ptr = matrix;
for (int j = 0; j < num_filters; ++j) {
float dot_prod = 0.0f;
const float* vector_in_batch = vector + i * input_size;
for (int k = 0; k < input_size; ++k) {
dot_prod += *matrix_ptr++ * *vector_in_batch++;
}
*result_in_batch = dot_prod;
result_in_batch += memory_size;
}
}
}
ApplyTimeWeightsBiasAndActivation(
batch_size, memory_size, num_filters, num_units, rank, weights_time_ptr,
bias_ptr, params->activation, state_ptr, scratch_ptr, output_ptr);
}
void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input_tensor,
const TfLiteEvalTensor* weights_feature_tensor,
const TfLiteEvalTensor* weights_time_tensor,
const TfLiteEvalTensor* bias_tensor,
const TfLiteSVDFParams* params,
TfLiteEvalTensor* activation_state_tensor,
TfLiteEvalTensor* output_tensor, const OpData& data) {
const int n_rank = params->rank;
const int n_batch = input_tensor->dims->data[0];
const int n_input = input_tensor->dims->data[1];
const int n_filter = weights_feature_tensor->dims->data[0];
const int n_unit = n_filter / n_rank;
const int n_memory = weights_time_tensor->dims->data[1];
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
int32_t* scratch_tensor = static_cast<int32_t*>(
context->GetScratchBuffer(context, data.scratch_tensor_index));
int32_t* scratch_output_tensor = static_cast<int32_t*>(
context->GetScratchBuffer(context, data.scratch_output_tensor_index));
// Shift states.
int16_t* const state_ptr =
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
// Left shift the activation_state.
{
int16_t* new_state_start = state_ptr;
const int16_t* old_state_start = state_ptr + 1;
const int16_t* old_state_end = state_ptr + n_batch * n_filter * n_memory;
while (old_state_start != old_state_end) {
*new_state_start++ = *old_state_start++;
}
}
// Note: no need to clear the latest activation, matmul is not accumulative.
// Feature matmul.
{
int16_t* state =
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
const int8_t* weight_feature =
tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
const int32_t output_max = std::numeric_limits<int16_t>::max();
const int32_t output_min = std::numeric_limits<int16_t>::min();
int16_t* result_in_batch = state + (n_memory - 1);
for (int b = 0; b < n_batch; b++) {
const int8_t* matrix_ptr = weight_feature;
for (int r = 0; r < n_filter; r++) {
int32_t dot_prod = 0;
const int8_t* vector_in_batch = input + b * n_input;
for (int c = 0; c < n_input; c++) {
dot_prod +=
*matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
}
dot_prod = MultiplyByQuantizedMultiplier(
dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
dot_prod = std::min(std::max(output_min, dot_prod), output_max);
// This assumes the state is symmetrically quantized. Otherwise the last
// element of each state row should be initialized to its zero point and
// accumulate the dot_prod.
// Equivalent to the following:
//   *result_in_batch = zero point, which happens to be zero.
//   *result_in_batch += dot_prod.
*result_in_batch = dot_prod;
result_in_batch += n_memory;
}
}
}
// Time.
{
for (int b = 0; b < n_batch; ++b) {
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
// Perform batched vector dot product:
const int16_t* vector1_ptr =
tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
const int16_t* vector2_ptr =
tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
b * n_memory * n_filter;
for (int i = 0; i < n_filter; i++) {
*scratch_ptr_batch = 0;
for (int j = 0; j < n_memory; j++) {
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
}
scratch_ptr_batch++;
}
}
}
// Reduce, add bias, rescale, activation.
{
// Add bias.
if (bias_tensor) {
// Vector batch assign:
const int32_t* bias_data =
tflite::micro::GetTensorData<int32_t>(bias_tensor);
for (int i = 0; i < n_batch; ++i) {
int32_t* output_ptr = scratch_output_tensor + i * n_unit;
const int32_t* bias_ptr = bias_data;
for (int j = 0; j < n_unit; ++j) {
*output_ptr++ = *bias_ptr++;
}
}
} else {
int32_t* output_ptr = scratch_output_tensor;
for (int i = 0; i < n_batch * n_unit; ++i) {
*output_ptr++ = 0;
}
}
// Reduce.
for (int b = 0; b < n_batch; ++b) {
int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit;
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
// Reduction sum vector
for (int i = 0; i < n_unit; ++i) {
for (int j = 0; j < n_rank; ++j) {
output_temp_ptr[i] += *scratch_ptr_batch++;
}
}
}
// Rescale.
const int32_t output_max = std::numeric_limits<int8_t>::max();
const int32_t output_min = std::numeric_limits<int8_t>::min();
for (int i = 0; i < n_batch * n_unit; ++i) {
int32_t x1 = scratch_output_tensor[i];
int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
data.effective_scale_2_b);
int32_t x3 = x2 + data.output_zero_point;
int32_t x4 = std::min(std::max(output_min, x3), output_max);
tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
static_cast<int8_t>(x4);
}
}
}
} // namespace
// Input tensors.
constexpr int kInputTensor = 0;
constexpr int kWeightsFeatureTensor = 1;
constexpr int kWeightsTimeTensor = 2;
constexpr int kBiasTensor = 3;
// This is a variable tensor, and will be modified by this op.
constexpr int kInputActivationStateTensor = 4;
// Output tensor.
constexpr int kOutputTensor = 0;
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);
// Validate Tensor Inputs (dtype depends on quantization):
// [0] = Input, {2, batch_size, input_size}
// [1] = Weights Feature, {2, num_filters, input_size}
// [2] = Weights Time, {2, num_filters, memory_size}
// [3] = Bias (optional), {1, num_units}
// [4] = Activation State (variable),
// {2, batch_size, memory_size * num_filters}
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* weights_feature =
GetInput(context, node, kWeightsFeatureTensor);
const TfLiteTensor* weights_time =
GetInput(context, node, kWeightsTimeTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
const TfLiteTensor* activation_state =
GetInput(context, node, kInputActivationStateTensor);
// Define input constants based on input tensor definition above:
const int rank = params->rank;
const int input_size = input->dims->data[1];
const int batch_size = input->dims->data[0];
const int num_filters = weights_feature->dims->data[0];
TF_LITE_ENSURE_EQ(context, num_filters % rank, 0);
const int num_units = num_filters / rank;
const int memory_size = weights_time->dims->data[1];
// Validate Input Tensor:
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
// Validate Tensor Output:
// [0] = float/int8_t, {2, batch_size, num_units}
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
// Validate Weights Feature Input Tensor:
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_feature), 2);
TF_LITE_ENSURE_EQ(context, weights_feature->dims->data[1], input_size);
// Validate Weights Time Input Tensor:
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_time), 2);
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[0], num_filters);
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size);
// Validate Optional Bias Input Tensor:
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units);
}
// Validate Activation State Input Tensor:
TF_LITE_ENSURE_EQ(context, NumDimensions(activation_state), 2);
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
memory_size * num_filters);
// Since is_variable is not part of TfLiteEvalTensor, check is_variable here.
TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);
TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16);
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
}
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
const double effective_scale_1 = static_cast<double>(
input->params.scale * weights_feature->params.scale /
activation_state->params.scale);
const double effective_scale_2 =
static_cast<double>(activation_state->params.scale *
weights_time->params.scale / output->params.scale);
// TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
if (bias != nullptr) {
TF_LITE_ENSURE(
context,
std::abs(static_cast<double>(bias->params.scale) -
static_cast<double>(activation_state->params.scale *
weights_time->params.scale)) < 1e-5);
}
QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
&(data->effective_scale_1_b));
QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
&(data->effective_scale_2_b));
data->input_zero_point = input->params.zero_point;
data->output_zero_point = output->params.zero_point;
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
context, batch_size * num_filters * sizeof(int32_t),
&(data->scratch_tensor_index));
TF_LITE_ENSURE_OK(context, scratch_status);
const TfLiteStatus scratch_output_status =
context->RequestScratchBufferInArena(
context, batch_size * num_units * sizeof(int32_t),
&(data->scratch_output_tensor_index));
TF_LITE_ENSURE_OK(context, scratch_output_status);
} else {
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32);
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
}
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
context, batch_size * num_filters * sizeof(float),
&(data->scratch_tensor_index));
TF_LITE_ENSURE_OK(context, scratch_status);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* weights_feature =
tflite::micro::GetEvalInput(context, node, kWeightsFeatureTensor);
const TfLiteEvalTensor* weights_time =
tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 5)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
context, node, kInputActivationStateTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (weights_feature->type) {
case kTfLiteFloat32: {
EvalFloatSVDF(context, node, input, weights_feature, weights_time, bias,
params, data.scratch_tensor_index, activation_state,
output);
return kTfLiteOk;
}
case kTfLiteInt8: {
EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
params, activation_state, output, data);
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
TfLiteTypeGetName(weights_feature->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace svdf
TfLiteRegistration Register_SVDF() {
return {/*init=*/svdf::Init,
/*free=*/nullptr,
/*prepare=*/svdf::Prepare,
/*invoke=*/svdf::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,154 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/tanh.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
void* TanhInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
OpData* data) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
return kTfLiteOk;
}
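// Worked example of the fixed-point setup above (illustrative): with
// kInputIntegerBits = 4 and an input scale of 1/128, the real multiplier is
// (1/128) * 2^(31 - 4) = 2^20. std::frexp splits this into q = 0.5 and
// input_left_shift = 21, so input_multiplier = round(0.5 * 2^31) =
// 1073741824.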
TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
data->input_zero_point = input->params.zero_point;
return CalculateArithmeticOpData(context, node, data);
}
} // namespace
TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
switch (input->type) {
case kTfLiteFloat32: {
reference_ops::Tanh(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteInt16: {
TanhParams params;
params.input_left_shift = data.input_left_shift;
reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
} break;
case kTfLiteUInt8: {
TanhParams params;
params.input_zero_point = data.input_zero_point;
params.input_range_radius = data.input_range_radius;
params.input_multiplier = data.input_multiplier;
params.input_left_shift = data.input_left_shift;
reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
} break;
case kTfLiteInt8: {
reference_integer_ops::Tanh(
data.input_zero_point, data.input_range_radius, data.input_multiplier,
data.input_left_shift, NumElements(input->dims),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
} break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
}
} // namespace activations
TfLiteRegistration Register_TANH() {
return {/*init=*/activations::TanhInit,
/*free=*/nullptr,
/*prepare=*/activations::TanhPrepare,
/*invoke=*/activations::TanhEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,121 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace unpack {
namespace {
constexpr int kInputTensor = 0;
template <typename T>
TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input, int output_count,
int axis) {
const TfLiteEvalTensor* output0 =
tflite::micro::GetEvalOutput(context, node, 0);
const TfLiteIntArray* input_dims = input->dims;
const TfLiteIntArray* output_dims = output0->dims;
const int dimensions = input_dims->size;
if (axis < 0) {
axis += input->dims->size;
}
TFLITE_DCHECK_LT(axis, dimensions);
int outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= input_dims->data[i];
}
int copy_size = 1;
for (int i = axis + 1; i < dimensions; ++i) {
copy_size *= input_dims->data[i];
}
int output_size = 1;
for (int i = 0; i < output_dims->size; ++i) {
output_size *= output_dims->data[i];
}
TFLITE_DCHECK_EQ(output_size, copy_size * outer_size);
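// Worked example (illustrative): unpacking a [2, 3, 4] input along axis 1
// yields 3 outputs of shape [2, 4]; outer_size = 2, copy_size = 4, and
// output i copies 4 elements from input offset k * 12 + i * 4 for each
// outer index k.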
const T* input_data = tflite::micro::GetTensorData<T>(input);
for (int i = 0; i < output_count; ++i) {
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
T* output_data = tflite::micro::GetTensorData<T>(t);
for (int k = 0; k < outer_size; ++k) {
T* output_ptr = output_data + copy_size * k;
int loc = k * output_count * copy_size + i * copy_size;
const T* input_ptr = input_data + loc;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
}
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteUnpackParams* data =
reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
switch (input->type) {
case kTfLiteFloat32: {
return UnpackImpl<float>(context, node, input, data->num, data->axis);
}
case kTfLiteInt32: {
return UnpackImpl<int32_t>(context, node, input, data->num, data->axis);
}
case kTfLiteUInt8: {
return UnpackImpl<uint8_t>(context, node, input, data->num, data->axis);
}
case kTfLiteInt8: {
return UnpackImpl<int8_t>(context, node, input, data->num, data->axis);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by unpack.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
return kTfLiteOk;
}
} // namespace
} // namespace unpack
TfLiteRegistration Register_UNPACK() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/unpack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,155 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/memory_helpers.h"
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
uint8_t* AlignPointerUp(uint8_t* data, size_t alignment) {
std::uintptr_t data_as_uintptr_t = reinterpret_cast<std::uintptr_t>(data);
uint8_t* aligned_result = reinterpret_cast<uint8_t*>(
((data_as_uintptr_t + (alignment - 1)) / alignment) * alignment);
return aligned_result;
}
uint8_t* AlignPointerDown(uint8_t* data, size_t alignment) {
std::uintptr_t data_as_uintptr_t = reinterpret_cast<std::uintptr_t>(data);
uint8_t* aligned_result =
reinterpret_cast<uint8_t*>((data_as_uintptr_t / alignment) * alignment);
return aligned_result;
}
size_t AlignSizeUp(size_t size, size_t alignment) {
size_t aligned_size = (((size + (alignment - 1)) / alignment) * alignment);
return aligned_size;
}
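// For example, AlignSizeUp(13, 4) and AlignSizeUp(16, 4) both return 16:
// the size is rounded up to the next multiple of the alignment.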
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) {
switch (type) {
case kTfLiteFloat32:
*size = sizeof(float);
break;
case kTfLiteInt16:
*size = sizeof(int16_t);
break;
case kTfLiteInt32:
*size = sizeof(int32_t);
break;
case kTfLiteUInt8:
*size = sizeof(uint8_t);
break;
case kTfLiteInt8:
*size = sizeof(int8_t);
break;
case kTfLiteInt64:
*size = sizeof(int64_t);
break;
case kTfLiteBool:
*size = sizeof(bool);
break;
case kTfLiteComplex64:
*size = sizeof(float) * 2;
break;
case kTfLiteComplex128:
*size = sizeof(double) * 2;
break;
default:
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter) {
int element_count = 1;
// If flatbuffer_tensor.shape == nullptr, then flatbuffer_tensor is a scalar
// so has 1 element.
if (flatbuffer_tensor.shape() != nullptr) {
for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
element_count *= flatbuffer_tensor.shape()->Get(n);
}
}
TfLiteType tf_lite_type;
TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
&tf_lite_type, error_reporter));
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(tf_lite_type, type_size));
*bytes = element_count * (*type_size);
return kTfLiteOk;
}
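// For example, a float32 tensor with shape [2, 3] gives element_count = 6 and
// *type_size = 4, so *bytes = 24; a tensor with a null shape is treated as a
// scalar with a single element.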
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes) {
TFLITE_DCHECK(out_bytes != nullptr);
int element_count = 1;
// If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element.
if (eval_tensor->dims != nullptr) {
for (int n = 0; n < eval_tensor->dims->size; ++n) {
element_count *= eval_tensor->dims->data[n];
}
}
size_t type_size;
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size));
*out_bytes = element_count * type_size;
return kTfLiteOk;
}
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output) {
const TfLiteTensor* input = nullptr;
TF_LITE_ENSURE(context, input1->dims != nullptr);
TF_LITE_ENSURE(context, input2->dims != nullptr);
TF_LITE_ENSURE(context, output->dims->size == 0);
input = input1->dims->size > input2->dims->size ? input1 : input2;
TF_LITE_ENSURE(context, output->type == input->type);
size_t size = 0;
TfLiteTypeSizeOf(input->type, &size);
const int dimensions_count = tflite::GetTensorShape(input).DimensionsCount();
for (int i = 0; i < dimensions_count; i++) {
size *= input->dims->data[i];
}
output->bytes = size;
// The output dims array needs room for input->dims->size entries.
output->dims =
reinterpret_cast<TfLiteIntArray*>(context->AllocatePersistentBuffer(
context, TfLiteIntArrayGetSizeInBytes(input->dims->size)));
output->dims->size = input->dims->size;
for (int i = 0; i < dimensions_count; i++) {
output->dims->data[i] = input->dims->data[i];
}
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -0,0 +1,59 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Returns the next pointer address aligned to the given alignment.
uint8_t* AlignPointerUp(uint8_t* data, size_t alignment);
// Returns the previous pointer address aligned to the given alignment.
uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
// Returns an increased size that's a multiple of alignment.
size_t AlignSizeUp(size_t size, size_t alignment);
// Returns size in bytes for a given TfLiteType.
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);
// How many bytes are needed to hold a tensor's contents.
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter);
// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
// returned in out_bytes.
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes);
// Deduce output dimensions from the inputs and allocate storage for them.
// Useful for operators with two inputs where the output shape should match
// the shape of the larger input.
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_

View File

@@ -0,0 +1,437 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
namespace tflite {
// Simple stable in-place sort function. Not time-efficient for large arrays.
// Would normally be in an anonymous namespace to keep it private, but we want
// to be able to test it externally.
void ReverseSortInPlace(int* values, int* ids, int size) {
bool any_swapped;
do {
any_swapped = false;
for (int i = 1; i < size; ++i) {
if (values[i - 1] < values[i]) {
const int value_temp = values[i - 1];
values[i - 1] = values[i];
values[i] = value_temp;
const int id_temp = ids[i - 1];
ids[i - 1] = ids[i];
ids[i] = id_temp;
any_swapped = true;
}
}
} while (any_swapped);
}
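// For example, given values = {3, 10, 5} and ids = {0, 1, 2}, the arrays are
// reordered together to values = {10, 5, 3} and ids = {1, 2, 0}.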
GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
int scratch_buffer_size)
: buffer_count_(0), need_to_calculate_offsets_(true) {
// Allocate the arrays we need within the scratch buffer arena.
max_buffer_count_ = scratch_buffer_size / per_buffer_size();
unsigned char* next_free = scratch_buffer;
requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
next_free += sizeof(BufferRequirements) * max_buffer_count_;
buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
next_free += sizeof(ListEntry) * max_buffer_count_;
buffer_offsets_ = reinterpret_cast<int*>(next_free);
}
GreedyMemoryPlanner::~GreedyMemoryPlanner() {
// We don't own the scratch buffer, so don't deallocate anything.
}
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
int last_time_used) {
if (buffer_count_ >= max_buffer_count_) {
TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
max_buffer_count_);
return kTfLiteError;
}
BufferRequirements* current = &requirements_[buffer_count_];
current->size = size;
current->first_time_used = first_time_used;
current->last_time_used = last_time_used;
current->offline_offset = kOnlinePlannedBuffer;
++buffer_count_;
need_to_calculate_offsets_ = true;
return kTfLiteOk;
}
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
int last_time_used, int offline_offset) {
BufferRequirements* current = &requirements_[buffer_count_];
if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
kTfLiteOk) {
return kTfLiteError;
}
current->offline_offset = offline_offset;
return kTfLiteOk;
}
bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
const int last_time_used) const {
const BufferRequirements* entry_requirements =
&requirements_[entry->requirements_index];
if (entry_requirements->first_time_used > last_time_used) {
return false;
}
if (first_time_used > entry_requirements->last_time_used) {
return false;
}
return true;
}
GreedyMemoryPlanner::ListEntry*
GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
const GreedyMemoryPlanner::ListEntry* start, const int first_time_used,
const int last_time_used) {
ListEntry* result = nullptr;
ListEntry* candidate_next_entry;
if (start == nullptr) {
candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
} else {
if (start->next_entry_index == -1) {
return nullptr;
}
candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index];
}
do {
if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used,
last_time_used)) {
result = candidate_next_entry;
break;
}
if (candidate_next_entry->next_entry_index == -1) {
break;
}
candidate_next_entry =
&buffers_sorted_by_offset_[candidate_next_entry->next_entry_index];
} while (true);
return result;
}
void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) {
return;
}
need_to_calculate_offsets_ = false;
// Start off by ordering the buffers in descending order of size.
// This helps find a more compact layout. Intuitively, you can think
// about putting the large buffers in place first, and then the
// smaller buffers can fit in the gaps, rather than fragmenting the
// gaps with small buffers at the beginning. Add offline planned offsets
// first in the list, since they have a predetermined offset.
int idx_from_tail = buffer_count_;
int idx_from_head = 0;
for (int i = 0; i < buffer_count_; ++i) {
if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
idx_from_tail--;
buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
buffer_ids_sorted_[idx_from_tail] = i;
buffer_offsets_[i] = -1;
} else {
buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
buffer_ids_sorted_[idx_from_head] = i;
buffer_offsets_[i] = requirements_[i].offline_offset;
idx_from_head++;
}
}
// This sorting algorithm is naive, and may end up taking a very long time
// with hundreds of buffers. Do not sort the offline planned offsets.
ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
&buffer_ids_sorted_[idx_from_head],
buffer_count_ - idx_from_head);
// Initialize the first entry to the first buffer in
// buffer_ids_sorted_.
// - If there are no offline planned offsets, the largest buffer will be
// first, and the buffers will be handled in size order.
// - If offline offsets are present, these will be handled first in order
// for the greedy algorithm to utilize gaps in the offline plan.
first_entry_index_ = 0;
next_free_entry_ = 1;
ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
first_entry->next_entry_index = -1; // to mark the entry as end of list
int buffer_id = buffer_ids_sorted_[0];
first_entry->requirements_index = buffer_id;
if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
buffer_offsets_[buffer_id] = 0;
}
first_entry->offset = buffer_offsets_[buffer_id];
// Work through the rest of the buffers to find a good gap to place each one.
for (int i = 1; i < buffer_count_; ++i) {
// The id is the order the buffer was originally added by the client.
buffer_id = buffer_ids_sorted_[i];
// Look at what size and time range the buffer needs to be active.
BufferRequirements* wanted_requirements = &requirements_[buffer_id];
const int wanted_size = wanted_requirements->size;
const int wanted_first_time_used = wanted_requirements->first_time_used;
const int wanted_last_time_used = wanted_requirements->last_time_used;
// Find the first buffer that's active in our time range. All placed
// buffers are stored in the order of their starting position in the arena
// so that it's easy to find the next buffer in memory, and hence the gap
// between them.
// The candidate_entry variable holds the buffer that we're considering
// placing the current buffer after.
int candidate_offset = 0;
// Loop through the offset-ordered list of buffers, looking for gaps.
if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
ListEntry* prior_entry = nullptr;
while (true) {
// Find out what the next active buffer is.
ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
prior_entry, wanted_first_time_used, wanted_last_time_used);
if (prior_entry) {
BufferRequirements* candidate_requirements =
&requirements_[prior_entry->requirements_index];
const int prior_entry_offset =
prior_entry->offset + candidate_requirements->size;
if (prior_entry_offset > candidate_offset) {
candidate_offset = prior_entry_offset;
}
}
if (next_entry == nullptr) {
// We're at the end of the list, so we can always append the buffer
// here.
break;
}
// Find out how much space there is between us and the next buffer.
const int gap = next_entry->offset - candidate_offset;
if (gap >= wanted_size) {
// This entry has a big enough gap between it and the next, so
// use it!
break;
}
// The gap wasn't big enough, so move on to another candidate.
prior_entry = next_entry;
}
} else {
// Offline planned offsets are to be considered constant.
candidate_offset = wanted_requirements->offline_offset;
}
// At this point, we've either found a gap (possibly at the end of the
// list) and want to place the buffer there, or there are no other active
// buffers in this time range and so we can put it at offset zero.
// Record the buffer's offset in our plan.
buffer_offsets_[buffer_id] = candidate_offset;
// Add the newly-placed buffer to our offset-ordered list, so that
// subsequent passes can fit in their buffers around it.
ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_];
new_entry->offset = candidate_offset;
new_entry->requirements_index = buffer_id;
const int new_entry_index = next_free_entry_;
++next_free_entry_;
if (first_entry->offset > candidate_offset) {
// The new entry offset is smaller than the first entry offset =>
// replace the first entry
first_entry = new_entry;
first_entry->next_entry_index = first_entry_index_;
first_entry_index_ = new_entry_index;
} else {
ListEntry* current_entry = first_entry;
// Make sure that we insert the buffer at the correct place in the
// buffer-offset-ordered list
while (true) {
const int next_entry_index = current_entry->next_entry_index;
if (next_entry_index == -1) {
// We're at the end of the list, so just add the new entry here.
current_entry->next_entry_index = new_entry_index;
new_entry->next_entry_index = -1;
break;
}
// Not at the end of the list -> take a look at the next entry.
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
if (next_entry->offset > candidate_offset) {
// We're at the right spot to do an insertion and retain the sorting
// order, so place the new entry here.
new_entry->next_entry_index = current_entry->next_entry_index;
current_entry->next_entry_index = new_entry_index;
break;
}
current_entry = next_entry;
}
}
}
}
size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
CalculateOffsetsIfNeeded();
if (buffer_count_ == 0) {
return 0;
}
ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
size_t max_size = 0;
while (entry) {
BufferRequirements* requirements =
&requirements_[entry->requirements_index];
// TODO(b/148246793): Update all size and offset variables types from
// int to size_t
const size_t current_size = entry->offset + requirements->size;
if (current_size > max_size) {
max_size = current_size;
}
if (entry->next_entry_index == -1) {
break;
}
entry = &buffers_sorted_by_offset_[entry->next_entry_index];
}
return max_size;
}
void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
CalculateOffsetsIfNeeded();
for (int i = 0; i < buffer_count_; ++i) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Planner buffer ID: %d, calculated offset: %d, size required: %d, "
"first_time_created: %d, "
"last_time_used: %d",
i, buffer_offsets_[i], requirements_[i].size,
requirements_[i].first_time_used, requirements_[i].last_time_used);
}
constexpr int kLineWidth = 80;
int max_size = kLineWidth;
int max_time = 0;
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* requirements = &requirements_[i];
const int offset = buffer_offsets_[i];
const int last_time_used = requirements->last_time_used;
const int size = offset + requirements->size;
if (size > max_size) {
max_size = size;
}
if (last_time_used > max_time) {
max_time = last_time_used;
}
}
char line[kLineWidth + 1];
for (int t = 0; t <= max_time; ++t) {
for (int c = 0; c < kLineWidth; ++c) {
line[c] = '.';
}
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* requirements = &requirements_[i];
if ((t < requirements->first_time_used) ||
(t > requirements->last_time_used)) {
continue;
}
const int offset = buffer_offsets_[i];
if (offset == -1) {
continue;
}
const int size = requirements->size;
const int line_start = (offset * kLineWidth) / max_size;
const int line_end = ((offset + size) * kLineWidth) / max_size;
for (int n = line_start; n < line_end; ++n) {
if (line[n] == '.') {
char display;
if (i < 10) {
display = '0' + i;
} else if (i < 36) {
display = 'a' + (i - 10);
} else if (i < 62) {
display = 'A' + (i - 36);
} else {
display = '*';
}
line[n] = display;
} else {
line[n] = '!';
}
}
}
line[kLineWidth] = 0;
TF_LITE_REPORT_ERROR(error_reporter, "%s", (const char*)line);
}
}
int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; }
TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(
tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
CalculateOffsetsIfNeeded();
if ((buffer_index < 0) || (buffer_index >= buffer_count_)) {
TF_LITE_REPORT_ERROR(error_reporter,
"buffer index %d is outside range 0 to %d",
buffer_index, buffer_count_);
return kTfLiteError;
}
*offset = buffer_offsets_[buffer_index];
return kTfLiteOk;
}
bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
CalculateOffsetsIfNeeded();
bool were_overlaps_found = false;
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* a_requirements = &requirements_[i];
const int a_start_offset = buffer_offsets_[i];
const int a_first_time_used = a_requirements->first_time_used;
const int a_last_time_used = a_requirements->last_time_used;
const int a_end_offset = a_start_offset + a_requirements->size;
for (int j = 0; j < buffer_count_; ++j) {
if (i == j) {
continue;
}
BufferRequirements* b_requirements = &requirements_[j];
const int b_start_offset = buffer_offsets_[j];
const int b_first_time_used = b_requirements->first_time_used;
const int b_last_time_used = b_requirements->last_time_used;
const int b_end_offset = b_start_offset + b_requirements->size;
if ((a_first_time_used > b_last_time_used) ||
(b_first_time_used > a_last_time_used)) {
// Buffers don't overlap in time.
continue;
}
if ((a_start_offset >= b_end_offset) ||
(b_start_offset >= a_end_offset)) {
// No overlap in memory.
continue;
}
were_overlaps_found = true;
TF_LITE_REPORT_ERROR(
error_reporter, "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)",
i, a_first_time_used, a_last_time_used, a_start_offset, a_end_offset,
j, b_first_time_used, b_last_time_used, b_start_offset, b_end_offset);
}
}
return were_overlaps_found;
}
} // namespace tflite

View File

@@ -0,0 +1,163 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
namespace tflite {
constexpr int kOnlinePlannedBuffer = -1;
// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
// The algorithm works like this:
// - The client enters the buffer information through AddBuffer().
// - When a function like GetOffsetForBuffer() is called, the
// CalculateOffsetsIfNeeded() method is invoked.
// - If an up-to-date plan is not already present, one will be calculated.
// - The buffers are sorted in descending order of size.
// - The largest buffer is placed at offset zero.
// - The rest of the buffers are looped through in descending size order.
// - The other buffers that need to be in memory at the same time are found.
// - The first gap between simultaneously active buffers that the current
// buffer fits into will be used.
// - If no large-enough gap is found, the current buffer is placed after the
// last buffer that's simultaneously active.
// - This continues until all buffers are placed, and the offsets stored.
//
// This is not guaranteed to produce the best placement, since that's an
// NP-Complete problem, but in practice it should produce one that's decent.
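//
// A minimal usage sketch (the scratch size and the buffer sizes/lifetimes
// below are illustrative assumptions, not requirements):
//
//   unsigned char planner_scratch[4 * 1024];
//   GreedyMemoryPlanner planner(planner_scratch, sizeof(planner_scratch));
//   planner.AddBuffer(error_reporter, /*size=*/1024, /*first_time_used=*/0,
//                     /*last_time_used=*/1);
//   planner.AddBuffer(error_reporter, /*size=*/512, /*first_time_used=*/1,
//                     /*last_time_used=*/2);
//   int offset0 = -1;
//   planner.GetOffsetForBuffer(error_reporter, 0, &offset0);
//   const size_t arena_bytes_needed = planner.GetMaximumMemorySize();
//
// Because the two buffers overlap in time at step 1 they are given
// non-overlapping offsets; buffers whose lifetimes never overlap may share
// the same region of the arena.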
class GreedyMemoryPlanner : public MemoryPlanner {
public:
// You need to pass in an area of memory to be used for planning. This memory
// needs to have a lifetime as long as the planner, but isn't owned by this
// object, so management should be handled by the client. This is so it can be
// stack or globally allocated if necessary on devices without dynamic memory
// allocation. How many buffers can be planned for will depend on the size of
// this scratch memory, so you should enlarge it if you see an error when
// calling AddBuffer(). The memory can be reused once you're done with the
// planner, as long as you copy the calculated offsets to another location.
// Each buffer requires about 36 bytes of scratch.
GreedyMemoryPlanner(unsigned char* scratch_buffer, int scratch_buffer_size);
~GreedyMemoryPlanner() override;
// Record details of a buffer we want to place.
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used) override;
// Record details of an offline planned buffer offset we want to place.
// offline_offset is the buffer offset from the start of the arena.
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used,
int offline_offset);
// Returns the high-water mark of used memory. This is the minimum size of a
// memory arena you'd need to allocate to hold these buffers.
size_t GetMaximumMemorySize() override;
// How many buffers have been recorded.
int GetBufferCount() override;
// Where a given buffer should be placed in the memory arena.
// This information is stored in the memory arena itself, so once the arena
// is used for inference, it will be overwritten.
TfLiteStatus GetOffsetForBuffer(ErrorReporter* error_reporter,
int buffer_index, int* offset) override;
// Prints an ascii-art diagram of the buffer layout plan.
void PrintMemoryPlan(ErrorReporter* error_reporter);
// Debug method to check whether any buffer allocations are overlapping. This
// is an O(N^2) complexity operation, so only use for testing.
bool DoAnyBuffersOverlap(ErrorReporter* error_reporter);
// Used to store a list of buffers ordered by their offset.
struct ListEntry {
int offset;
int requirements_index;
int next_entry_index;
};
// Number of bytes required in order to plan a buffer.
static size_t per_buffer_size() {
const int per_buffer_size =
sizeof(BufferRequirements) + // requirements_
sizeof(int) + // buffer_sizes_sorted_
sizeof(int) + // buffer_ids_sorted_
sizeof(ListEntry) + // buffers_sorted_by_offset_
sizeof(int); // buffer_offsets_;
return per_buffer_size;
}
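// For example, to plan for up to 64 buffers (64 is an illustrative choice),
// the scratch arena handed to the constructor should hold at least:
//   const size_t needed = 64 * GreedyMemoryPlanner::per_buffer_size();
// bytes.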
private:
// Whether a buffer is active in a given time range.
bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
const int last_time_used) const;
// Walks the list to return the next buffer that is active in a given time
// range, or a null pointer if there are none.
ListEntry* NextSimultaneouslyActiveBuffer(const ListEntry* start,
const int first_time_used,
const int last_time_used);
// If there isn't an up-to-date plan, calculate a new one.
void CalculateOffsetsIfNeeded();
// How many buffers we can plan for, based on the arena size we're given in
// the constructor.
int max_buffer_count_;
// The number of buffers added so far.
int buffer_count_;
// Records the client-provided information about each buffer.
struct BufferRequirements {
int size;
int offline_offset;
int first_time_used;
int last_time_used;
};
// Working arrays used during the layout algorithm.
BufferRequirements* requirements_;
// buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
// {
// offline planned buffers,
// online planned buffers sorted by size
// }
int* buffer_sizes_sorted_;
int* buffer_ids_sorted_;
ListEntry* buffers_sorted_by_offset_;
int next_free_entry_; // Index of the next free entry of
// buffers_sorted_by_offset_
int first_entry_index_; // Index of the first entry (smallest offset) of
// buffers_sorted_by_offset_
// Stores the outcome of the plan, the location of each buffer in the arena.
int* buffer_offsets_;
// Whether buffers have been added since the last plan was calculated.
bool need_to_calculate_offsets_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_

View File

@@ -0,0 +1,54 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/memory_planner/linear_memory_planner.h"
namespace tflite {
LinearMemoryPlanner::LinearMemoryPlanner()
: current_buffer_count_(0), next_free_offset_(0) {}
LinearMemoryPlanner::~LinearMemoryPlanner() {}
TfLiteStatus LinearMemoryPlanner::AddBuffer(
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
int last_time_used) {
if (current_buffer_count_ >= kMaxBufferCount) {
TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
kMaxBufferCount);
return kTfLiteError;
}
buffer_offsets_[current_buffer_count_] = next_free_offset_;
next_free_offset_ += size;
++current_buffer_count_;
return kTfLiteOk;
}
size_t LinearMemoryPlanner::GetMaximumMemorySize() { return next_free_offset_; }
int LinearMemoryPlanner::GetBufferCount() { return current_buffer_count_; }
TfLiteStatus LinearMemoryPlanner::GetOffsetForBuffer(
tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
if ((buffer_index < 0) || (buffer_index >= current_buffer_count_)) {
TF_LITE_REPORT_ERROR(error_reporter,
"buffer index %d is outside range 0 to %d",
buffer_index, current_buffer_count_);
return kTfLiteError;
}
*offset = buffer_offsets_[buffer_index];
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -0,0 +1,50 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
namespace tflite {
// The simplest possible memory planner that just lays out all buffers at
// increasing offsets without trying to reuse memory.
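//
// For example (illustrative sizes; with this planner offsets are simply
// cumulative, so the three buffers below land at offsets 0, 100 and 150, and
// GetMaximumMemorySize() returns 200 regardless of the buffers' lifetimes):
//
//   LinearMemoryPlanner planner;
//   planner.AddBuffer(error_reporter, 100, 0, 1);
//   planner.AddBuffer(error_reporter, 50, 2, 3);
//   planner.AddBuffer(error_reporter, 50, 2, 3);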
class LinearMemoryPlanner : public MemoryPlanner {
public:
LinearMemoryPlanner();
~LinearMemoryPlanner() override;
TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used) override;
size_t GetMaximumMemorySize() override;
int GetBufferCount() override;
TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
int buffer_index, int* offset) override;
private:
static constexpr int kMaxBufferCount = 1024;
size_t buffer_offsets_[kMaxBufferCount];
int current_buffer_count_;
size_t next_free_offset_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_

View File

@@ -0,0 +1,71 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
namespace tflite {
// Interface class for planning the layout of memory buffers during the
// execution of a graph.
// It's designed to be used by a client that iterates in any order through the
// buffers it wants to lay out, and then calls the getter functions for
// information about the calculated layout. For example:
//
// SomeMemoryPlanner planner;
// planner.AddBuffer(reporter, 100, 0, 1); // Buffer 0
// planner.AddBuffer(reporter, 50, 2, 3); // Buffer 1
// planner.AddBuffer(reporter, 50, 2, 3); // Buffer 2
//
// int offset0;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 0, &offset0));
// int offset1;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 1, &offset1));
// int offset2;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 2, &offset2));
// const int arena_size_needed = planner.GetMaximumMemorySize();
//
// The goal is for applications to be able to experiment with different layout
// strategies without changing their client code, by swapping out classes that
// implement this interface.
class MemoryPlanner {
public:
MemoryPlanner() {}
virtual ~MemoryPlanner() {}
// Pass information about a buffer's size and lifetime to the layout
// algorithm. The order this is called implicitly assigns an index to the
// result, so the buffer information that's passed into the N-th call of
// this method will be used as the buffer_index argument to
// GetOffsetForBuffer().
virtual TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter,
int size, int first_time_used,
int last_time_used) = 0;
// The largest contiguous block of memory that's needed to hold the layout.
virtual size_t GetMaximumMemorySize() = 0;
// How many buffers have been added to the planner.
virtual int GetBufferCount() = 0;
// Calculated layout offset for the N-th buffer added to the planner.
virtual TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
int buffer_index, int* offset) = 0;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_

File diff suppressed because it is too large

View File

@@ -0,0 +1,250 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Namespace used for unittests.
namespace internal {
// Sets up all of the data structure members for a TfLiteTensor based on the
// contents of a serialized tensor in the flatbuffer.
// TODO(b/160894903): Once all kernels have been updated to the new
// TfLiteEvalTensor API - drop the allocate_temp flag. This enables internal
// flatbuffer quantization or dimension allocations to take place in either the
// temp or tail section of the arena.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
SimpleMemoryAllocator* allocator, bool allocate_temp,
const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteTensor* result);
// A handle tracking scratch buffer allocation. This handle is created by
// `RequestScratchBufferInArena`. `data` field is populated in
// `FinishModelAllocation` after static memory planning.
// TODO(b/150257460) As a future optimization, this struct could be replaced by
// a union, since once `data` is populated, `bytes` and `node_idx` are not
// needed.
typedef struct {
// Pointer to the scratch buffer.
uint8_t* data;
// Number of bytes required by the buffer. The actual allocated size might be
// greater than `bytes` due to buffer alignment.
size_t bytes;
// Node that the buffer is allocated for. This provides useful information to
// determine the lifetime of the buffer. In AllocationInfo, this buffer will
// have `before` = node_idx and `after` = node_idx.
int node_idx;
} ScratchBufferHandle;
} // namespace internal
typedef struct {
TfLiteNode node;
const TfLiteRegistration* registration;
} NodeAndRegistration;
// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are required
// to stand up a model for inference in TF Micro. This class currently relies on
// an additional allocator - SimpleMemoryAllocator - for all allocations from an
// arena. These allocations are divided into head (non-persistent) and tail
// (persistent) regions:
//
// Memory layout, to help understand how it works.
// This layout could change in future versions.
// ************** .memory_allocator->GetBuffer()
// Tensors/Scratch buffers (head)
// ************** .head_watermark
// unused memory
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
// - ->GetDataSize()
// persistent area (tail)
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
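//
// A rough sketch of the expected call sequence (the arena size and the
// model/op_resolver/error_reporter objects are assumptions of this example):
//
//   uint8_t tensor_arena[16 * 1024];
//   MicroAllocator* allocator = MicroAllocator::Create(
//       tensor_arena, sizeof(tensor_arena), error_reporter);
//   NodeAndRegistration* node_and_registrations = nullptr;
//   TfLiteEvalTensor* eval_tensors = nullptr;
//   allocator->StartModelAllocation(model, op_resolver,
//                                   &node_and_registrations, &eval_tensors);
//   // ... init/prepare kernels, request scratch buffers, etc. ...
//   allocator->FinishModelAllocation(model, eval_tensors);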
class MicroAllocator {
public:
// Creates a MicroAllocator instance from a given tensor arena. This arena
// will be managed by the created instance.
// Note: Please use __declspec(align(16)) to make sure tensor_arena is 16
// bytes aligned, otherwise some head room will be wasted.
// TODO(b/157615197): Cleanup constructor + factory usage.
static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
ErrorReporter* error_reporter);
// Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
// instance. This allocator instance will use the SimpleMemoryAllocator
// instance to manage allocations internally.
static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
// Begin allocating internal resources required for model inference.
// This method will run through the flatbuffer data supplied in the model to
// properly allocate tensor, node, and op registration data. This method is
// expected to be followed with a call to FinishModelAllocation() before
// resuming allocation with another model. All persistent tensor buffers are
// stored in the out-param eval_tensors. This value is allocated from the
// persistent memory arena and will be used to host runtime tensor buffers.
TfLiteStatus StartModelAllocation(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations,
TfLiteEvalTensor** eval_tensors);
// Finish allocating internal resources required for model inference.
// This method will plan non-persistent buffers and commit a memory plan to
// the 'head' section of the memory arena. All variable tensor data will also
// be allocated. This method should be called after assigning model resources
// in StartModelAllocation(). The eval_tensors pointer should be the value
// passed into this class during StartModelAllocation().
TfLiteStatus FinishModelAllocation(const Model* model,
TfLiteEvalTensor* eval_tensors);
// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
// persistent arena memory and is only guaranteed for the lifetime of the
// application. The eval_tensors pointer should be the value passed into this
// class during StartModelAllocation() and contains the source-of-truth for
// buffers.
virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
// temporary arena memory and is only guaranteed until a call is made to
// ResetTempAllocations(). The eval_tensors pointer should be the value passed
// into this class during StartModelAllocation() and contains the
// source-of-truth for buffers.
virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
TfLiteEvalTensor* eval_tensors,
int tensor_index);
// Resets all temporary allocations. This method should be called after a
// chain of temp allocations (e.g. chain of TfLiteTensor objects via
// AllocateTempTfLiteTensor()).
virtual void ResetTempAllocations();
// Allocates persistent buffer which has the same life time as the allocator.
// The memory is immediately available and is allocated from the tail of the
// arena.
void* AllocatePersistentBuffer(size_t bytes);
// Register a scratch buffer of size `bytes` for Node with `node_id`.
// This method only allocates a BufferHandle holding information for memory
// planning. The buffer ptr is ready after `FinishModelAllocation` and can
// be retrieved via the `GetScratchBuffer` method using the returned buffer_idx.
// Note that there should be no tail allocation between two consecutive
// `RequestScratchBufferInArena` calls.
TfLiteStatus RequestScratchBufferInArena(int node_id, size_t bytes,
int* buffer_idx);
// Returns the pointer to the planned scratch buffer.
void* GetScratchBuffer(int buffer_idx) const;
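// Together with RequestScratchBufferInArena() this implements the scratch
// buffer workflow. A sketch of the ordering (the node id and byte count are
// illustrative):
//
//   int buffer_idx = -1;
//   allocator->RequestScratchBufferInArena(/*node_id=*/0, /*bytes=*/256,
//                                          &buffer_idx);   // during Prepare
//   allocator->FinishModelAllocation(model, eval_tensors); // plan the arena
//   void* scratch = allocator->GetScratchBuffer(buffer_idx); // during Invoke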
// Returns the arena usage in bytes, only available after
// `FinishModelAllocation`. Otherwise, it will return 0.
size_t used_bytes() const;
protected:
MicroAllocator(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
virtual ~MicroAllocator();
// Allocates an array in the arena to hold pointers to the node and
// registration pointers required to represent the inference graph of the
// model.
virtual TfLiteStatus AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations);
// Populates node and registration pointers representing the inference graph
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. operator data) is allocated from the
// arena.
virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations);
// Allocates the list of persistent TfLiteEvalTensors that are used for the
// "eval" phase of model inference. These structs will be the source of truth
// for all tensor buffers. Allocation results are stored in the out-param
// eval_tensors.
virtual TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors);
// Allocates persistent tensor buffers for variable tensors in the subgraph.
virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this method. It is only used to record TfLiteTensor persistent allocations.
virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
// Populates a TfLiteTensor struct with data from the model flatbuffer. Any
// quantization data is allocated from either the tail (persistent) or temp
// sections of the arena based on the allocation flag.
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this function since all allocations for quantized data will take place in
// the temp section.
virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp);
ErrorReporter* error_reporter() const;
// Returns the first subgraph from the model.
const SubGraph* GetSubGraphFromModel(const Model* model);
private:
// Commits a memory plan for all non-persistent buffer allocations in the
// 'head' section of the memory arena. The eval_tensors pointer is the list of
// pre-allocated TfLiteEvalTensor structs that will point to the buffers that
// will be allocated into the head section in this function call.
virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);
// A simple memory allocator that always allocates from the arena tail or head.
SimpleMemoryAllocator* memory_allocator_;
ErrorReporter* error_reporter_;
bool model_is_allocating_;
// In reverse order for efficiency.
// i.e. scratch_buffer_handles_[0] is the handle for the last buffer,
// corresponding to the last RequestScratchBufferInArena call.
internal::ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
// How many scratch buffers have been allocated.
size_t scratch_buffer_count_ = 0;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_

View File

@@ -0,0 +1,41 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include <cstdarg>
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#include "tensorflow/lite/micro/debug_log.h"
#include "tensorflow/lite/micro/micro_string.h"
#endif
namespace tflite {
int MicroErrorReporter::Report(const char* format, va_list args) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
// Only pull in the implementation of this function for builds where we expect
// to make use of it, to be extra cautious about not increasing the code size.
static constexpr int kMaxLogLen = 256;
char log_buffer[kMaxLogLen];
MicroVsnprintf(log_buffer, kMaxLogLen, format, args);
DebugLog(log_buffer);
DebugLog("\r\n");
#endif
return 0;
}
} // namespace tflite

View File

@@ -0,0 +1,36 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#include <cstdarg>
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
class MicroErrorReporter : public ErrorReporter {
public:
~MicroErrorReporter() override {}
int Report(const char* format, va_list args) override;
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_

View File

@@ -0,0 +1,447 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_interpreter.h"
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
return registration->custom_name;
} else {
return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
}
}
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS)
} // namespace
namespace internal {
ContextHelper::ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator, const Model* model)
: allocator_(allocator), error_reporter_(error_reporter), model_(model) {}
void* ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes) {
return reinterpret_cast<ContextHelper*>(ctx->impl_)
->allocator_->AllocatePersistentBuffer(bytes);
}
TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
return helper->allocator_->RequestScratchBufferInArena(
helper->current_node_idx_, bytes, buffer_idx);
}
void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
return reinterpret_cast<ContextHelper*>(ctx->impl_)
->allocator_->GetScratchBuffer(buffer_idx);
}
void ContextHelper::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
va_list args;
va_start(args, format);
// Forward the already-collected va_list directly to the error reporter;
// passing it through the variadic macro would be undefined behavior.
helper->error_reporter_->Report(format, args);
va_end(args);
#endif
}
TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context,
int tensor_idx) {
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
return helper->allocator_->AllocateTempTfLiteTensor(
helper->model_, helper->eval_tensors_, tensor_idx);
}
TfLiteEvalTensor* ContextHelper::GetEvalTensor(
const struct TfLiteContext* context, int tensor_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(context->impl_);
return &helper->eval_tensors_[tensor_idx];
}
void ContextHelper::SetNodeIndex(int idx) { current_node_idx_ = idx; }
void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) {
eval_tensors_ = eval_tensors;
}
} // namespace internal
MicroInterpreter::MicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
uint8_t* tensor_arena,
size_t tensor_arena_size,
ErrorReporter* error_reporter,
tflite::Profiler* profiler)
: model_(model),
op_resolver_(op_resolver),
error_reporter_(error_reporter),
allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
error_reporter)),
tensors_allocated_(false),
initialization_status_(kTfLiteError),
eval_tensors_(nullptr),
context_helper_(error_reporter_, &allocator_, model),
input_tensor_(nullptr),
output_tensor_(nullptr) {
Init(profiler);
}
MicroInterpreter::MicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
MicroAllocator* allocator,
ErrorReporter* error_reporter,
tflite::Profiler* profiler)
: model_(model),
op_resolver_(op_resolver),
error_reporter_(error_reporter),
allocator_(*allocator),
tensors_allocated_(false),
initialization_status_(kTfLiteError),
eval_tensors_(nullptr),
context_helper_(error_reporter_, &allocator_, model),
input_tensor_(nullptr),
output_tensor_(nullptr) {
Init(profiler);
}
MicroInterpreter::~MicroInterpreter() {
if (node_and_registrations_ != nullptr) {
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
TfLiteNode* node = &(node_and_registrations_[i].node);
const TfLiteRegistration* registration =
node_and_registrations_[i].registration;
// registration is allocated outside the interpreter, so double check to
// make sure it's not nullptr;
if (registration != nullptr && registration->free != nullptr) {
registration->free(&context_, node->user_data);
}
}
}
}
void MicroInterpreter::Init(tflite::Profiler* profiler) {
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
model_->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Only 1 subgraph is currently supported.\n");
initialization_status_ = kTfLiteError;
return;
}
subgraph_ = (*subgraphs)[0];
context_.impl_ = static_cast<void*>(&context_helper_);
context_.ReportError = context_helper_.ReportOpError;
context_.GetTensor = context_helper_.GetTensor;
context_.GetEvalTensor = context_helper_.GetEvalTensor;
context_.recommended_num_threads = 1;
context_.profiler = profiler;
initialization_status_ = kTfLiteOk;
}
void MicroInterpreter::CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr) {
int32_t tensorSize = 1;
for (int d = 0; d < tensorCorr->dims->size; ++d)
tensorSize *= reinterpret_cast<const int32_t*>(tensorCorr->dims->data)[d];
switch (tensorCorr->type) {
case TfLiteType::kTfLiteFloat32:
CorrectTensorDataEndianness(tensorCorr->data.f, tensorSize);
break;
case TfLiteType::kTfLiteFloat16:
CorrectTensorDataEndianness(tensorCorr->data.f16, tensorSize);
break;
case TfLiteType::kTfLiteInt64:
CorrectTensorDataEndianness(tensorCorr->data.i64, tensorSize);
break;
case TfLiteType::kTfLiteInt32:
CorrectTensorDataEndianness(tensorCorr->data.i32, tensorSize);
break;
case TfLiteType::kTfLiteInt16:
CorrectTensorDataEndianness(tensorCorr->data.i16, tensorSize);
break;
case TfLiteType::kTfLiteComplex64:
CorrectTensorDataEndianness(tensorCorr->data.c64, tensorSize);
break;
case TfLiteType::kTfLiteComplex128:
CorrectTensorDataEndianness(tensorCorr->data.c128, tensorSize);
break;
default:
// Do nothing for other data types.
break;
}
}
template <class T>
void MicroInterpreter::CorrectTensorDataEndianness(T* data, int32_t size) {
for (int32_t i = 0; i < size; ++i) {
data[i] = flatbuffers::EndianScalar(data[i]);
}
}
TfLiteStatus MicroInterpreter::AllocateTensors() {
if (allocator_.StartModelAllocation(model_, op_resolver_,
&node_and_registrations_,
&eval_tensors_) != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed starting model allocation.\n");
initialization_status_ = kTfLiteError;
return kTfLiteError;
}
// Update the pointer now that TfLiteEvalTensor allocation has completed on
// the context helper.
// TODO(b/16157777): This call would not be needed if ContextHelper is rolled
// into the interpreter.
context_helper_.SetTfLiteEvalTensors(eval_tensors_);
// If the system is big endian then convert weights from the flatbuffer from
// little to big endian on startup so that it does not need to be done during
// inference.
// NOTE: This requires that the flatbuffer is held in memory which can be
// modified by this process.
if (!FLATBUFFERS_LITTLEENDIAN) {
for (size_t t = 0; t < subgraph_->tensors()->size(); ++t) {
if (auto* buffer =
(*model_->buffers())[subgraph_->tensors()->Get(t)->buffer()]) {
// If we've found a buffer, does it have any data?
if (auto* array = buffer->data()) {
// If it has any data, is the data size larger than zero?
if (array->size()) {
// Update the endianness of the corresponding eval tensor since that
// struct holds the buffer used at inference time.
CorrectTensorEndianness(&eval_tensors_[t]);
}
}
}
}
}
// Only allow AllocatePersistentBuffer in Init stage.
context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = nullptr;
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
context_helper_.SetNodeIndex(i);
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
size_t init_data_size;
const char* init_data;
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
init_data = reinterpret_cast<const char*>(node->custom_initial_data);
init_data_size = node->custom_initial_data_size;
} else {
init_data = reinterpret_cast<const char*>(node->builtin_data);
init_data_size = 0;
}
if (registration->init) {
node->user_data =
registration->init(&context_, init_data, init_data_size);
}
}
context_helper_.SetNodeIndex(-1);
// Both AllocatePersistentBuffer and RequestScratchBufferInArena is
// available in Prepare stage.
context_.RequestScratchBufferInArena =
context_helper_.RequestScratchBufferInArena;
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
// Set node idx to annotate the lifetime for scratch buffers.
context_helper_.SetNodeIndex(i);
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
if (registration->prepare) {
TfLiteStatus prepare_status = registration->prepare(&context_, node);
if (prepare_status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Node %s (number %d) failed to prepare with status %d",
OpNameFromRegistration(registration), i, prepare_status);
return kTfLiteError;
}
}
allocator_.ResetTempAllocations();
}
context_helper_.SetNodeIndex(-1);
// Prepare is done, we're ready for Invoke. Memory allocation is no longer
// allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
context_.AllocatePersistentBuffer = nullptr;
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = context_helper_.GetScratchBuffer;
TF_LITE_ENSURE_OK(&context_,
allocator_.FinishModelAllocation(model_, eval_tensors_));
TF_LITE_ENSURE_STATUS(ResetVariableTensors());
tensors_allocated_ = true;
return kTfLiteOk;
}
TfLiteStatus MicroInterpreter::Invoke() {
if (initialization_status_ != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Invoke() called after initialization failed\n");
return kTfLiteError;
}
// Ensure tensors are allocated before the interpreter is invoked to avoid
// difficult to debug segfaults.
if (!tensors_allocated_) {
TF_LITE_ENSURE_OK(&context_, AllocateTensors());
}
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
auto* node = &(node_and_registrations_[i].node);
auto* registration = node_and_registrations_[i].registration;
if (registration->invoke) {
TfLiteStatus invoke_status;
#ifndef NDEBUG // Omit profiler overhead from release builds.
// The case where profiler == nullptr is handled by
// ScopedOperatorProfile.
tflite::Profiler* profiler =
reinterpret_cast<tflite::Profiler*>(context_.profiler);
ScopedOperatorProfile scoped_profiler(
profiler, OpNameFromRegistration(registration), i);
#endif
invoke_status = registration->invoke(&context_, node);
// All TfLiteTensor structs used in the kernel are allocated from temp
// memory in the allocator. This creates a chain of allocations in the
// temp section. The call below resets the chain of allocations to
// prepare for the next call.
allocator_.ResetTempAllocations();
if (invoke_status == kTfLiteError) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Node %s (number %d) failed to invoke with status %d",
OpNameFromRegistration(registration), i, invoke_status);
return kTfLiteError;
} else if (invoke_status != kTfLiteOk) {
return invoke_status;
}
}
}
return kTfLiteOk;
}
TfLiteTensor* MicroInterpreter::input(size_t index) {
const size_t length = inputs_size();
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Input index %d out of range (length is %d)", index,
length);
return nullptr;
}
if (index != 0) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Input tensors not at index 0 are allocated from the "
"persistent memory arena. Repeat calls will cause excess "
"allocation!");
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
inputs().Get(index));
}
if (input_tensor_ == nullptr) {
input_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, inputs().Get(index));
}
return input_tensor_;
}
TfLiteTensor* MicroInterpreter::output(size_t index) {
const size_t length = outputs_size();
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Output index %d out of range (length is %d)", index,
length);
return nullptr;
}
if (index != 0) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Output tensors not at index 0 are allocated from the "
"persistent memory arena. Repeat calls will cause excess "
"allocation!");
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
outputs().Get(index));
}
if (output_tensor_ == nullptr) {
// TODO(b/160894903): This API will allocate TfLiteTensor structs from
// persistent (tail) memory and cache on this pointer.
output_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, outputs().Get(index));
}
return output_tensor_;
}
TfLiteTensor* MicroInterpreter::tensor(size_t index) {
const size_t length = tensors_size();
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Tensor index %d out of range (length is %d)", index,
length);
return nullptr;
}
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
index);
}
TfLiteStatus MicroInterpreter::ResetVariableTensors() {
for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
auto* tensor = subgraph_->tensors()->Get(i);
if (tensor->is_variable()) {
size_t buffer_size;
TF_LITE_ENSURE_STATUS(
TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size));
int value = 0;
if (tensor->type() == tflite::TensorType_INT8) {
value = tensor->quantization()->zero_point()->Get(0);
}
memset(eval_tensors_[i].data.raw, value, buffer_size);
}
}
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -0,0 +1,208 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/type_to_tflitetype.h"
namespace tflite {
namespace internal {
// A helper class to encapsulate the implementation of APIs in Context.
// context->impl_ points to an instance of this class.
// Check tensorflow/lite/c/common.h for detailed descriptions.
// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
class ContextHelper {
public:
explicit ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator, const Model* model);
// Functions that will be assigned to function pointers on TfLiteContext:
static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx);
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_idx);
// Sets the current node index to assist with scratch buffer allocations:
void SetNodeIndex(int idx);
// Sets the pointer to a list of TfLiteEvalTensor instances.
void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);
private:
MicroAllocator* allocator_;
ErrorReporter* error_reporter_;
const Model* model_;
TfLiteEvalTensor* eval_tensors_;
int current_node_idx_ = -1;
};
} // namespace internal
class MicroInterpreter {
public:
// The lifetime of the model, op resolver, tensor arena, error reporter and
// profiler must be at least as long as that of the interpreter object, since
// the interpreter may need to access them at any time. This means that you
// should usually create them with the same scope as each other, for example
// having them all allocated on the stack as local variables through a
// top-level function. The interpreter doesn't do any deallocation of any of
// the pointed-to objects, ownership remains with the caller.
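//
// A minimal end-to-end sketch (the arena size, the AllOpsResolver choice and
// the model_data buffer are assumptions of this example, not requirements):
//
//   const tflite::Model* model = tflite::GetModel(model_data);
//   tflite::AllOpsResolver resolver;
//   tflite::MicroErrorReporter micro_error_reporter;
//   static uint8_t tensor_arena[10 * 1024];
//   tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
//                                        sizeof(tensor_arena),
//                                        &micro_error_reporter);
//   interpreter.AllocateTensors();
//   TfLiteTensor* input = interpreter.input(0);
//   // ... fill input->data ...
//   interpreter.Invoke();
//   TfLiteTensor* output = interpreter.output(0);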
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);
// Create an interpreter instance using an existing MicroAllocator instance.
// This constructor should be used when creating an allocator that needs to
// have allocation handled in more than one interpreter or for recording
// allocations inside the interpreter. The lifetime of the allocator must be
// as long as that of the interpreter object.
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
MicroAllocator* allocator, ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);
~MicroInterpreter();
// Runs through the model and allocates all necessary input, output and
// intermediate tensors.
TfLiteStatus AllocateTensors();
// In order to support partial graph runs for strided models, this can return
// values other than kTfLiteOk and kTfLiteError.
// TODO(b/149795762): Add this to the TfLiteStatus enum.
TfLiteStatus Invoke();
size_t tensors_size() const { return context_.tensors_size; }
TfLiteTensor* tensor(size_t tensor_index);
template <class T>
T* typed_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
TfLiteTensor* input(size_t index);
size_t inputs_size() const { return subgraph_->inputs()->Length(); }
const flatbuffers::Vector<int32_t>& inputs() const {
return *subgraph_->inputs();
}
TfLiteTensor* input_tensor(size_t index) { return input(index); }
template <class T>
T* typed_input_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = input_tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
TfLiteTensor* output(size_t index);
size_t outputs_size() const { return subgraph_->outputs()->Length(); }
const flatbuffers::Vector<int32_t>& outputs() const {
return *subgraph_->outputs();
}
TfLiteTensor* output_tensor(size_t index) { return output(index); }
template <class T>
T* typed_output_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = output_tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
// Reset all variable tensors to the default value.
TfLiteStatus ResetVariableTensors();
TfLiteStatus initialization_status() const { return initialization_status_; }
size_t operators_size() const { return subgraph_->operators()->size(); }
// For debugging only.
const NodeAndRegistration node_and_registration(int node_index) const {
return node_and_registrations_[node_index];
}
// For debugging only.
// Returns the actual used arena in bytes. This method gives the optimal arena
// size. It's only available after `AllocateTensors` has been called.
// Note that normally `tensor_arena` requires 16-byte alignment to fully
// utilize the space. If that is not the case, the optimal arena size would be
// arena_used_bytes() + 16.
size_t arena_used_bytes() const { return allocator_.used_bytes(); }
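// Illustrative sketch: after a successful AllocateTensors() call, a tight
// arena size for future builds could be derived as
//   size_t optimal_size = interpreter.arena_used_bytes() + 16;
// where the extra 16 bytes cover a potentially unaligned tensor_arena.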
protected:
const MicroAllocator& allocator() const { return allocator_; }
const TfLiteContext& context() const { return context_; }
private:
// TODO(b/158263161): Consider switching to Create() function to enable better
// error reporting during initialization.
void Init(tflite::Profiler* profiler);
void CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr);
template <class T>
void CorrectTensorDataEndianness(T* data, int32_t size);
NodeAndRegistration* node_and_registrations_ = nullptr;
const Model* model_;
const MicroOpResolver& op_resolver_;
ErrorReporter* error_reporter_;
TfLiteContext context_ = {};
MicroAllocator& allocator_;
bool tensors_allocated_;
TfLiteStatus initialization_status_;
const SubGraph* subgraph_;
TfLiteEvalTensor* eval_tensors_;
internal::ContextHelper context_helper_;
// TODO(b/160894903): Clean these pointers up when all APIs are updated to new
// TfLiteEvalTensor buffers.
TfLiteTensor* input_tensor_;
TfLiteTensor* output_tensor_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_

View File

@@ -0,0 +1,458 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#include <cstdio>
#include <cstring>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
template <unsigned int tOpCount>
class MicroMutableOpResolver : public MicroOpResolver {
public:
explicit MicroMutableOpResolver(ErrorReporter* error_reporter = nullptr)
: error_reporter_(error_reporter) {}
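// Usage sketch (the template argument of 4 and the op set are illustrative;
// they must cover every operator the model actually uses):
//   static tflite::MicroMutableOpResolver<4> resolver;
//   resolver.AddConv2D();
//   resolver.AddFullyConnected();
//   resolver.AddReshape();
//   resolver.AddSoftmax();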
const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override {
if (op == BuiltinOperator_CUSTOM) return nullptr;
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if (registration.builtin_code == op) {
return &registration;
}
}
return nullptr;
}
const TfLiteRegistration* FindOp(const char* op) const override {
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if ((registration.builtin_code == BuiltinOperator_CUSTOM) &&
(strcmp(registration.custom_name, op) == 0)) {
return &registration;
}
}
return nullptr;
}
MicroOpResolver::BuiltinParseFunction GetOpDataParser(
BuiltinOperator op) const override {
TFLITE_DCHECK(num_builtin_ops_ <= tOpCount);
for (unsigned int i = 0; i < num_builtin_ops_; ++i) {
if (builtin_codes_[i] == op) return builtin_parsers_[i];
}
return nullptr;
}
// Registers a Custom Operator with the MicroOpResolver.
//
// Only the first call for a given name will be successful. i.e. if this
// function is called again for a previously added Custom Operator, the
// MicroOpResolver will be unchanged and this function will return
// kTfLiteError.
TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) {
if (registrations_len_ >= tOpCount) {
if (error_reporter_) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Couldn't register custom op '%s', resolver size is too small (%d)",
name, tOpCount);
}
return kTfLiteError;
}
if (FindOp(name) != nullptr) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Calling AddCustom for the same op more than once "
"is not supported (Op: %s).",
name);
}
return kTfLiteError;
}
TfLiteRegistration* new_registration = &registrations_[registrations_len_];
registrations_len_ += 1;
*new_registration = *registration;
new_registration->builtin_code = BuiltinOperator_CUSTOM;
new_registration->custom_name = name;
return kTfLiteOk;
}
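// Illustrative sketch (Register_MY_CUSTOM_OP is a hypothetical registration
// function supplied by the application, not part of this header):
//   static tflite::MicroMutableOpResolver<1> resolver;
//   resolver.AddCustom("MY_CUSTOM_OP", Register_MY_CUSTOM_OP());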
// The Add* functions below add the various Builtin operators to the
// MicroMutableOpResolver object.
TfLiteStatus AddAbs() {
return AddBuiltin(BuiltinOperator_ABS, tflite::ops::micro::Register_ABS(),
ParseAbs);
}
TfLiteStatus AddAdd() {
return AddBuiltin(BuiltinOperator_ADD, tflite::ops::micro::Register_ADD(),
ParseAdd);
}
TfLiteStatus AddArgMax() {
return AddBuiltin(BuiltinOperator_ARG_MAX,
tflite::ops::micro::Register_ARG_MAX(), ParseArgMax);
}
TfLiteStatus AddArgMin() {
return AddBuiltin(BuiltinOperator_ARG_MIN,
tflite::ops::micro::Register_ARG_MIN(), ParseArgMin);
}
TfLiteStatus AddAveragePool2D() {
return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
tflite::ops::micro::Register_AVERAGE_POOL_2D(),
ParsePool);
}
TfLiteStatus AddCeil() {
return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(),
ParseCeil);
}
TfLiteStatus AddCircularBuffer() {
return AddCustom("CIRCULAR_BUFFER",
tflite::ops::micro::Register_CIRCULAR_BUFFER());
}
TfLiteStatus AddConcatenation() {
return AddBuiltin(BuiltinOperator_CONCATENATION,
tflite::ops::micro::Register_CONCATENATION(),
ParseConcatenation);
}
TfLiteStatus AddConv2D() {
return AddBuiltin(BuiltinOperator_CONV_2D,
tflite::ops::micro::Register_CONV_2D(), ParseConv2D);
}
TfLiteStatus AddCos() {
return AddBuiltin(BuiltinOperator_COS, tflite::ops::micro::Register_COS(),
ParseCos);
}
TfLiteStatus AddDepthwiseConv2D() {
return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
tflite::ops::micro::Register_DEPTHWISE_CONV_2D(),
ParseDepthwiseConv2D);
}
TfLiteStatus AddDequantize() {
return AddBuiltin(BuiltinOperator_DEQUANTIZE,
tflite::ops::micro::Register_DEQUANTIZE(),
ParseDequantize);
}
TfLiteStatus AddEqual() {
return AddBuiltin(BuiltinOperator_EQUAL,
tflite::ops::micro::Register_EQUAL(), ParseEqual);
}
TfLiteStatus AddFloor() {
return AddBuiltin(BuiltinOperator_FLOOR,
tflite::ops::micro::Register_FLOOR(), ParseFloor);
}
TfLiteStatus AddFullyConnected() {
return AddBuiltin(BuiltinOperator_FULLY_CONNECTED,
tflite::ops::micro::Register_FULLY_CONNECTED(),
ParseFullyConnected);
}
TfLiteStatus AddGreater() {
return AddBuiltin(BuiltinOperator_GREATER,
tflite::ops::micro::Register_GREATER(), ParseGreater);
}
TfLiteStatus AddGreaterEqual() {
return AddBuiltin(BuiltinOperator_GREATER_EQUAL,
tflite::ops::micro::Register_GREATER_EQUAL(),
ParseGreaterEqual);
}
TfLiteStatus AddHardSwish() {
return AddBuiltin(BuiltinOperator_HARD_SWISH,
tflite::ops::micro::Register_HARD_SWISH(),
ParseHardSwish);
}
TfLiteStatus AddL2Normalization() {
return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
tflite::ops::micro::Register_L2_NORMALIZATION(),
ParseL2Normalization);
}
TfLiteStatus AddLess() {
return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(),
ParseLess);
}
TfLiteStatus AddLessEqual() {
return AddBuiltin(BuiltinOperator_LESS_EQUAL,
tflite::ops::micro::Register_LESS_EQUAL(),
ParseLessEqual);
}
TfLiteStatus AddLog() {
return AddBuiltin(BuiltinOperator_LOG, tflite::ops::micro::Register_LOG(),
ParseLog);
}
TfLiteStatus AddLogicalAnd() {
return AddBuiltin(BuiltinOperator_LOGICAL_AND,
tflite::ops::micro::Register_LOGICAL_AND(),
ParseLogicalAnd);
}
TfLiteStatus AddLogicalNot() {
return AddBuiltin(BuiltinOperator_LOGICAL_NOT,
tflite::ops::micro::Register_LOGICAL_NOT(),
ParseLogicalNot);
}
TfLiteStatus AddLogicalOr() {
return AddBuiltin(BuiltinOperator_LOGICAL_OR,
tflite::ops::micro::Register_LOGICAL_OR(),
ParseLogicalOr);
}
TfLiteStatus AddLogistic() {
return AddBuiltin(BuiltinOperator_LOGISTIC,
tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
}
TfLiteStatus AddMaximum() {
return AddBuiltin(BuiltinOperator_MAXIMUM,
tflite::ops::micro::Register_MAXIMUM(), ParseMaximum);
}
TfLiteStatus AddMaxPool2D() {
return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
}
TfLiteStatus AddMean() {
return AddBuiltin(BuiltinOperator_MEAN, tflite::ops::micro::Register_MEAN(),
ParseReducer);
}
TfLiteStatus AddMinimum() {
return AddBuiltin(BuiltinOperator_MINIMUM,
tflite::ops::micro::Register_MINIMUM(), ParseMinimum);
}
TfLiteStatus AddMul() {
return AddBuiltin(BuiltinOperator_MUL, tflite::ops::micro::Register_MUL(),
ParseMul);
}
TfLiteStatus AddNeg() {
return AddBuiltin(BuiltinOperator_NEG, tflite::ops::micro::Register_NEG(),
ParseNeg);
}
TfLiteStatus AddNotEqual() {
return AddBuiltin(BuiltinOperator_NOT_EQUAL,
tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual);
}
TfLiteStatus AddPack() {
return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(),
ParsePack);
}
TfLiteStatus AddPad() {
return AddBuiltin(BuiltinOperator_PAD, tflite::ops::micro::Register_PAD(),
ParsePad);
}
TfLiteStatus AddPadV2() {
return AddBuiltin(BuiltinOperator_PADV2,
tflite::ops::micro::Register_PADV2(), ParsePadV2);
}
TfLiteStatus AddPrelu() {
return AddBuiltin(BuiltinOperator_PRELU,
tflite::ops::micro::Register_PRELU(), ParsePrelu);
}
TfLiteStatus AddQuantize() {
return AddBuiltin(BuiltinOperator_QUANTIZE,
tflite::ops::micro::Register_QUANTIZE(), ParseQuantize);
}
TfLiteStatus AddRelu() {
return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
ParseRelu);
}
TfLiteStatus AddRelu6() {
return AddBuiltin(BuiltinOperator_RELU6,
tflite::ops::micro::Register_RELU6(), ParseRelu6);
}
TfLiteStatus AddReshape() {
return AddBuiltin(BuiltinOperator_RESHAPE,
tflite::ops::micro::Register_RESHAPE(), ParseReshape);
}
TfLiteStatus AddResizeNearestNeighbor() {
return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
ParseResizeNearestNeighbor);
}
TfLiteStatus AddRound() {
return AddBuiltin(BuiltinOperator_ROUND,
tflite::ops::micro::Register_ROUND(), ParseRound);
}
TfLiteStatus AddRsqrt() {
return AddBuiltin(BuiltinOperator_RSQRT,
tflite::ops::micro::Register_RSQRT(), ParseRsqrt);
}
TfLiteStatus AddSin() {
return AddBuiltin(BuiltinOperator_SIN, tflite::ops::micro::Register_SIN(),
ParseSin);
}
TfLiteStatus AddSoftmax() {
return AddBuiltin(BuiltinOperator_SOFTMAX,
tflite::ops::micro::Register_SOFTMAX(), ParseSoftmax);
}
TfLiteStatus AddSplit() {
return AddBuiltin(BuiltinOperator_SPLIT,
tflite::ops::micro::Register_SPLIT(), ParseSplit);
}
TfLiteStatus AddSqrt() {
return AddBuiltin(BuiltinOperator_SQRT, tflite::ops::micro::Register_SQRT(),
ParseSqrt);
}
TfLiteStatus AddSquare() {
return AddBuiltin(BuiltinOperator_SQUARE,
tflite::ops::micro::Register_SQUARE(), ParseSquare);
}
TfLiteStatus AddStridedSlice() {
return AddBuiltin(BuiltinOperator_STRIDED_SLICE,
tflite::ops::micro::Register_STRIDED_SLICE(),
ParseStridedSlice);
}
TfLiteStatus AddSub() {
return AddBuiltin(BuiltinOperator_SUB, tflite::ops::micro::Register_SUB(),
ParseSub);
}
TfLiteStatus AddSvdf() {
return AddBuiltin(BuiltinOperator_SVDF, tflite::ops::micro::Register_SVDF(),
ParseSvdf);
}
TfLiteStatus AddTanh() {
return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(),
ParseTanh);
}
TfLiteStatus AddUnpack() {
return AddBuiltin(BuiltinOperator_UNPACK,
tflite::ops::micro::Register_UNPACK(), ParseUnpack);
}
unsigned int GetRegistrationLength() { return registrations_len_; }
private:
TfLiteStatus AddBuiltin(tflite::BuiltinOperator op,
const TfLiteRegistration& registration,
MicroOpResolver::BuiltinParseFunction parser) {
if (op == BuiltinOperator_CUSTOM) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Invalid parameter BuiltinOperator_CUSTOM to the "
"AddBuiltin function.");
}
return kTfLiteError;
}
if (FindOp(op) != nullptr) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Calling AddBuiltin with the same op more than "
"once is not supported (Op: #%d).",
op);
}
return kTfLiteError;
}
if (registrations_len_ >= tOpCount) {
if (error_reporter_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Couldn't register builtin op #%d, resolver size "
"is too small (%d).",
op, tOpCount);
}
return kTfLiteError;
}
registrations_[registrations_len_] = registration;
// Strictly speaking, the builtin_code is not necessary for TFLM, but we fill
// it in regardless.
registrations_[registrations_len_].builtin_code = op;
registrations_len_++;
builtin_codes_[num_builtin_ops_] = op;
builtin_parsers_[num_builtin_ops_] = parser;
num_builtin_ops_++;
return kTfLiteOk;
}
TfLiteRegistration registrations_[tOpCount];
unsigned int registrations_len_ = 0;
// Arrays (and counter) to store the builtin codes and their corresponding
// parse functions as these are registered with the Op Resolver.
BuiltinOperator builtin_codes_[tOpCount];
MicroOpResolver::BuiltinParseFunction builtin_parsers_[tOpCount];
unsigned int num_builtin_ops_ = 0;
ErrorReporter* error_reporter_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
}  // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_

View File

@@ -0,0 +1,73 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// This is an interface for the OpResolver for TFLiteMicro. The differences from
// the TFLite OpResolver base class are to:
// * explicitly remove support for Op versions
// * allow for finer grained registration of the Builtin Ops to reduce code
// size for TFLiteMicro.
//
// We need an interface class instead of directly using MicroMutableOpResolver
// because MicroMutableOpResolver is a class template with the number of
// registered Ops as the template parameter.
class MicroOpResolver : public OpResolver {
public:
typedef TfLiteStatus (*BuiltinParseFunction)(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
// Returns the Op registration struct corresponding to the enum code from the
// flatbuffer schema. Returns nullptr if the op is not found or if op ==
// BuiltinOperator_CUSTOM.
virtual const TfLiteRegistration* FindOp(BuiltinOperator op) const = 0;
// Returns the Op registration struct corresponding to the custom operator by
// name.
virtual const TfLiteRegistration* FindOp(const char* op) const = 0;
// This implementation exists for compatibility with the OpResolver base class
// and disregards the version parameter.
const TfLiteRegistration* FindOp(BuiltinOperator op,
int version) const final {
return FindOp(op);
}
// This implementation exists for compatibility with the OpResolver base class
// and disregards the version parameter.
const TfLiteRegistration* FindOp(const char* op, int version) const final {
return FindOp(op);
}
// Returns the operator specific parsing function for the OpData for a
// BuiltinOperator (if registered), else nullptr.
virtual BuiltinParseFunction GetOpDataParser(BuiltinOperator op) const = 0;
~MicroOpResolver() override {}
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_

View File

@@ -0,0 +1,186 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
// `cinttypes` requires `__STDC_FORMAT_MACROS` to be defined to expose `PRId32`.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
std::vector<int> flatbuffersVector2StdVector(
const flatbuffers::Vector<int32_t>& fVector) {
std::vector<int> stdVector;
stdVector.reserve(fVector.size());
for (size_t i = 0; i < fVector.size(); i++) {
stdVector.push_back(fVector.Get(i));
}
return stdVector;
}
void PrintIntVector(const std::vector<int>& v) {
for (const auto& it : v) {
printf(" %d", it);
}
printf("\n");
}
void PrintTfLiteIntVector(const TfLiteIntArray* v) {
if (!v) {
printf(" (null)\n");
return;
}
for (int k = 0; k < v->size; k++) {
printf(" %d", v->data[k]);
}
printf("\n");
}
const char* TensorTypeName(TfLiteType type) {
switch (type) {
case kTfLiteNoType:
return "kTfLiteNoType";
case kTfLiteFloat32:
return "kTfLiteFloat32";
case kTfLiteInt32:
return "kTfLiteInt32";
case kTfLiteUInt8:
return "kTfLiteUInt8";
case kTfLiteInt8:
return "kTfLiteInt8";
case kTfLiteInt64:
return "kTfLiteInt64";
case kTfLiteString:
return "kTfLiteString";
case kTfLiteBool:
return "kTfLiteBool";
case kTfLiteInt16:
return "kTfLiteInt16";
case kTfLiteComplex64:
return "kTfLiteComplex64";
case kTfLiteComplex128:
return "kTfLiteComplex128";
case kTfLiteFloat16:
return "kTfLiteFloat16";
case kTfLiteFloat64:
return "kTfLiteFloat64";
}
return "(invalid)";
}
const char* AllocTypeName(TfLiteAllocationType type) {
switch (type) {
case kTfLiteMemNone:
return "kTfLiteMemNone";
case kTfLiteMmapRo:
return "kTfLiteMmapRo";
case kTfLiteDynamic:
return "kTfLiteDynamic";
case kTfLiteArenaRw:
return "kTfLiteArenaRw";
case kTfLiteArenaRwPersistent:
return "kTfLiteArenaRwPersistent";
case kTfLitePersistentRo:
return "kTfLitePersistentRo";
}
return "(invalid)";
}
} // namespace
// Helper function to print model flatbuffer data. This function is not called
// by default. Hence it's not linked in to the final binary code.
void PrintModelData(const Model* model, ErrorReporter* error_reporter) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
auto* subgraphs = model->subgraphs();
const SubGraph* subgraph = (*subgraphs)[0];
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
subgraph->tensors();
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
model->buffers();
TF_LITE_REPORT_ERROR(error_reporter, "==== Model info: =====");
for (size_t i = 0; i < tensors->size(); ++i) {
const tflite::Tensor& flatbuffer_tensor = *tensors->Get(i);
size_t type_size, tensor_size;
auto* buffer = (*buffers)[flatbuffer_tensor.buffer()];
auto* array = buffer->data();
int array_size = 0;
if (array) {
array_size = array->size();
}
BytesRequiredForTensor(flatbuffer_tensor, &tensor_size, &type_size,
error_reporter);
TF_LITE_REPORT_ERROR(
error_reporter, "Tensor index: %d arena tensor %d size %d ", i,
!array_size && !flatbuffer_tensor.is_variable(), tensor_size);
}
#endif
}
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter) {
printf("Interpreter has %zu tensors and %zu nodes\n",
interpreter->tensors_size(), interpreter->operators_size());
printf("Inputs:");
PrintIntVector(flatbuffersVector2StdVector(interpreter->inputs()));
printf("Outputs:");
PrintIntVector(flatbuffersVector2StdVector(interpreter->outputs()));
printf("\n");
for (size_t tensor_index = 0; tensor_index < interpreter->tensors_size();
tensor_index++) {
TfLiteTensor* tensor = interpreter->tensor(static_cast<int>(tensor_index));
printf("Tensor %3zu %10s %15s %10zu bytes (%4.1f MB) ", tensor_index,
TensorTypeName(tensor->type), AllocTypeName(tensor->allocation_type),
tensor->bytes, static_cast<double>(tensor->bytes) / (1 << 20));
PrintTfLiteIntVector(tensor->dims);
}
printf("\n");
for (size_t node_index = 0; node_index < interpreter->operators_size();
node_index++) {
const NodeAndRegistration node_and_reg =
interpreter->node_and_registration(static_cast<int>(node_index));
const TfLiteNode& node = node_and_reg.node;
const TfLiteRegistration* reg = node_and_reg.registration;
if (reg->custom_name != nullptr) {
printf("Node %3zu Operator Custom Name %s\n", node_index,
reg->custom_name);
} else {
printf("Node %3zu Operator Builtin Code %3" PRId32 " %s\n", node_index,
reg->builtin_code, EnumNamesBuiltinOperator()[reg->builtin_code]);
}
printf(" Inputs:");
PrintTfLiteIntVector(node.inputs);
printf(" Outputs:");
PrintTfLiteIntVector(node.outputs);
}
}
} // namespace tflite

View File

@@ -0,0 +1,30 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Optional debugging functionality. For small sized binaries, these are not
// needed.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#include "tensorflow/lite/micro/micro_interpreter.h"
namespace tflite {
// Helper function to print model flatbuffer data. This function is not called
// by default. Hence it's not linked in to the final binary code.
void PrintModelData(const Model* model, ErrorReporter* error_reporter);
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter);
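// Usage sketch (assumes a model, an error reporter and a fully constructed
// MicroInterpreter on which AllocateTensors() has already been called):
//   tflite::PrintModelData(model, error_reporter);
//   tflite::PrintInterpreterState(&interpreter);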
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_

View File

@@ -0,0 +1,42 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/micro_time.h"
namespace tflite {
MicroProfiler::MicroProfiler(tflite::ErrorReporter* reporter)
: reporter_(reporter) {}
uint32_t MicroProfiler::BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) {
start_time_ = GetCurrentTimeTicks();
TFLITE_DCHECK(tag != nullptr);
event_tag_ = tag;
return 0;
}
void MicroProfiler::EndEvent(uint32_t event_handle) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
int32_t end_time = GetCurrentTimeTicks();
TF_LITE_REPORT_ERROR(reporter_, "%s took %d cycles\n", event_tag_,
end_time - start_time_);
#endif
}
} // namespace tflite

View File

@@ -0,0 +1,71 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// MicroProfiler creates a common way to gain fine-grained insight into runtime
// performance. Bottleneck operators can be identified along with slow code
// sections. This can be used in conjunction with running the relevant micro
// benchmark to evaluate end-to-end performance.
//
// Usage example:
// MicroProfiler profiler(error_reporter);
// {
// ScopedProfile scoped_profile(profiler, tag);
// work_to_profile();
// }
//
// This will call the following methods in order:
// int event_handle = profiler->BeginEvent(op_name, EventType::DEFAULT, 0)
// work_to_profile();
// profiler->EndEvent(event_handle)
class MicroProfiler : public tflite::Profiler {
public:
explicit MicroProfiler(tflite::ErrorReporter* reporter);
~MicroProfiler() override = default;
// AddEvent is unused for Tf Micro.
void AddEvent(const char* tag, EventType event_type, uint64_t start,
uint64_t end, int64_t event_metadata1,
int64_t event_metadata2) override {}
// BeginEvent followed by code followed by EndEvent will profile the code
// enclosed. Multiple concurrent events are unsupported, so the return value
// is always 0. Event_metadata1 and event_metadata2 are unused. The tag
// pointer must be valid until EndEvent is called.
uint32_t BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) override;
// Event_handle is ignored since TF Micro does not support concurrent events.
void EndEvent(uint32_t event_handle) override;
private:
tflite::ErrorReporter* reporter_;
int32_t start_time_;
const char* event_tag_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_

View File

@@ -0,0 +1,305 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Implements debug logging for numbers by converting them into strings and then
// calling the main DebugLog(char*) function. These are separated into a
// different file so that platforms can just implement the string output version
// of DebugLog() and then get the numerical variations without requiring any
// more code.
#include "tensorflow/lite/micro/micro_string.h"
#include <cstdarg>
#include <cstdint>
namespace {
// Int formats can need up to 10 bytes for the value plus a single byte for the
// sign.
constexpr int kMaxIntCharsNeeded = 10 + 1;
// Hex formats can need up to 8 bytes for the value plus two bytes for the "0x".
constexpr int kMaxHexCharsNeeded = 8 + 2;
// Float formats can need up to 7 bytes for the fraction, plus 3 bytes for the
// "*2^" marker, 3 bytes for the exponent and a single byte for the sign.
constexpr int kMaxFloatCharsNeeded = 7 + 3 + 3 + 1;
// All input buffers to the number conversion functions must be this long.
const int kFastToBufferSize = 48;
// Reverses a zero-terminated string in-place.
char* ReverseStringInPlace(char* start, char* end) {
char* p1 = start;
char* p2 = end - 1;
while (p1 < p2) {
char tmp = *p1;
*p1++ = *p2;
*p2-- = tmp;
}
return start;
}
// Appends a string to a string, in-place. You need to pass in the maximum
// string length as the second argument.
char* StrCatStr(char* main, int main_max_length, const char* to_append) {
char* current = main;
while (*current != 0) {
++current;
}
char* current_end = main + (main_max_length - 1);
while ((*to_append != 0) && (current < current_end)) {
*current = *to_append;
++current;
++to_append;
}
*current = 0;
return current;
}
// Populates the provided buffer with an ASCII representation of the number.
char* FastUInt32ToBufferLeft(uint32_t i, char* buffer, int base) {
char* start = buffer;
do {
int32_t digit = i % base;
char character;
if (digit < 10) {
character = '0' + digit;
} else {
character = 'a' + (digit - 10);
}
*buffer++ = character;
i /= base;
} while (i > 0);
*buffer = 0;
ReverseStringInPlace(start, buffer);
return buffer;
}
// Populates the provided buffer with an ASCII representation of the number.
char* FastInt32ToBufferLeft(int32_t i, char* buffer) {
uint32_t u = i;
if (i < 0) {
*buffer++ = '-';
u = -u;
}
return FastUInt32ToBufferLeft(u, buffer, 10);
}
// Converts a number to a string and appends it to another.
char* StrCatInt32(char* main, int main_max_length, int32_t number) {
char number_string[kFastToBufferSize];
FastInt32ToBufferLeft(number, number_string);
return StrCatStr(main, main_max_length, number_string);
}
// Converts a number to a string and appends it to another.
char* StrCatUInt32(char* main, int main_max_length, uint32_t number, int base) {
char number_string[kFastToBufferSize];
FastUInt32ToBufferLeft(number, number_string, base);
return StrCatStr(main, main_max_length, number_string);
}
// Populates the provided buffer with ASCII representation of the float number.
// Avoids the use of any floating point instructions (since these aren't
// supported on many microcontrollers) and as a consequence prints values with
// power-of-two exponents.
char* FastFloatToBufferLeft(float f, char* buffer) {
char* current = buffer;
char* current_end = buffer + (kFastToBufferSize - 1);
// Access the bit fields of the floating point value to avoid requiring any
// float instructions. These constants are derived from IEEE 754.
const uint32_t sign_mask = 0x80000000;
const uint32_t exponent_mask = 0x7f800000;
const int32_t exponent_shift = 23;
const int32_t exponent_bias = 127;
const uint32_t fraction_mask = 0x007fffff;
const uint32_t u = *reinterpret_cast<uint32_t*>(&f);
const int32_t exponent =
((u & exponent_mask) >> exponent_shift) - exponent_bias;
const uint32_t fraction = (u & fraction_mask);
// Expect ~0x2B1B9D3 for fraction.
if (u & sign_mask) {
*current = '-';
current += 1;
}
*current = 0;
// These are special cases for infinities and not-a-numbers.
if (exponent == 128) {
if (fraction == 0) {
current = StrCatStr(current, (current_end - current), "Inf");
return current;
} else {
current = StrCatStr(current, (current_end - current), "NaN");
return current;
}
}
// 0x007fffff (8388607) represents 0.99... for the fraction, so to print the
// correct decimal digits we need to scale our value before passing it to the
// conversion function. This scale should be 10000000/8388608 = 1.1920928955.
// We can approximate this using multiply-adds and right-shifts using the
// values in this array. The 1. portion of the number string is printed out
// in a fixed way before the fraction, below.
const int32_t scale_shifts_size = 13;
const int8_t scale_shifts[13] = {3, 4, 8, 11, 13, 14, 17,
18, 19, 20, 21, 22, 23};
uint32_t scaled_fraction = fraction;
for (int i = 0; i < scale_shifts_size; ++i) {
scaled_fraction += (fraction >> scale_shifts[i]);
}
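// Sanity check of the approximation above: the shift amounts contribute
// 2^-3 + 2^-4 + 2^-8 + ... + 2^-23 ~= 0.1920928, so scaled_fraction is close
// to fraction * 1.1920928955, matching the target scale 10000000 / 8388608.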
*current = '1';
current += 1;
*current = '.';
current += 1;
*current = 0;
// Prepend leading zeros to fill in all 7 bytes of the fraction. Truncate
// zeros off the end of the fraction. Every fractional value takes 7 bytes.
// For example, 2500 would be written into the buffer as 0002500 since it
// represents .00025.
constexpr int kMaxFractionalDigits = 7;
// Abort early if there is not enough space in the buffer.
if (current_end - current <= kMaxFractionalDigits) {
return current;
}
// Pre-fill buffer with zeros to ensure zero-truncation works properly.
for (int i = 1; i < kMaxFractionalDigits; i++) {
*(current + i) = '0';
}
// Track how large the fraction is to add leading zeros.
char* previous = current;
current = StrCatUInt32(current, (current_end - current), scaled_fraction, 10);
int fraction_digits = current - previous;
int leading_zeros = kMaxFractionalDigits - fraction_digits;
// Overwrite the null terminator from StrCatUInt32 to ensure zero-truncation
// works properly.
*current = '0';
// Shift fraction values and prepend leading zeros.
for (int i = 0; i < fraction_digits; i++) {
current--;
*(current + leading_zeros) = *current;
*current = '0';
}
current += kMaxFractionalDigits;
// Truncate trailing zeros for cleaner logs. Ensure we leave at least one
// fractional character for the case when scaled_fraction is 0.
while (*(current - 1) == '0' && (current - 1) > previous) {
current--;
}
*current = 0;
current = StrCatStr(current, (current_end - current), "*2^");
current = StrCatInt32(current, (current_end - current), exponent);
return current;
}
int FormatInt32(char* output, int32_t i) {
return static_cast<int>(FastInt32ToBufferLeft(i, output) - output);
}
int FormatUInt32(char* output, uint32_t i) {
return static_cast<int>(FastUInt32ToBufferLeft(i, output, 10) - output);
}
int FormatHex(char* output, uint32_t i) {
return static_cast<int>(FastUInt32ToBufferLeft(i, output, 16) - output);
}
int FormatFloat(char* output, float i) {
return static_cast<int>(FastFloatToBufferLeft(i, output) - output);
}
} // namespace
extern "C" int MicroVsnprintf(char* output, int len, const char* format,
va_list args) {
int output_index = 0;
const char* current = format;
// One extra character must be left for the null terminator.
const int usable_length = len - 1;
while (*current != '\0' && output_index < usable_length) {
if (*current == '%') {
current++;
switch (*current) {
case 'd':
// Cut off log message if format could exceed log buffer length.
if (usable_length - output_index < kMaxIntCharsNeeded) {
output[output_index++] = '\0';
return output_index;
}
output_index +=
FormatInt32(&output[output_index], va_arg(args, int32_t));
current++;
break;
case 'u':
if (usable_length - output_index < kMaxIntCharsNeeded) {
output[output_index++] = '\0';
return output_index;
}
output_index +=
FormatUInt32(&output[output_index], va_arg(args, uint32_t));
current++;
break;
case 'x':
if (usable_length - output_index < kMaxHexCharsNeeded) {
output[output_index++] = '\0';
return output_index;
}
output[output_index++] = '0';
output[output_index++] = 'x';
output_index +=
FormatHex(&output[output_index], va_arg(args, uint32_t));
current++;
break;
case 'f':
if (usable_length - output_index < kMaxFloatCharsNeeded) {
output[output_index++] = '\0';
return output_index;
}
output_index +=
FormatFloat(&output[output_index], va_arg(args, double));
current++;
break;
case '%':
output[output_index++] = *current++;
break;
case 's':
char* string = va_arg(args, char*);
int string_idx = 0;
while (string_idx + output_index < usable_length &&
string[string_idx] != '\0') {
output[output_index++] = string[string_idx++];
}
current++;
}
} else {
output[output_index++] = *current++;
}
}
output[output_index++] = '\0';
return output_index;
}
extern "C" int MicroSnprintf(char* output, int len, const char* format, ...) {
va_list args;
va_start(args, format);
int bytes_written = MicroVsnprintf(output, len, format, args);
va_end(args);
return bytes_written;
}

View File

@@ -0,0 +1,33 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
#define TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
#include <cstdarg>
// Implements simple string formatting for numeric types. Returns the number of
// bytes written to output.
extern "C" {
// Functionally equivalent to vsnprintf, trimmed down for TFLite Micro.
// MicroSnprintf() is implemented using MicroVsnprintf().
int MicroVsnprintf(char* output, int len, const char* format, va_list args);
// Functionally equivalent to snprintf, trimmed down for TFLite Micro.
// For example, MicroSnprintf(buffer, 10, "int %d", 10) will put the string
// "int 10" in the buffer.
// Floating point values are logged in exponent notation (1.XXX*2^N).
int MicroSnprintf(char* output, int len, const char* format, ...);
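// Illustrative example of the float notation (assuming a sufficiently large
// buffer): MicroSnprintf(buffer, 32, "x=%f", 0.25f) is expected to produce
// "x=1.0*2^-2".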
}
#endif // TENSORFLOW_LITE_MICRO_MICRO_STRING_H_

View File

@@ -0,0 +1,44 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Reference implementation of timer functions. Platforms are not required to
// implement these timer methods, but they are required to enable profiling.
// On platforms that have a POSIX stack or C library, it can be written using
// methods from <sys/time.h> or clock() from <time.h>.
// To add an equivalent function for your own platform, create your own
// implementation file, and place it in a subfolder named after the OS
// you're targeting. For example, see the Cortex M bare metal version in
// tensorflow/lite/micro/bluepill/micro_time.cc or the mbed one in
// tensorflow/lite/micro/mbed/micro_time.cc.
#include "tensorflow/lite/micro/micro_time.h"
namespace tflite {
// Reference implementation of the ticks_per_second() function that's required
// for a platform to support Tensorflow Lite for Microcontrollers profiling.
// This returns 0 by default because timing is an optional feature that builds
// without errors on platforms that do not need it.
int32_t ticks_per_second() { return 0; }
// Reference implementation of the GetCurrentTimeTicks() function that's
// required for a platform to support Tensorflow Lite for Microcontrollers
// profiling. This returns 0 by default because timing is an optional feature
// that builds without errors on platforms that do not need it.
int32_t GetCurrentTimeTicks() { return 0; }
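// For platforms with a C library, one hypothetical implementation (a sketch,
// not part of this reference file) could be based on clock():
//   #include <ctime>
//   int32_t ticks_per_second() {
//     return static_cast<int32_t>(CLOCKS_PER_SEC);
//   }
//   int32_t GetCurrentTimeTicks() { return static_cast<int32_t>(clock()); }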
} // namespace tflite

View File

@@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
#define TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
#include <stdint.h>
namespace tflite {
// These functions should be implemented by each target platform, and provide an
// accurate tick count along with how many ticks there are per second.
int32_t ticks_per_second();
// Return time in ticks. The meaning of a tick varies per platform.
int32_t GetCurrentTimeTicks();
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_

View File

@@ -0,0 +1,279 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_utils.h"
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace {
static const uint8_t kAsymmetricUInt8Min = 0;
static const uint8_t kAsymmetricUInt8Max = UINT8_MAX;
static const uint8_t kSymmetricUInt8Min = 1;
static const uint8_t kSymmetricUInt8Max = UINT8_MAX;
static const int8_t kAsymmetricInt8Min = INT8_MIN;
static const int8_t kAsymmetricInt8Max = INT8_MAX;
static const int kSymmetricInt8Scale = kAsymmetricInt8Max;
static const int16_t kAsymmetricInt16Min = INT16_MIN;
static const int16_t kAsymmetricInt16Max = INT16_MAX;
static const int kSymmetricInt16Scale = kAsymmetricInt16Max;
static const int32_t kAsymmetricInt32Max = INT32_MAX;
static const int kSymmetricInt32Scale = kAsymmetricInt32Max;
} // namespace
int ElementCount(const TfLiteIntArray& dims) {
int result = 1;
for (int i = 0; i < dims.size; ++i) {
result *= dims.data[i];
}
return result;
}
// Converts a float value into an unsigned eight-bit quantized value.
uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricUInt8Min) {
result = kAsymmetricUInt8Min;
}
if (result > kAsymmetricUInt8Max) {
result = kAsymmetricUInt8Max;
}
return result;
}
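// For example, with scale = 0.5f and zero_point = 128, a value of 1.0f maps
// to round(1.0 / 0.5) + 128 = 130, while -100.0f would be clamped to 0.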
uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale) {
int32_t result = round(value / scale);
if (result < kSymmetricUInt8Min) {
result = kSymmetricUInt8Min;
}
if (result > kSymmetricUInt8Max) {
result = kSymmetricUInt8Max;
}
return result;
}
int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricInt8Min) {
result = kAsymmetricInt8Min;
}
if (result > kAsymmetricInt8Max) {
result = kAsymmetricInt8Max;
}
return result;
}
int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
const int zero_point) {
int32_t result = round(value / scale) + zero_point;
if (result < kAsymmetricInt16Min) {
result = kAsymmetricInt16Min;
}
if (result > kAsymmetricInt16Max) {
result = kAsymmetricInt16Max;
}
return result;
}
int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale) {
return FloatToAsymmetricQuantizedInt8(value, scale, 0);
}
int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale) {
float quantized = round(value / scale);
if (static_cast<int>(quantized) > INT_MAX) {
quantized = static_cast<float>(INT_MAX);
} else if (quantized < INT_MIN) {
quantized = static_cast<float>(INT_MIN);
}
return static_cast<int>(quantized);
}
void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedInt8(input[i], scale, zero_point);
}
}
void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedUInt8(input[i], scale, zero_point);
}
}
void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
float scale, int zero_point) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToAsymmetricQuantizedInt16(input[i], scale, zero_point);
}
}
void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
float scale) {
for (int i = 0; i < num_elements; i++) {
output[i] = FloatToSymmetricQuantizedInt32(input[i], scale);
}
}
void SymmetricPerChannelQuantize(const float* input, int32_t* output,
int num_elements, int num_channels,
float* scales) {
int elements_per_channel = num_elements / num_channels;
for (int i = 0; i < num_channels; i++) {
for (int j = 0; j < elements_per_channel; j++) {
output[i * elements_per_channel + j] = FloatToSymmetricQuantizedInt32(
input[i * elements_per_channel + j], scales[i]);
}
}
}
void SignedSymmetricPerChannelQuantize(const float* values,
TfLiteIntArray* dims,
int quantized_dimension,
int8_t* quantized_values,
float* scaling_factors) {
int input_size = ElementCount(*dims);
int channel_count = dims->data[quantized_dimension];
int per_channel_size = input_size / channel_count;
int stride;
int channel_stride;
if (quantized_dimension == 0) {
stride = 1;
channel_stride = per_channel_size;
} else if (quantized_dimension == 3) {
stride = channel_count;
channel_stride = 1;
} else {
TF_LITE_FATAL("quantized dimension must be 0 or 3");
}
// Calculate scales for each channel.
for (int channel = 0; channel < channel_count; channel++) {
float min = 0;
float max = 0;
for (int i = 0; i < per_channel_size; i++) {
int idx = channel * channel_stride + i * stride;
min = fminf(min, values[idx]);
max = fmaxf(max, values[idx]);
}
scaling_factors[channel] =
fmaxf(fabs(min), fabs(max)) / kSymmetricInt8Scale;
for (int i = 0; i < per_channel_size; i++) {
int idx = channel * channel_stride + i * stride;
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[idx] / scaling_factors[channel]));
// Clamp: just in case some odd numeric offset.
quantized_values[idx] = fminf(
kSymmetricInt8Scale, fmaxf(-kSymmetricInt8Scale, quantized_value));
}
}
}
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int8_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor = fmaxf(fabs(min), fabs(max)) / kSymmetricInt8Scale;
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(kSymmetricInt8Scale,
fmaxf(-kSymmetricInt8Scale, quantized_value));
}
}
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int16_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor = fmaxf(fabs(min), fabs(max)) / kSymmetricInt16Scale;
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(kSymmetricInt16Scale,
fmaxf(-kSymmetricInt16Scale, quantized_value));
}
}
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int32_t* quantized_values, float* scaling_factor) {
int input_size = ElementCount(*dims);
float min = 0;
float max = 0;
for (int i = 0; i < input_size; i++) {
min = fminf(min, values[i]);
max = fmaxf(max, values[i]);
}
*scaling_factor =
fmaxf(fabs(min), fabs(max)) / static_cast<float>(kSymmetricInt32Scale);
for (int i = 0; i < input_size; i++) {
const int32_t quantized_value =
static_cast<int32_t>(roundf(values[i] / *scaling_factor));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = fminf(
static_cast<float>(kSymmetricInt32Scale),
fmaxf(static_cast<float>(-kSymmetricInt32Scale), quantized_value));
}
}
void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
uint8_t* quantized_values, float* scaling_factor) {
SignedSymmetricQuantize(values, dims,
reinterpret_cast<int8_t*>(quantized_values),
scaling_factor);
}
void SymmetricDequantize(const int8_t* values, const int size,
const float dequantization_scale,
float* dequantized_values) {
for (int i = 0; i < size; ++i) {
dequantized_values[i] = values[i] * dequantization_scale;
}
}
} // namespace tflite

View File

@@ -0,0 +1,110 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Returns number of elements in the shape array.
int ElementCount(const TfLiteIntArray& dims);
uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
const int zero_point);
uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale);
int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
const int zero_point);
int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
const int zero_point);
int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale);
// Converts a float value into a signed thirty-two-bit quantized value. Note
// that values close to max int and min int may see significant error due to
// a lack of floating point granularity for large values.
int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale);
// Helper methods to quantize arrays of floats to the desired format.
//
// There are several key flavors of quantization in TfLite:
//              asymmetric   symmetric   per channel
//   int8_t  |      X      |     X     |      X      |
//   uint8_t |      X      |     X     |             |
//   int16_t |      X      |           |             |
//   int32_t |             |     X     |      X      |
//
// The per-op quantization spec can be found here:
// https://www.tensorflow.org/lite/performance/quantization_spec
void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
float scale, int zero_point = 0);
void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
float scale, int zero_point = 128);
void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
float scale, int zero_point = 0);
void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
float scale);
void SymmetricPerChannelQuantize(const float* input, int32_t* output,
int num_elements, int num_channels,
float* scales);
void SignedSymmetricPerChannelQuantize(const float* values,
TfLiteIntArray* dims,
int quantized_dimension,
int8_t* quantized_values,
float* scaling_factor);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int8_t* quantized_values, float* scaling_factor);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int16_t* quantized_values, float* scaling_factor);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int32_t* quantized_values, float* scaling_factor);
void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
uint8_t* quantized_values, float* scaling_factor);
void SymmetricDequantize(const int8_t* values, const int size,
const float dequantization_scale,
float* dequantized_values);
template <typename T>
void AsymmetricDequantize(const T* values, const int size,
const float dequantization_scale,
int dequantization_zero_point,
float* dequantized_values) {
for (int i = 0; i < size; ++i) {
dequantized_values[i] =
(values[i] - dequantization_zero_point) * dequantization_scale;
}
}
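// Illustrative round-trip sketch (the scale of 0.0078125f is arbitrary):
//   float input[4] = {0.0f, 0.5f, 1.0f, -1.0f};
//   int8_t quantized[4];
//   tflite::AsymmetricQuantize(input, quantized, 4, /*scale=*/0.0078125f);
//   float restored[4];
//   tflite::AsymmetricDequantize(quantized, 4, 0.0078125f, /*zero_point=*/0,
//                                restored);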
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_

View File

@@ -0,0 +1,230 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/recording_micro_allocator.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
namespace tflite {
RecordingMicroAllocator::RecordingMicroAllocator(
RecordingSimpleMemoryAllocator* recording_memory_allocator,
ErrorReporter* error_reporter)
: MicroAllocator(recording_memory_allocator, error_reporter),
recording_memory_allocator_(recording_memory_allocator) {}
RecordingMicroAllocator* RecordingMicroAllocator::Create(
uint8_t* tensor_arena, size_t arena_size, ErrorReporter* error_reporter) {
TFLITE_DCHECK(error_reporter != nullptr);
RecordingSimpleMemoryAllocator* simple_memory_allocator =
RecordingSimpleMemoryAllocator::Create(error_reporter, tensor_arena,
arena_size);
TFLITE_DCHECK(simple_memory_allocator != nullptr);
uint8_t* allocator_buffer = simple_memory_allocator->AllocateFromTail(
sizeof(RecordingMicroAllocator), alignof(RecordingMicroAllocator));
RecordingMicroAllocator* allocator = new (allocator_buffer)
RecordingMicroAllocator(simple_memory_allocator, error_reporter);
return allocator;
}
RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation(
RecordedAllocationType allocation_type) const {
switch (allocation_type) {
case RecordedAllocationType::kTfLiteEvalTensorData:
return recorded_tflite_eval_tensor_data_;
case RecordedAllocationType::kPersistentTfLiteTensorData:
return recorded_persistent_tflite_tensor_data_;
case RecordedAllocationType::kPersistentTfLiteTensorQuantizationData:
return recorded_persistent_tflite_tensor_quantization_data_;
case RecordedAllocationType::kTfLiteTensorVariableBufferData:
return recorded_tflite_tensor_variable_buffer_data_;
case RecordedAllocationType::kNodeAndRegistrationArray:
return recorded_node_and_registration_array_data_;
case RecordedAllocationType::kOpData:
return recorded_op_data_;
}
TF_LITE_REPORT_ERROR(error_reporter(), "Invalid allocation type supplied: %d",
allocation_type);
return RecordedAllocation();
}
const RecordingSimpleMemoryAllocator*
RecordingMicroAllocator::GetSimpleMemoryAllocator() const {
return recording_memory_allocator_;
}
void RecordingMicroAllocator::PrintAllocations() const {
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation total %d bytes",
recording_memory_allocator_->GetUsedBytes());
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation head %d bytes",
recording_memory_allocator_->GetHeadUsedBytes());
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] Arena allocation tail %d bytes",
recording_memory_allocator_->GetTailUsedBytes());
PrintRecordedAllocation(RecordedAllocationType::kTfLiteEvalTensorData,
"TfLiteEvalTensor data", "allocations");
PrintRecordedAllocation(RecordedAllocationType::kPersistentTfLiteTensorData,
"Persistent TfLiteTensor data", "tensors");
PrintRecordedAllocation(
RecordedAllocationType::kPersistentTfLiteTensorQuantizationData,
"Persistent TfLiteTensor quantization data", "allocations");
PrintRecordedAllocation(
RecordedAllocationType::kTfLiteTensorVariableBufferData,
"TfLiteTensor variable buffer data", "allocations");
PrintRecordedAllocation(RecordedAllocationType::kNodeAndRegistrationArray,
"NodeAndRegistration struct",
"NodeAndRegistration structs");
PrintRecordedAllocation(RecordedAllocationType::kOpData,
"Operator runtime data", "OpData structs");
}
void RecordingMicroAllocator::PrintRecordedAllocation(
RecordedAllocationType allocation_type, const char* allocation_name,
const char* allocation_description) const {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
RecordedAllocation allocation = GetRecordedAllocation(allocation_type);
TF_LITE_REPORT_ERROR(
error_reporter(),
"[RecordingMicroAllocator] '%s' used %d bytes with alignment overhead "
"(requested %d bytes for %d %s)",
allocation_name, allocation.used_bytes, allocation.requested_bytes,
allocation.count, allocation_description);
#endif
}
TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status = MicroAllocator::AllocateNodeAndRegistrations(
model, node_and_registrations);
RecordAllocationUsage(allocations,
recorded_node_and_registration_array_data_);
// The allocation count recorded by the SimpleMemoryAllocator will only be 1,
// because the parent class makes a single large allocation for all of the
// nodes in the graph (i.e. sizeof(NodeAndRegistration) * num_nodes). To
// provide better logging, decrement the count by 1 and add in the actual
// number of operators used in the graph:
recorded_node_and_registration_array_data_.count +=
GetSubGraphFromModel(model)->operators()->size() - 1;
return status;
}
TfLiteStatus
RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status =
MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
model, op_resolver, node_and_registrations);
RecordAllocationUsage(allocations, recorded_op_data_);
return status;
}
TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status =
MicroAllocator::AllocateTfLiteEvalTensors(model, eval_tensors);
RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_);
// The allocation for this recording will always be 1. This is because the
// parent class mallocs one large allocation for the number of tensors in the
// graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors).
// To prevent extra overhead and potential for fragmentation, manually adjust
// the accounting by decrementing by 1 and adding the actual number of tensors
// used in the graph:
recorded_tflite_eval_tensor_data_.count +=
GetSubGraphFromModel(model)->tensors()->size() - 1;
return status;
}
TfLiteStatus RecordingMicroAllocator::AllocateVariables(
const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status =
MicroAllocator::AllocateVariables(subgraph, eval_tensors);
RecordAllocationUsage(allocations,
recorded_tflite_tensor_variable_buffer_data_);
return status;
}
TfLiteTensor* RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteTensor* result = MicroAllocator::AllocatePersistentTfLiteTensorInternal(
model, eval_tensors, tensor_index);
RecordAllocationUsage(allocations, recorded_persistent_tflite_tensor_data_);
return result;
}
TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp) {
RecordedAllocation allocations = SnapshotAllocationUsage();
TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
model, subgraph, tensor, tensor_index, allocate_temp);
RecordAllocationUsage(allocations,
recorded_persistent_tflite_tensor_quantization_data_);
return status;
}
RecordedAllocation RecordingMicroAllocator::SnapshotAllocationUsage() const {
return {/*requested_bytes=*/recording_memory_allocator_->GetRequestedBytes(),
/*used_bytes=*/recording_memory_allocator_->GetUsedBytes(),
/*count=*/recording_memory_allocator_->GetAllocatedCount()};
}
void RecordingMicroAllocator::RecordAllocationUsage(
const RecordedAllocation& snapshotted_allocation,
RecordedAllocation& recorded_allocation) {
recorded_allocation.requested_bytes +=
recording_memory_allocator_->GetRequestedBytes() -
snapshotted_allocation.requested_bytes;
recorded_allocation.used_bytes +=
recording_memory_allocator_->GetUsedBytes() -
snapshotted_allocation.used_bytes;
recorded_allocation.count +=
recording_memory_allocator_->GetAllocatedCount() -
snapshotted_allocation.count;
}
} // namespace tflite
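
The recording pattern above is a simple snapshot/delta scheme: usage counters are sampled before delegating to the parent class, and the difference is folded into the per-type bucket afterwards. A standalone sketch of that arithmetic (the CountersSketch struct is hypothetical and only mirrors the values that RecordingSimpleMemoryAllocator reports):

#include <cstddef>

// Hypothetical mirror of the counters exposed by the recording allocator.
struct CountersSketch {
  size_t requested_bytes;
  size_t used_bytes;
  size_t count;
};

// Adds the delta between a "before" snapshot and the current usage into the
// per-type bucket, matching RecordAllocationUsage() above.
void RecordDeltaSketch(const CountersSketch& before,
                       const CountersSketch& after,
                       CountersSketch& bucket) {
  bucket.requested_bytes += after.requested_bytes - before.requested_bytes;
  bucket.used_bytes += after.used_bytes - before.used_bytes;
  bucket.count += after.count - before.count;
}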

View File

@@ -0,0 +1,120 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
namespace tflite {
// List of buckets currently recorded by this class. Each type keeps a list of
// allocated information during model initialization.
enum class RecordedAllocationType {
kTfLiteEvalTensorData,
kPersistentTfLiteTensorData,
kPersistentTfLiteTensorQuantizationData,
kTfLiteTensorVariableBufferData,
kNodeAndRegistrationArray,
kOpData,
};
// Container for holding information about allocation recordings by a given
// type. Each recording contains the number of bytes requested, the actual bytes
// allocated (can differ from requested due to alignment), and the number of items
// allocated.
struct RecordedAllocation {
size_t requested_bytes;
size_t used_bytes;
size_t count;
};
// Utility subclass of MicroAllocator that records all allocations
// inside the arena. A summary of allocations can be logged through the
// ErrorReporter by invoking PrintAllocations(). This special allocator requires
// an instance of RecordingSimpleMemoryAllocator to capture allocations in the
// head and tail. Arena allocation recording can be retrieved by type through
// the GetRecordedAllocation() function. This class should only be used for
// auditing memory usage or integration testing.
class RecordingMicroAllocator : public MicroAllocator {
public:
static RecordingMicroAllocator* Create(uint8_t* tensor_arena,
size_t arena_size,
ErrorReporter* error_reporter);
// Returns the recorded allocations information for a given allocation type.
RecordedAllocation GetRecordedAllocation(
RecordedAllocationType allocation_type) const;
const RecordingSimpleMemoryAllocator* GetSimpleMemoryAllocator() const;
// Logs out through the ErrorReporter all allocation recordings by type
// defined in RecordedAllocationType.
void PrintAllocations() const;
protected:
TfLiteStatus AllocateNodeAndRegistrations(
const Model* model,
NodeAndRegistration** node_and_registrations) override;
TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) override;
TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) override;
TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors) override;
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this method. It is only used to record TfLiteTensor persistent allocations.
TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors,
int tensor_index) override;
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this function since all allocations for quantized data will take place in
// the temp section.
TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
const SubGraph* subgraph,
TfLiteTensor* tensor,
int tensor_index,
bool allocate_temp) override;
private:
RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
void PrintRecordedAllocation(RecordedAllocationType allocation_type,
const char* allocation_name,
const char* allocation_description) const;
RecordedAllocation SnapshotAllocationUsage() const;
void RecordAllocationUsage(const RecordedAllocation& snapshotted_allocation,
RecordedAllocation& recorded_allocation);
const RecordingSimpleMemoryAllocator* recording_memory_allocator_;
RecordedAllocation recorded_tflite_eval_tensor_data_ = {};
RecordedAllocation recorded_persistent_tflite_tensor_data_ = {};
RecordedAllocation recorded_persistent_tflite_tensor_quantization_data_ = {};
RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {};
RecordedAllocation recorded_node_and_registration_array_data_ = {};
RecordedAllocation recorded_op_data_ = {};
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
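
A hedged sketch of querying the recording allocator directly; in a real program the allocator is first handed to an interpreter so the buckets get populated, and MicroErrorReporter plus the arena size here are assumptions used only for illustration:

#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"

constexpr size_t kAuditArenaSize = 8 * 1024;  // illustrative size
uint8_t g_audit_arena[kAuditArenaSize];

void InspectAllocationsSketch() {
  tflite::MicroErrorReporter error_reporter;
  tflite::RecordingMicroAllocator* allocator =
      tflite::RecordingMicroAllocator::Create(g_audit_arena, kAuditArenaSize,
                                              &error_reporter);
  // Once model initialization has run, each bucket can be read back by type.
  tflite::RecordedAllocation eval_tensors = allocator->GetRecordedAllocation(
      tflite::RecordedAllocationType::kTfLiteEvalTensorData);
  // used_bytes is >= requested_bytes because of alignment overhead.
  (void)eval_tensors;
  allocator->PrintAllocations();
}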

View File

@@ -0,0 +1,65 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"
namespace tflite {
// Utility subclass that enables internal recordings of the MicroInterpreter.
// This class should be used to audit and analyze memory arena usage for a given
// model and interpreter.
//
// After construction and the first Invoke() or AllocateTensors() call, the
// memory usage is recorded and available through the GetMicroAllocator()
// function. See RecordingMicroAllocator for more details on what is currently
// recorded from arena allocations.
//
// It is recommended that users increase the tensor arena size by at least 1 KB
// to ensure enough additional memory is available for internal recordings.
class RecordingMicroInterpreter : public MicroInterpreter {
public:
RecordingMicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter)
: MicroInterpreter(model, op_resolver,
RecordingMicroAllocator::Create(
tensor_arena, tensor_arena_size, error_reporter),
error_reporter),
recording_micro_allocator_(
static_cast<const RecordingMicroAllocator&>(allocator())) {}
RecordingMicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
RecordingMicroAllocator* allocator,
ErrorReporter* error_reporter)
: MicroInterpreter(model, op_resolver, allocator, error_reporter),
recording_micro_allocator_(*allocator) {}
const RecordingMicroAllocator& GetMicroAllocator() const {
return recording_micro_allocator_;
}
private:
const RecordingMicroAllocator& recording_micro_allocator_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
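
A minimal sketch of the audit flow described above, assuming g_model_data is a model flatbuffer embedded in the binary and that AllOpsResolver is acceptable for the audit build; the arena size is illustrative:

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/recording_micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_model_data[];  // assumed embedded model buffer

constexpr size_t kInterpreterArenaSize = 16 * 1024;  // includes recording slack
uint8_t g_interpreter_arena[kInterpreterArenaSize];

void AuditArenaUsageSketch() {
  tflite::MicroErrorReporter error_reporter;
  tflite::AllOpsResolver resolver;
  const tflite::Model* model = tflite::GetModel(g_model_data);
  tflite::RecordingMicroInterpreter interpreter(
      model, resolver, g_interpreter_arena, kInterpreterArenaSize,
      &error_reporter);
  // Recording happens as part of tensor allocation.
  interpreter.AllocateTensors();
  interpreter.GetMicroAllocator().PrintAllocations();
}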

View File

@@ -0,0 +1,83 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
#include <new>
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
RecordingSimpleMemoryAllocator::RecordingSimpleMemoryAllocator(
ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size)
: SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size),
requested_head_bytes_(0),
requested_tail_bytes_(0),
used_bytes_(0),
alloc_count_(0) {}
RecordingSimpleMemoryAllocator::~RecordingSimpleMemoryAllocator() {}
RecordingSimpleMemoryAllocator* RecordingSimpleMemoryAllocator::Create(
ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
TFLITE_DCHECK(error_reporter != nullptr);
TFLITE_DCHECK(buffer_head != nullptr);
RecordingSimpleMemoryAllocator tmp =
RecordingSimpleMemoryAllocator(error_reporter, buffer_head, buffer_size);
uint8_t* allocator_buffer =
tmp.AllocateFromTail(sizeof(RecordingSimpleMemoryAllocator),
alignof(RecordingSimpleMemoryAllocator));
// Use the default copy constructor to populate internal states.
return new (allocator_buffer) RecordingSimpleMemoryAllocator(tmp);
}
size_t RecordingSimpleMemoryAllocator::GetRequestedBytes() const {
return requested_head_bytes_ + requested_tail_bytes_;
}
size_t RecordingSimpleMemoryAllocator::GetUsedBytes() const {
return used_bytes_;
}
size_t RecordingSimpleMemoryAllocator::GetAllocatedCount() const {
return alloc_count_;
}
TfLiteStatus RecordingSimpleMemoryAllocator::EnsureHeadSize(size_t size,
size_t alignment) {
const uint8_t* previous_head = GetHead();
TfLiteStatus status = SimpleMemoryAllocator::EnsureHeadSize(size, alignment);
if (status == kTfLiteOk) {
used_bytes_ += GetHead() - previous_head;
requested_head_bytes_ = size;
}
return status;
}
uint8_t* RecordingSimpleMemoryAllocator::AllocateFromTail(size_t size,
size_t alignment) {
const uint8_t* previous_tail = GetTail();
uint8_t* result = SimpleMemoryAllocator::AllocateFromTail(size, alignment);
if (result != nullptr) {
used_bytes_ += previous_tail - GetTail();
requested_tail_bytes_ += size;
alloc_count_++;
}
return result;
}
} // namespace tflite

View File

@@ -0,0 +1,64 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
// Utility class used to log allocations of a SimpleMemoryAllocator. Should only
// be used in debug/evaluation settings or unit tests to evaluate allocation
// usage.
class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
public:
RecordingSimpleMemoryAllocator(ErrorReporter* error_reporter,
uint8_t* buffer_head, size_t buffer_size);
// TODO(b/157615197): Cleanup constructors/destructor and use factory
// functions.
~RecordingSimpleMemoryAllocator() override;
static RecordingSimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
uint8_t* buffer_head,
size_t buffer_size);
// Returns the number of bytes requested from the head or tail.
size_t GetRequestedBytes() const;
// Returns the number of bytes actually allocated from the head or tail. This
// value will be greater than or equal to the number of requested bytes due to
// padding and alignment.
size_t GetUsedBytes() const;
// Returns the number of alloc calls from the head or tail.
size_t GetAllocatedCount() const;
TfLiteStatus EnsureHeadSize(size_t size, size_t alignment) override;
uint8_t* AllocateFromTail(size_t size, size_t alignment) override;
private:
size_t requested_head_bytes_;
size_t requested_tail_bytes_;
size_t used_bytes_;
size_t alloc_count_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
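
A small sketch showing how requested and used byte counts can diverge; the buffer size and alignment are illustrative only, and the counters also cover the allocator object that Create() carves from the tail:

#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"

void RecordingAllocatorSketch() {
  static uint8_t buffer[1024];
  tflite::MicroErrorReporter error_reporter;
  tflite::RecordingSimpleMemoryAllocator* allocator =
      tflite::RecordingSimpleMemoryAllocator::Create(&error_reporter, buffer,
                                                     sizeof(buffer));
  // Request 10 bytes with 16-byte alignment: 10 bytes are recorded as
  // requested, but the used count can be larger due to alignment padding.
  allocator->AllocateFromTail(/*size=*/10, /*alignment=*/16);
  size_t requested = allocator->GetRequestedBytes();
  size_t used = allocator->GetUsedBytes();       // >= requested
  size_t count = allocator->GetAllocatedCount();
  (void)requested; (void)used; (void)count;
}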

View File

@@ -0,0 +1,154 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include <cstddef>
#include <cstdint>
#include <new>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter,
uint8_t* buffer_head,
uint8_t* buffer_tail)
: error_reporter_(error_reporter),
buffer_head_(buffer_head),
buffer_tail_(buffer_tail),
head_(buffer_head),
tail_(buffer_tail),
temp_(buffer_head_) {}
SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter,
uint8_t* buffer,
size_t buffer_size)
: SimpleMemoryAllocator(error_reporter, buffer, buffer + buffer_size) {}
/* static */
SimpleMemoryAllocator* SimpleMemoryAllocator::Create(
ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
TFLITE_DCHECK(error_reporter != nullptr);
TFLITE_DCHECK(buffer_head != nullptr);
SimpleMemoryAllocator tmp =
SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size);
// Allocate enough bytes from the buffer to create a SimpleMemoryAllocator.
// The new instance will use the current adjusted tail buffer from the tmp
// allocator instance.
uint8_t* allocator_buffer = tmp.AllocateFromTail(
sizeof(SimpleMemoryAllocator), alignof(SimpleMemoryAllocator));
// Use the default copy constructor to populate internal states.
return new (allocator_buffer) SimpleMemoryAllocator(tmp);
}
SimpleMemoryAllocator::~SimpleMemoryAllocator() {}
TfLiteStatus SimpleMemoryAllocator::EnsureHeadSize(size_t size,
size_t alignment) {
if (head_ != temp_) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Internal error: EnsureHeadSize() needs to be called after"
"ResetTempAllocations().");
return kTfLiteError;
}
uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment);
if (aligned_result + size < head_) {
// Size is below the current head size, just return.
return kTfLiteOk;
}
const size_t available_memory = tail_ - aligned_result;
if (available_memory < size) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to adjust head size. Requested: %u, available %u, missing: %u",
size, available_memory, size - available_memory);
return kTfLiteError;
}
head_ = aligned_result + size;
temp_ = head_;
return kTfLiteOk;
}
uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size,
size_t alignment) {
uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment);
if (aligned_result < head_) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const size_t missing_memory = head_ - aligned_result;
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate tail memory. Requested: %u, "
"available %u, missing: %u",
size, size - missing_memory, missing_memory);
#endif
return nullptr;
}
tail_ = aligned_result;
return aligned_result;
}
uint8_t* SimpleMemoryAllocator::AllocateTemp(size_t size, size_t alignment) {
uint8_t* const aligned_result = AlignPointerUp(temp_, alignment);
const size_t available_memory = tail_ - aligned_result;
if (available_memory < size) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate temp memory. Requested: %u, "
"available %u, missing: %u",
size, available_memory, size - available_memory);
return nullptr;
}
temp_ = aligned_result + size;
return aligned_result;
}
void SimpleMemoryAllocator::ResetTempAllocations() { temp_ = head_; }
uint8_t* SimpleMemoryAllocator::GetHead() const { return head_; }
uint8_t* SimpleMemoryAllocator::GetBufferHead() const { return buffer_head_; }
uint8_t* SimpleMemoryAllocator::GetTail() const { return tail_; }
size_t SimpleMemoryAllocator::GetHeadUsedBytes() const {
return head_ - buffer_head_;
}
size_t SimpleMemoryAllocator::GetTailUsedBytes() const {
return buffer_tail_ - tail_;
}
size_t SimpleMemoryAllocator::GetAvailableMemory(size_t alignment) const {
uint8_t* const aligned_head = AlignPointerUp(head_, alignment);
uint8_t* const aligned_tail = AlignPointerDown(tail_, alignment);
return aligned_tail - aligned_head;
}
size_t SimpleMemoryAllocator::GetUsedBytes() const {
return GetBufferSize() - (tail_ - head_);
}
size_t SimpleMemoryAllocator::GetBufferSize() const {
return buffer_tail_ - buffer_head_;
}
} // namespace tflite
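
The Create() function above bootstraps the allocator by carving its own storage out of the tail of the arena with placement new. A standalone sketch of that idiom (ArenaObjectSketch is hypothetical and only illustrates the pattern):

#include <cstddef>
#include <cstdint>
#include <new>

struct ArenaObjectSketch {
  uint8_t* tail;
};

ArenaObjectSketch* CreateInArenaSketch(uint8_t* buffer, size_t size) {
  // Reserve aligned space for the object at the end of the buffer (the tail).
  uint8_t* tail = buffer + size - sizeof(ArenaObjectSketch);
  tail -= reinterpret_cast<uintptr_t>(tail) % alignof(ArenaObjectSketch);
  // Construct the object in place; it now lives inside the arena it tracks.
  return new (tail) ArenaObjectSketch{tail};
}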

View File

@@ -0,0 +1,99 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// TODO(petewarden): This allocator never frees up or reuses any memory, even
// though we have enough information about lifetimes of the tensors to do so.
// This makes it pretty wasteful, so we should use a more intelligent method.
class SimpleMemoryAllocator {
public:
// TODO(b/157615197): Cleanup constructors/destructor and use factory
// functions.
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head,
uint8_t* buffer_tail);
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
size_t buffer_size);
virtual ~SimpleMemoryAllocator();
// Creates a new SimpleMemoryAllocator from a given buffer head and size.
static SimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
uint8_t* buffer_head,
size_t buffer_size);
// Ensures that the head (lowest address and moving upwards) memory allocation
// is at least a given size. This function will only increase the head size if
// the passed-in value is larger than the current head size. Calls to this
// method also invalidate all temporary allocation values. This call will fail
// if a chain of allocations made through AllocateTemp() has not been cleaned
// up with a call to ResetTempAllocations().
virtual TfLiteStatus EnsureHeadSize(size_t size, size_t alignment);
// Allocates memory starting at the tail of the arena (highest address and
// moving downwards).
virtual uint8_t* AllocateFromTail(size_t size, size_t alignment);
// Allocates a temporary buffer from the head of the arena (lowest address and
// moving upwards) but does not update the actual head allocation size or
// position. The returned buffer is only guaranteed to be valid until
// ResetTempAllocations() is called or the head is adjusted again with
// EnsureHeadSize(). Repeated calls to this function create a chain of temp
// allocations, and every such chain must end with a call to
// ResetTempAllocations(). If EnsureHeadSize() is called before
// ResetTempAllocations(), it will fail with an error message.
virtual uint8_t* AllocateTemp(size_t size, size_t alignment);
// Resets a chain of temporary allocations back to the current head of the
// arena (lowest address).
virtual void ResetTempAllocations();
uint8_t* GetHead() const;
uint8_t* GetBufferHead() const;
uint8_t* GetTail() const;
size_t GetHeadUsedBytes() const;
size_t GetTailUsedBytes() const;
// Returns the number of bytes available with a given alignment.
size_t GetAvailableMemory(size_t alignment) const;
size_t GetUsedBytes() const;
private:
size_t GetBufferSize() const;
ErrorReporter* error_reporter_;
uint8_t* buffer_head_;
uint8_t* buffer_tail_;
uint8_t* head_;
uint8_t* tail_;
uint8_t* temp_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
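
A sketch of the head/temp/tail discipline described by the comments above; sizes and alignments are illustrative only:

#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"

void SimpleAllocatorSketch() {
  static uint8_t buffer[512];
  tflite::MicroErrorReporter error_reporter;
  tflite::SimpleMemoryAllocator allocator(&error_reporter, buffer,
                                          sizeof(buffer));
  // Persistent data lives at the tail and is never reclaimed.
  uint8_t* persistent =
      allocator.AllocateFromTail(/*size=*/32, /*alignment=*/4);
  // Temp buffers chain upward from the head...
  uint8_t* scratch = allocator.AllocateTemp(/*size=*/64, /*alignment=*/4);
  // ...and must be reset before the head size can be adjusted.
  allocator.ResetTempAllocations();
  TfLiteStatus status = allocator.EnsureHeadSize(/*size=*/128, /*alignment=*/4);
  (void)persistent; (void)scratch; (void)status;
}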

Some files were not shown because too many files have changed in this diff.