tflite micro integrate repo

This commit is contained in:
QingChuanWS
2020-12-09 15:06:27 +08:00
parent 200c0ff460
commit d80d0af1a6
406 changed files with 58235 additions and 1908 deletions

View File

@@ -0,0 +1,35 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
namespace tflite {
// The magic number in the template parameter is the maximum number of ops that
// can be added to AllOpsResolver. It can be increased if needed, though most
// applications that care about the memory footprint will want to use
// MicroMutableOpResolver directly, with an application-specific template
// parameter.
// The examples directory has sample code for this.
class AllOpsResolver : public MicroMutableOpResolver<128> {
public:
AllOpsResolver();
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
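
For comparison, a minimal sketch of the application-specific alternative mentioned above, assuming a hypothetical model that only needs fully-connected and softmax kernels (include path taken from this commit's header guards):

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

void RegisterOps() {
  // The template parameter matches the number of Add* calls below, so no
  // resolver slots are wasted and the linker can strip every other kernel.
  static tflite::MicroMutableOpResolver<2> op_resolver;
  op_resolver.AddFullyConnected();
  op_resolver.AddSoftmax();
}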

View File

@@ -0,0 +1,22 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
#define TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
extern const unsigned char g_keyword_scrambled_model_data[];
extern const unsigned int g_keyword_scrambled_model_data_length;
#endif // TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_

View File

@@ -0,0 +1,32 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
// C++ will automatically create class-specific delete operators for virtual
// objects, which by default call the global delete function. For embedded
// applications we want to avoid this, and won't be calling new/delete on these
// objects, so we need to override the default implementation with one that does
// nothing to avoid linking in ::delete().
// This macro needs to be included in all subclasses of a virtual base class in
// the private section.
#ifdef TF_LITE_STATIC_MEMORY
#define TF_LITE_REMOVE_VIRTUAL_DELETE \
void operator delete(void* p) {}
#else
#define TF_LITE_REMOVE_VIRTUAL_DELETE
#endif
#endif // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
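
As an illustration of where the macro is intended to sit, a hypothetical subclass (not part of this commit):

#include "tensorflow/lite/micro/compatibility.h"

class Base {
 public:
  virtual ~Base() {}
};

class Derived : public Base {
 public:
  Derived() {}

 private:
  // With TF_LITE_STATIC_MEMORY defined this expands to a no-op class-specific
  // operator delete, so the vtable never references the global ::delete.
  TF_LITE_REMOVE_VIRTUAL_DELETE
};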

View File

@@ -0,0 +1,23 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
// This function should be implemented by each target platform, and provide a
// way for strings to be output to some text stream. For more information, see
// tensorflow/lite/micro/debug_log.cc.
extern "C" void DebugLog(const char* s);
#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
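
A minimal sketch of a platform implementation, assuming a hosted target where stdio is available; bare-metal ports typically retarget this to a UART instead:

#include <cstdio>

// Forward every string to the host's standard error stream.
extern "C" void DebugLog(const char* s) { fprintf(stderr, "%s", s); }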

View File

@@ -0,0 +1,34 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Provides an interface to take an action based on the output from the person
// detection model.
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
// Called every time the results of a person detection run are available. The
// `person_score` has the numerical confidence that the captured image contains
// a person, and `no_person_score` has the numerical confidence that the image
// does not contain a person. Typically, if person_score > no_person_score, the
// image is considered to contain a person. This threshold may be adjusted for
// particular applications.
void RespondToDetection(tflite::ErrorReporter* error_reporter,
int8_t person_score, int8_t no_person_score);
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
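
A minimal sketch of a responder that only logs the scores (the include path follows this commit's header guards; a real application might drive an LED or GPIO instead):

#include "tensorflow/lite/micro/examples/person_detection_experimental/detection_responder.h"

void RespondToDetection(tflite::ErrorReporter* error_reporter,
                        int8_t person_score, int8_t no_person_score) {
  // int8_t varargs promote to int, so %d is the right conversion here.
  TF_LITE_REPORT_ERROR(error_reporter, "person score:%d no person score:%d",
                       person_score, no_person_score);
}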

View File

@@ -0,0 +1,40 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
// This is an abstraction around an image source like a camera, and is
// expected to return 8-bit sample data. The assumption is that this will be
// called in a low duty-cycle fashion in a low-power application. In these
// cases, the imaging sensor need not be run in a streaming mode, but rather can
// be idled in a relatively low-power mode between calls to GetImage(). The
// assumption is that the overhead and time of bringing the low-power sensor out
// of this standby mode is commensurate with the expected duty cycle of the
// application. The underlying sensor may actually be put into a streaming
// configuration, but the image buffer provided to GetImage should not be
// overwritten by the driver code until the next call to GetImage().
//
// The reference implementation must have no platform-specific dependencies,
// so it just returns a static image. For real applications, you should
// ensure there's a specialized implementation that accesses hardware APIs.
TfLiteStatus GetImage(tflite::ErrorReporter* error_reporter, int image_width,
int image_height, int channels, int8_t* image_data,
uint8_t* hardware_input);
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
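
A reference-style stub satisfying this contract might look like the sketch below, filling the frame with a constant instead of reading a camera (the include path follows the header guard; real ports would copy from the sensor driver):

#include "tensorflow/lite/micro/examples/person_detection_experimental/image_provider.h"

TfLiteStatus GetImage(tflite::ErrorReporter* error_reporter, int image_width,
                      int image_height, int channels, int8_t* image_data,
                      uint8_t* hardware_input) {
  // Placeholder pixels; a real implementation reads from hardware_input.
  for (int i = 0; i < image_width * image_height * channels; ++i) {
    image_data[i] = 0;
  }
  return kTfLiteOk;
}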

View File

@@ -0,0 +1,30 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
#include "tensorflow/lite/c/common.h"
// Initializes all data needed for the example. In the upstream example this
// function is named setup() for Arduino compatibility.
extern "C" void person_detect_init();
// Runs one iteration of data gathering and inference. This should be called
// repeatedly from the application code. In the upstream example this function
// is named loop() for Arduino compatibility.
extern "C" int person_detect(uint8_t* hardware_input);
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
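
Under these contracts, a bare application shell might look like this sketch; GetCameraFrame is a hypothetical board-specific hook, and the include path follows the header guard:

#include <cstdint>

#include "tensorflow/lite/micro/examples/person_detection_experimental/main_functions.h"

uint8_t* GetCameraFrame();  // hypothetical: returns the latest raw frame

int main() {
  person_detect_init();  // one-time setup
  while (true) {
    person_detect(GetCameraFrame());  // gather data and run inference
  }
}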

View File

@@ -0,0 +1,35 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
// Keeping these as constant expressions allows us to allocate fixed-sized
// arrays on the stack for our working memory.
// All of these values are derived from the values used during model training;
// if you change your model, you'll need to update these constants.
constexpr int kNumCols = 96;
constexpr int kNumRows = 96;
constexpr int kNumChannels = 1;
constexpr int kMaxImageSize = kNumCols * kNumRows * kNumChannels;
constexpr int kCategoryCount = 2;
constexpr int kPersonIndex = 1;
constexpr int kNotAPersonIndex = 0;
extern const char* kCategoryLabels[kCategoryCount];
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
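
For example, these constants let the working image buffer live on the stack, sized entirely at compile time (the include path follows the header guard):

#include <cstdint>

#include "tensorflow/lite/micro/examples/person_detection_experimental/model_settings.h"

void StackBufferExample() {
  int8_t image[kMaxImageSize];  // 96 * 96 * 1 = 9216 bytes, no heap involved
  const char* label = kCategoryLabels[kPersonIndex];
  (void)image;
  (void)label;
}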

View File

@@ -0,0 +1,27 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This is a standard TensorFlow Lite model file that has been converted into a
// C data array, so it can be easily compiled into a binary for devices that
// don't have a file system. It was created using the command:
// xxd -i person_detect.tflite > person_detect_model_data.cc
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_
extern const unsigned char g_person_detect_model_data[];
extern const int g_person_detect_model_data_len;
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_

View File

@@ -0,0 +1,57 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
#include <algorithm>
#include <cmath>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"
namespace tflite {
namespace ops {
namespace micro {
// Returns the floating point value for a fused activation:
inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
switch (act) {
case kTfLiteActNone:
return a;
case kTfLiteActRelu:
return TfLiteMax(0.0f, a);
case kTfLiteActReluN1To1:
return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
case kTfLiteActRelu6:
return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
case kTfLiteActTanh:
return std::tanh(a);
case kTfLiteActSignBit:
return std::signbit(a);
case kTfLiteActSigmoid:
return 1.0f / (1.0f + std::exp(-a));
}
return 0.0f; // To indicate an unsupported activation (i.e. when a new fused
// activation is added to the enum and not handled here).
}
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
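
For instance, a kernel's Eval() might apply a fused ReLU6 element-wise, as in this sketch (output_data and flat_size are assumed to come from the surrounding kernel; the include path follows the header guard):

#include "tensorflow/lite/micro/kernels/activation_utils.h"

void ApplyFusedActivation(float* output_data, int flat_size) {
  for (int i = 0; i < flat_size; ++i) {
    output_data[i] = tflite::ops::micro::ActivationValFloat(
        kTfLiteActRelu6, output_data[i]);  // clamps each value to [0, 6]
  }
}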

View File

@@ -0,0 +1,83 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
namespace micro {
// Helper class to perform a simulated kernel (i.e. TfLiteRegistration)
// lifecycle (init, prepare, invoke). All internal allocations are handled by
// this class. Simply pass in the registration, the list of required tensors,
// the inputs array, the outputs array, and any builtin data. Calling Invoke()
// will automatically walk the kernel, and outputs will be ready on the
// TfLiteTensor output provided during construction.
class KernelRunner {
public:
KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
int tensors_size, TfLiteIntArray* inputs,
TfLiteIntArray* outputs, void* builtin_data,
ErrorReporter* error_reporter);
// Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any
// exceptions will be reported through the error_reporter and returned as a
// status code here.
TfLiteStatus InitAndPrepare(const char* init_data = nullptr);
// Calls invoke on the kernel (i.e. TfLiteRegistration) struct.
// After successful invoke, results will be available in the output tensor as
// passed into the constructor of this class.
TfLiteStatus Invoke();
protected:
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_index);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_index);
static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
size_t bytes,
int* buffer_index);
static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
private:
static constexpr int kNumScratchBuffers_ = 5;
static constexpr int kKernelRunnerBufferSize_ = 10000;
static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];
SimpleMemoryAllocator* allocator_ = nullptr;
const TfLiteRegistration& registration_;
TfLiteTensor* tensors_ = nullptr;
ErrorReporter* error_reporter_ = nullptr;
TfLiteContext context_ = {};
TfLiteNode node_ = {};
int scratch_buffer_count_ = 0;
uint8_t* scratch_buffers_[kNumScratchBuffers_];
};
} // namespace micro
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
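
A hedged sketch of how a kernel test might drive this class; the registration, tensors array, and TfLiteIntArrays are assumed to be built by the caller (typically via the micro test helpers):

#include "tensorflow/lite/micro/kernels/kernel_runner.h"

TfLiteStatus RunKernelTest(const TfLiteRegistration& registration,
                           TfLiteTensor* tensors, int tensors_size,
                           TfLiteIntArray* inputs_array,
                           TfLiteIntArray* outputs_array,
                           tflite::ErrorReporter* error_reporter) {
  tflite::micro::KernelRunner runner(registration, tensors, tensors_size,
                                     inputs_array, outputs_array,
                                     /*builtin_data=*/nullptr, error_reporter);
  TF_LITE_ENSURE_STATUS(runner.InitAndPrepare());
  // After Invoke() succeeds, results are in the output tensor in `tensors`.
  return runner.Invoke();
}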

View File

@@ -0,0 +1,83 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace micro {
// Returns a mutable tensor for a given input index. is_variable must be checked
// during prepare when the full TfLiteTensor is available.
inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
const TfLiteNode* node,
int index) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
return context->GetEvalTensor(context, node->inputs->data[index]);
}
// Returns the TfLiteEvalTensor struct for a given input index in a node.
inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
const TfLiteNode* node, int index) {
return GetMutableEvalInput(context, node, index);
}
// Returns the TfLiteEvalTensor struct for a given output index in a node.
inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
const TfLiteNode* node, int index) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
return context->GetEvalTensor(context, node->outputs->data[index]);
}
// Returns data for a TfLiteEvalTensor struct.
template <typename T>
T* GetTensorData(TfLiteEvalTensor* tensor) {
return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
}
// Returns const data for a TfLiteEvalTensor struct.
template <typename T>
const T* GetTensorData(const TfLiteEvalTensor* tensor) {
TFLITE_DCHECK(tensor != nullptr);
return reinterpret_cast<const T*>(tensor->data.raw);
}
// Returns the shape of a TfLiteEvalTensor struct.
inline const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) {
if (tensor == nullptr) {
return RuntimeShape();
}
TfLiteIntArray* dims = tensor->dims;
const int dims_size = dims->size;
const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
return RuntimeShape(dims_size, dims_data);
}
// Returns true if the given tensors have the same shape.
bool HaveSameShapes(const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2);
} // namespace micro
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
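
Inside a kernel's Eval(), the helpers above typically combine as in this sketch of a hypothetical unary copy op (the include path follows the header guard):

#include "tensorflow/lite/micro/kernels/kernel_util.h"

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, /*index=*/0);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
  const float* in = tflite::micro::GetTensorData<float>(input);
  float* out = tflite::micro::GetTensorData<float>(output);
  // Element count from the shape helper; copy input through unchanged.
  const int flat_size = tflite::micro::GetTensorShape(input).FlatSize();
  for (int i = 0; i < flat_size; ++i) out[i] = in[i];
  return kTfLiteOk;
}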

View File

@@ -0,0 +1,92 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
#include "tensorflow/lite/c/common.h"
namespace tflite {
namespace ops {
namespace micro {
// Forward declaration of all micro op kernel registration methods. These
// registrations are included with the standard `BuiltinOpResolver`.
//
// This header is particularly useful in cases where only a subset of ops are
// needed. In such cases, the client can selectively add only the registrations
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
// registration in turn allows the linker to strip unused kernels.
TfLiteRegistration Register_ABS();
TfLiteRegistration Register_ADD();
TfLiteRegistration Register_ARG_MAX();
TfLiteRegistration Register_ARG_MIN();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_CEIL();
// TODO(b/160234179): Change custom OPs to also return by value.
TfLiteRegistration* Register_CIRCULAR_BUFFER();
TfLiteRegistration Register_CONV_2D();
TfLiteRegistration Register_CONCATENATION();
TfLiteRegistration Register_COS();
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
TfLiteRegistration Register_DEQUANTIZE();
TfLiteRegistration Register_EQUAL();
TfLiteRegistration Register_FLOOR();
TfLiteRegistration Register_FULLY_CONNECTED();
TfLiteRegistration Register_GREATER();
TfLiteRegistration Register_GREATER_EQUAL();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_LESS();
TfLiteRegistration Register_LESS_EQUAL();
TfLiteRegistration Register_LOG();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_NOT();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAXIMUM();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_MEAN();
TfLiteRegistration Register_MINIMUM();
TfLiteRegistration Register_MUL();
TfLiteRegistration Register_NEG();
TfLiteRegistration Register_NOT_EQUAL();
TfLiteRegistration Register_PACK();
TfLiteRegistration Register_PAD();
TfLiteRegistration Register_PADV2();
TfLiteRegistration Register_PRELU();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESHAPE();
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration Register_ROUND();
TfLiteRegistration Register_RSQRT();
TfLiteRegistration Register_SIN();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SPLIT();
TfLiteRegistration Register_SQRT();
TfLiteRegistration Register_SQUARE();
TfLiteRegistration Register_STRIDED_SLICE();
TfLiteRegistration Register_SUB();
TfLiteRegistration Register_SVDF();
TfLiteRegistration Register_UNPACK();
TfLiteRegistration Register_L2_NORMALIZATION();
TfLiteRegistration Register_TANH();
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
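
Since each registration is a plain struct of function pointers, a client can fetch one directly; selective registration ultimately boils down to this (a sketch; the include path follows the header guard):

#include "tensorflow/lite/micro/kernels/micro_ops.h"

// reg.init / reg.prepare / reg.invoke are the kernel's lifecycle hooks,
// the same ones KernelRunner exercises above.
TfLiteRegistration reg = tflite::ops::micro::Register_FULLY_CONNECTED();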

View File

@@ -0,0 +1,37 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
namespace tflite {
namespace ops {
namespace micro {
// Same as gtl::Greater but defined here to reduce dependencies and
// binary size for the micro environment.
struct Greater {
template <typename T>
bool operator()(const T& x, const T& y) const {
return x > y;
}
};
struct Less {
template <typename T>
bool operator()(const T& x, const T& y) const {
return x < y;
}
};
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
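
These functors slot straight into standard algorithms, e.g. sorting scores in descending order (a sketch; the include path follows the header guard):

#include <algorithm>

#include "tensorflow/lite/micro/kernels/micro_utils.h"

void SortDescending(float* scores, int count) {
  // Greater() yields a descending order without pulling in gtl.
  std::sort(scores, scores + count, tflite::ops::micro::Greater());
}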

View File

@@ -0,0 +1,59 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Returns the next pointer address aligned to the given alignment.
uint8_t* AlignPointerUp(uint8_t* data, size_t alignment);
// Returns the previous pointer address aligned to the given alignment.
uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
// Returns an increased size that's a multiple of alignment.
size_t AlignSizeUp(size_t size, size_t alignment);
// Returns size in bytes for a given TfLiteType.
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);
// How many bytes are needed to hold a tensor's contents.
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter);
// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
// returned in out_bytes.
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes);
// Deduce output dimensions from input and allocate given size.
// Useful for operators with two inputs where the largest input should equal the
// output dimension.
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
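
For example, carving a 16-byte-aligned region out of a raw arena might look like this sketch (the values are invented; the include path follows the header guard):

#include "tensorflow/lite/micro/memory_helpers.h"

uint8_t arena[1024];

void AlignExample() {
  // Round the base pointer up to the next 16-byte boundary.
  uint8_t* aligned = tflite::AlignPointerUp(arena, 16);
  // Round a 100-byte request up to a multiple of 16 (yields 112).
  size_t padded = tflite::AlignSizeUp(100, 16);
  (void)aligned;
  (void)padded;
}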

View File

@@ -0,0 +1,163 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
namespace tflite {
constexpr int kOnlinePlannedBuffer = -1;
// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
// The algorithm works like this:
// - The client enters the buffer information through AddBuffer().
// - When a function like GetOffsetForBuffer() is called, the
// CalculateOffsetsIfNeeded() method is invoked.
// - If an up-to-date plan is not already present, one will be calculated.
// - The buffers are sorted in descending order of size.
// - The largest buffer is placed at offset zero.
// - The rest of the buffers are looped through in descending size order.
// - The other buffers that need to be in memory at the same time are found.
// - The first gap between simultaneously active buffers that the current
// buffer fits into will be used.
// - If no large-enough gap is found, the current buffer is placed after the
// last buffer that's simultaneously active.
// - This continues until all buffers are placed, and the offsets stored.
//
// This is not guaranteed to produce the best placement, since that's an
// NP-Complete problem, but in practice it should produce one that's decent.
class GreedyMemoryPlanner : public MemoryPlanner {
public:
// You need to pass in an area of memory to be used for planning. This memory
// needs to have a lifetime as long as the planner, but isn't owned by this
// object, so management should be handled by the client. This is so it can be
// stack or globally allocated if necessary on devices without dynamic memory
// allocation. How many buffers can be planned for will depend on the size of
// this scratch memory, so you should enlarge it if you see an error when
// calling AddBuffer(). The memory can be reused once you're done with the
// planner, as long as you copy the calculated offsets to another location.
// Each buffer requires about 36 bytes of scratch.
GreedyMemoryPlanner(unsigned char* scratch_buffer, int scratch_buffer_size);
~GreedyMemoryPlanner() override;
// Record details of a buffer we want to place.
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used) override;
// Record details of an offline planned buffer offset we want to place.
// offline_offset is the buffer offset from the start of the arena.
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used,
int offline_offset);
// Returns the high-water mark of used memory. This is the minimum size of a
// memory arena you'd need to allocate to hold these buffers.
size_t GetMaximumMemorySize() override;
// How many buffers have been recorded.
int GetBufferCount() override;
// Where a given buffer should be placed in the memory arena.
// This information is stored in the memory arena itself, so once the arena
// is used for inference, it will be overwritten.
TfLiteStatus GetOffsetForBuffer(ErrorReporter* error_reporter,
int buffer_index, int* offset) override;
// Prints an ASCII-art diagram of the buffer layout plan.
void PrintMemoryPlan(ErrorReporter* error_reporter);
// Debug method to check whether any buffer allocations are overlapping. This
// is an O(N^2) complexity operation, so only use for testing.
bool DoAnyBuffersOverlap(ErrorReporter* error_reporter);
// Used to store a list of buffers ordered by their offset.
struct ListEntry {
int offset;
int requirements_index;
int next_entry_index;
};
// Number of bytes required in order to plan a buffer.
static size_t per_buffer_size() {
const int per_buffer_size =
sizeof(BufferRequirements) + // requirements_
sizeof(int) + // buffer_sizes_sorted_
sizeof(int) + // buffer_ids_sorted_
sizeof(ListEntry) + // buffers_sorted_by_offset_
sizeof(int); // buffer_offsets_;
return per_buffer_size;
}
private:
// Whether a buffer is active in a given time range.
bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
const int last_time_used) const;
// Walks the list to return the next buffer that is active in a given time
// range, or a null pointer if there are none.
ListEntry* NextSimultaneouslyActiveBuffer(const ListEntry* start,
const int first_time_used,
const int last_time_used);
// If there isn't an up-to-date plan, calculate a new one.
void CalculateOffsetsIfNeeded();
// How many buffers we can plan for, based on the arena size we're given in
// the constructor.
int max_buffer_count_;
// The number of buffers added so far.
int buffer_count_;
// Records the client-provided information about each buffer.
struct BufferRequirements {
int size;
int offline_offset;
int first_time_used;
int last_time_used;
};
// Working arrays used during the layout algorithm.
BufferRequirements* requirements_;
// buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
// {
// offline planned buffers,
// online planned buffers sorted by size
// }
int* buffer_sizes_sorted_;
int* buffer_ids_sorted_;
ListEntry* buffers_sorted_by_offset_;
int next_free_entry_; // Index of the next free entry of
// buffers_sorted_by_offset_
int first_entry_index_; // Index of the first entry (smallest offset) of
// buffers_sorted_by_offset_
// Stores the outcome of the plan, the location of each buffer in the arena.
int* buffer_offsets_;
// Whether buffers have been added since the last plan was calculated.
bool need_to_calculate_offsets_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
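
Concretely, setting up the planner with its scratch area might look like this sketch (the buffer sizes and lifetimes are invented):

#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"

void PlanExample(tflite::ErrorReporter* error_reporter) {
  // Roughly 36 bytes of scratch per planned buffer, per the comment above.
  static unsigned char planner_scratch[1024];
  tflite::GreedyMemoryPlanner planner(planner_scratch,
                                      sizeof(planner_scratch));
  planner.AddBuffer(error_reporter, /*size=*/256, /*first_time_used=*/0,
                    /*last_time_used=*/1);
  planner.AddBuffer(error_reporter, /*size=*/128, /*first_time_used=*/1,
                    /*last_time_used=*/2);
  int offset = 0;
  planner.GetOffsetForBuffer(error_reporter, /*buffer_index=*/0, &offset);
  size_t arena_needed = planner.GetMaximumMemorySize();
  (void)arena_needed;
}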

View File

@@ -0,0 +1,50 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
namespace tflite {
// The simplest possible memory planner that just lays out all buffers at
// increasing offsets without trying to reuse memory.
class LinearMemoryPlanner : public MemoryPlanner {
public:
LinearMemoryPlanner();
~LinearMemoryPlanner() override;
TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used) override;
size_t GetMaximumMemorySize() override;
int GetBufferCount() override;
TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
int buffer_index, int* offset) override;
private:
static constexpr int kMaxBufferCount = 1024;
size_t buffer_offsets_[kMaxBufferCount];
int current_buffer_count_;
size_t next_free_offset_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_

View File

@@ -0,0 +1,71 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
namespace tflite {
// Interface class for planning the layout of memory buffers during the
// execution of a graph.
// It's designed to be used by a client that iterates in any order through the
// buffers it wants to lay out, and then calls the getter functions for
// information about the calculated layout. For example:
//
// SomeMemoryPlanner planner;
// planner.AddBuffer(reporter, 100, 0, 1); // Buffer 0
// planner.AddBuffer(reporter, 50, 2, 3); // Buffer 1
// planner.AddBuffer(reporter, 50, 2, 3); // Buffer 2
//
// int offset0;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 0, &offset0));
// int offset1;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 1, &offset1));
// int offset2;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 2, &offset2));
// const int arena_size_needed = planner.GetMaximumMemorySize();
//
// The goal is for applications to be able to experiment with different layout
// strategies without changing their client code, by swapping out classes that
// implement this interface.
class MemoryPlanner {
public:
MemoryPlanner() {}
virtual ~MemoryPlanner() {}
// Pass information about a buffer's size and lifetime to the layout
// algorithm. The order this is called implicitly assigns an index to the
// result, so the buffer information that's passed into the N-th call of
// this method will be used as the buffer_index argument to
// GetOffsetForBuffer().
virtual TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter,
int size, int first_time_used,
int last_time_used) = 0;
// The largest contiguous block of memory that's needed to hold the layout.
virtual size_t GetMaximumMemorySize() = 0;
// How many buffers have been added to the planner.
virtual int GetBufferCount() = 0;
// Calculated layout offset for the N-th buffer added to the planner.
virtual TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
int buffer_index, int* offset) = 0;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_

View File

@@ -0,0 +1,250 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Namespace used for unittests.
namespace internal {
// Sets up all of the data structure members for a TfLiteTensor based on the
// contents of a serialized tensor in the flatbuffer.
// TODO(b/160894903): Once all kernels have been updated to the new
// TfLiteEvalTensor API - drop the allocate_temp flag. This enables internal
// flatbuffer quantization or dimension allocations to take place in either the
// temp or tail section of the arena.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
SimpleMemoryAllocator* allocator, bool allocate_temp,
const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteTensor* result);
// A handle tracking scratch buffer allocation. This handle is created by
// `RequestScratchBufferInArena`. `data` field is populated in
// `FinishModelAllocation` after static memory planning.
// TODO(b/150257460) As a future optimization, this struct could be replaced by
// a union, since once `data` is populated, `bytes` and `node_idx` are not
// needed.
typedef struct {
// Pointer to the scratch buffer.
uint8_t* data;
// Number of bytes required by the buffer. The actual allocated size might be
// greater than `bytes` due to buffer alignment.
size_t bytes;
// Node that the buffer is allocated for. This provides useful information to
// determine the lifetime of the buffer. In AllocationInfo, this buffer will
// have `before` = node_idx and `after` = node_idx.
int node_idx;
} ScratchBufferHandle;
} // namespace internal
typedef struct {
TfLiteNode node;
const TfLiteRegistration* registration;
} NodeAndRegistration;
// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are required
// to stand up a model for inference in TF Micro. This class currently relies on
// an additional allocator - SimpleMemoryAllocator - for all allocations from an
// arena. These allocations are divided into head (non-persistent) and tail
// (persistent) regions:
//
// The memory layout below helps to understand how it works; it may change in
// future versions.
// ************** .memory_allocator->GetBuffer()
// Tensors/Scratch buffers (head)
// ************** .head_watermark
// unused memory
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
// - ->GetDataSize()
// persistent area (tail)
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
class MicroAllocator {
public:
// Creates a MicroAllocator instance from a given tensor arena. This arena
// will be managed by the created instance.
// Note: Please use __declspec(align(16)) to make sure tensor_arena is 16
// bytes aligned, otherwise some head room will be wasted.
// TODO(b/157615197): Cleanup constructor + factory usage.
static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
ErrorReporter* error_reporter);
// Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
// instance. This allocator instance will use the SimpleMemoryAllocator
// instance to manage allocations internally.
static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
// Begin allocating internal resources required for model inference.
// This method will run through the flatbuffer data supplied in the model to
// properly allocate tensor, node, and op registration data. This method is
// expected to be followed with a call to FinishModelAllocation() before
// resuming allocation with another model. All persistent tensor buffers are
// stored in the out-param eval_tensors. This value is allocated from the
// persistent memory arena and will be used to host runtime tensor buffers.
TfLiteStatus StartModelAllocation(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations,
TfLiteEvalTensor** eval_tensors);
// Finish allocating internal resources required for model inference.
// This method will plan non-persistent buffers and commit a memory plan to
// the 'head' section of the memory arena. All variable tensor data will also
// be allocated. This method should be called after assigning model resources
// in StartModelAllocation(). The eval_tensors pointer should be the value
// passed into this class during StartModelAllocation().
TfLiteStatus FinishModelAllocation(const Model* model,
TfLiteEvalTensor* eval_tensors);
// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
// persistent arena memory and is only guaranteed for the lifetime of the
// application. The eval_tensors pointer should be the value passed into this
// class during StartModelAllocation() and contains the source-of-truth for
// buffers.
virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
// temporary arena memory and is only guaranteed until a call is made to
// ResetTempAllocations(). The eval_tensors pointer should be the value passed
// into this class during StartModelAllocation() and contains the
// source-of-truth for buffers.
virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
TfLiteEvalTensor* eval_tensors,
int tensor_index);
// Resets all temporary allocations. This method should be called after a
// chain of temp allocations (e.g. chain of TfLiteTensor objects via
// AllocateTempTfLiteTensor()).
virtual void ResetTempAllocations();
// Allocates a persistent buffer which has the same lifetime as the allocator.
// The memory is immediately available and is allocated from the tail of the
// arena.
void* AllocatePersistentBuffer(size_t bytes);
// Registers a scratch buffer of size `bytes` for the node with `node_id`.
// This method only allocates a BufferHandle holding information for memory
// planning. The buffer ptr is ready after `FinishModelAllocation` and can
// be retrieved by `GetScratchBuffer` method using the returned buffer_idx.
// Note that there should be no tail allocation between two consecutive
// `RequestScratchBufferInArena` calls.
TfLiteStatus RequestScratchBufferInArena(int node_id, size_t bytes,
int* buffer_idx);
// Returns the pointer to the planned scratch buffer.
void* GetScratchBuffer(int buffer_idx) const;
// Returns the arena usage in bytes, only available after
// `FinishModelAllocation`. Otherwise, it will return 0.
size_t used_bytes() const;
protected:
MicroAllocator(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
virtual ~MicroAllocator();
// Allocates an array in the arena to hold pointers to the node and
// registration pointers required to represent the inference graph of the
// model.
virtual TfLiteStatus AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations);
// Populates node and registration pointers representing the inference graph
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. operator data) is allocated from the
// arena.
virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations);
// Allocates the list of persistent TfLiteEvalTensors that are used for the
// "eval" phase of model inference. These structs will be the source of truth
// for all tensor buffers. Allocation results are stored in the out-param
// eval_tensors.
virtual TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors);
// Allocates persistent tensor buffers for variable tensors in the subgraph.
virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this method. It is only used to record TfLiteTensor persistent allocations.
virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
// Populates a TfLiteTensor struct with data from the model flatbuffer. Any
// quantization data is allocated from either the tail (persistent) or temp
// sections of the arena based on the allocation flag.
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this function since all allocations for quantized data will take place in
// the temp section.
virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp);
ErrorReporter* error_reporter() const;
// Returns the first subgraph from the model.
const SubGraph* GetSubGraphFromModel(const Model* model);
private:
// Commits a memory plan for all non-persistent buffer allocations in the
// 'head' section of the memory arena. The eval_tensors pointer is the list of
// pre-allocated TfLiteEvalTensor structs that will point to the buffers that
// will be allocated into the head section in this function call.
virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);
// A simple memory allocator that always allocates from the arena tail or head.
SimpleMemoryAllocator* memory_allocator_;
ErrorReporter* error_reporter_;
bool model_is_allocating_;
// In reverse order for efficiency.
// i.e. scratch_buffer_handles_[0] is the handle for the last buffer,
// corresponding to the last RequestScratchBufferInArena call.
internal::ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
// How many scratch buffers have been allocated.
size_t scratch_buffer_count_ = 0;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
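
A sketch of the first factory in use; normally the interpreter creates the allocator internally, and sharing one like this is the case the second MicroInterpreter constructor (below) serves:

#include "tensorflow/lite/micro/micro_allocator.h"

// Per the note above, keep the arena 16-byte aligned to avoid wasted head room.
alignas(16) static uint8_t arena[16 * 1024];

tflite::MicroAllocator* MakeAllocator(tflite::ErrorReporter* error_reporter) {
  // The returned object itself lives inside the arena it manages.
  return tflite::MicroAllocator::Create(arena, sizeof(arena), error_reporter);
}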

View File

@@ -0,0 +1,36 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#include <cstdarg>
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
class MicroErrorReporter : public ErrorReporter {
public:
~MicroErrorReporter() override {}
int Report(const char* format, va_list args) override;
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
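
Typical use routes everything through the base-class pointer so a platform-specific reporter can be swapped in (a sketch):

#include "tensorflow/lite/micro/micro_error_reporter.h"

tflite::MicroErrorReporter micro_error_reporter;
tflite::ErrorReporter* error_reporter = &micro_error_reporter;
// Report() ultimately funnels into DebugLog(); callers usually go through the
// TF_LITE_REPORT_ERROR macro from error_reporter.h, e.g.
//   TF_LITE_REPORT_ERROR(error_reporter, "arena too small: %d", bytes);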

View File

@@ -0,0 +1,208 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/type_to_tflitetype.h"
namespace tflite {
namespace internal {
// A helper class to encapsulate the implementation of APIs in Context.
// context->impl_ points to an instance of this class.
// Check tensorflow/lite/c/common.h for detailed descriptions.
// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
class ContextHelper {
public:
explicit ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator, const Model* model);
// Functions that will be assigned to function pointers on TfLiteContext:
static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx);
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_idx);
// Sets the current node index to assist with scratch buffer allocations:
void SetNodeIndex(int idx);
// Sets the pointer to a list of TfLiteEvalTensor instances.
void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);
private:
MicroAllocator* allocator_;
ErrorReporter* error_reporter_;
const Model* model_;
TfLiteEvalTensor* eval_tensors_;
int current_node_idx_ = -1;
};
} // namespace internal
class MicroInterpreter {
public:
// The lifetime of the model, op resolver, tensor arena, error reporter and
// profiler must be at least as long as that of the interpreter object, since
// the interpreter may need to access them at any time. This means that you
// should usually create them with the same scope as each other, for example
// having them all allocated on the stack as local variables through a
// top-level function. The interpreter doesn't do any deallocation of any of
// the pointed-to objects, ownership remains with the caller.
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);
// Create an interpreter instance using an existing MicroAllocator instance.
// This constructor should be used when the allocator needs to handle
// allocations for more than one interpreter, or when allocations made inside
// the interpreter need to be recorded. The lifetime of the allocator must be
// as long as that of the interpreter object.
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
MicroAllocator* allocator, ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);
~MicroInterpreter();
// Runs through the model and allocates all necessary input, output and
// intermediate tensors.
TfLiteStatus AllocateTensors();
// In order to support partial graph runs for strided models, this can return
// values other than kTfLiteOk and kTfLiteError.
// TODO(b/149795762): Add this to the TfLiteStatus enum.
TfLiteStatus Invoke();
size_t tensors_size() const { return context_.tensors_size; }
TfLiteTensor* tensor(size_t tensor_index);
template <class T>
T* typed_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
TfLiteTensor* input(size_t index);
size_t inputs_size() const { return subgraph_->inputs()->Length(); }
const flatbuffers::Vector<int32_t>& inputs() const {
return *subgraph_->inputs();
}
TfLiteTensor* input_tensor(size_t index) { return input(index); }
template <class T>
T* typed_input_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = input_tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
TfLiteTensor* output(size_t index);
size_t outputs_size() const { return subgraph_->outputs()->Length(); }
const flatbuffers::Vector<int32_t>& outputs() const {
return *subgraph_->outputs();
}
TfLiteTensor* output_tensor(size_t index) { return output(index); }
template <class T>
T* typed_output_tensor(int tensor_index) {
if (TfLiteTensor* tensor_ptr = output_tensor(tensor_index)) {
if (tensor_ptr->type == typeToTfLiteType<T>()) {
return GetTensorData<T>(tensor_ptr);
}
}
return nullptr;
}
// Reset all variable tensors to the default value.
TfLiteStatus ResetVariableTensors();
TfLiteStatus initialization_status() const { return initialization_status_; }
size_t operators_size() const { return subgraph_->operators()->size(); }
// For debugging only.
const NodeAndRegistration node_and_registration(int node_index) const {
return node_and_registrations_[node_index];
}
// For debugging only.
// Returns the number of arena bytes actually used. This method gives the
// optimal arena size. It is only available after `AllocateTensors` has been
// called.
// Note that normally `tensor_arena` requires 16-byte alignment to fully
// utilize the space. If that is not the case, the optimal arena size would be
// arena_used_bytes() + 16.
size_t arena_used_bytes() const { return allocator_.used_bytes(); }
protected:
const MicroAllocator& allocator() const { return allocator_; }
const TfLiteContext& context() const { return context_; }
private:
// TODO(b/158263161): Consider switching to Create() function to enable better
// error reporting during initialization.
void Init(tflite::Profiler* profiler);
void CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr);
template <class T>
void CorrectTensorDataEndianness(T* data, int32_t size);
NodeAndRegistration* node_and_registrations_ = nullptr;
const Model* model_;
const MicroOpResolver& op_resolver_;
ErrorReporter* error_reporter_;
TfLiteContext context_ = {};
MicroAllocator& allocator_;
bool tensors_allocated_;
TfLiteStatus initialization_status_;
const SubGraph* subgraph_;
TfLiteEvalTensor* eval_tensors_;
internal::ContextHelper context_helper_;
// TODO(b/160894903): Clean these pointers up when all APIs are updated to new
// TfLiteEvalTensor buffers.
TfLiteTensor* input_tensor_;
TfLiteTensor* output_tensor_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
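
For reference, a minimal sketch of wiring the interpreter up as the lifetime comment above requires, with every pointed-to object owned by the caller. The model symbol `g_model_data`, the arena size, and the use of AllOpsResolver are illustrative assumptions, not part of this header.

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_model_data[];  // hypothetical model flatbuffer

int RunModelOnce() {
  static tflite::MicroErrorReporter error_reporter;
  const tflite::Model* model = tflite::GetModel(g_model_data);
  static tflite::AllOpsResolver resolver;
  constexpr size_t kArenaSize = 16 * 1024;  // assumption: tune per model
  alignas(16) static uint8_t tensor_arena[kArenaSize];
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kArenaSize, &error_reporter);
  if (interpreter.AllocateTensors() != kTfLiteOk) return -1;
  // Fill interpreter.input(0) here, then run the graph.
  if (interpreter.Invoke() != kTfLiteOk) return -1;
  float* output = interpreter.typed_output_tensor<float>(0);
  return (output != nullptr) ? 0 : -1;
}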

View File

@@ -0,0 +1,458 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#include <stdio.h>
#include <cstring>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
template <unsigned int tOpCount>
class MicroMutableOpResolver : public MicroOpResolver {
public:
explicit MicroMutableOpResolver(ErrorReporter* error_reporter = nullptr)
: error_reporter_(error_reporter) {}
const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override {
if (op == BuiltinOperator_CUSTOM) return nullptr;
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if (registration.builtin_code == op) {
return &registration;
}
}
return nullptr;
}
const TfLiteRegistration* FindOp(const char* op) const override {
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if ((registration.builtin_code == BuiltinOperator_CUSTOM) &&
(strcmp(registration.custom_name, op) == 0)) {
return &registration;
}
}
return nullptr;
}
MicroOpResolver::BuiltinParseFunction GetOpDataParser(
BuiltinOperator op) const override {
TFLITE_DCHECK(num_builtin_ops_ <= tOpCount);
for (unsigned int i = 0; i < num_builtin_ops_; ++i) {
if (builtin_codes_[i] == op) return builtin_parsers_[i];
}
return nullptr;
}
// Registers a Custom Operator with the MicroOpResolver.
//
// Only the first call for a given name will be successful, i.e. if this
// function is called again for a previously added Custom Operator, the
// MicroOpResolver will be unchanged and this function will return
// kTfLiteError.
TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) {
if (registrations_len_ >= tOpCount) {
if (error_reporter_) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Couldn't register custom op '%s', resolver size is too small (%d)",
name, tOpCount);
}
return kTfLiteError;
}
if (FindOp(name) != nullptr) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Calling AddCustom for the same op more than once "
"is not supported (Op: %s).",
name);
}
return kTfLiteError;
}
TfLiteRegistration* new_registration = &registrations_[registrations_len_];
registrations_len_ += 1;
*new_registration = *registration;
new_registration->builtin_code = BuiltinOperator_CUSTOM;
new_registration->custom_name = name;
return kTfLiteOk;
}
// The Add* functions below add the various Builtin operators to the
// MicroMutableOpResolver object.
TfLiteStatus AddAbs() {
return AddBuiltin(BuiltinOperator_ABS, tflite::ops::micro::Register_ABS(),
ParseAbs);
}
TfLiteStatus AddAdd() {
return AddBuiltin(BuiltinOperator_ADD, tflite::ops::micro::Register_ADD(),
ParseAdd);
}
TfLiteStatus AddArgMax() {
return AddBuiltin(BuiltinOperator_ARG_MAX,
tflite::ops::micro::Register_ARG_MAX(), ParseArgMax);
}
TfLiteStatus AddArgMin() {
return AddBuiltin(BuiltinOperator_ARG_MIN,
tflite::ops::micro::Register_ARG_MIN(), ParseArgMin);
}
TfLiteStatus AddAveragePool2D() {
return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
tflite::ops::micro::Register_AVERAGE_POOL_2D(),
ParsePool);
}
TfLiteStatus AddCeil() {
return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(),
ParseCeil);
}
TfLiteStatus AddCircularBuffer() {
return AddCustom("CIRCULAR_BUFFER",
tflite::ops::micro::Register_CIRCULAR_BUFFER());
}
TfLiteStatus AddConcatenation() {
return AddBuiltin(BuiltinOperator_CONCATENATION,
tflite::ops::micro::Register_CONCATENATION(),
ParseConcatenation);
}
TfLiteStatus AddConv2D() {
return AddBuiltin(BuiltinOperator_CONV_2D,
tflite::ops::micro::Register_CONV_2D(), ParseConv2D);
}
TfLiteStatus AddCos() {
return AddBuiltin(BuiltinOperator_COS, tflite::ops::micro::Register_COS(),
ParseCos);
}
TfLiteStatus AddDepthwiseConv2D() {
return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
tflite::ops::micro::Register_DEPTHWISE_CONV_2D(),
ParseDepthwiseConv2D);
}
TfLiteStatus AddDequantize() {
return AddBuiltin(BuiltinOperator_DEQUANTIZE,
tflite::ops::micro::Register_DEQUANTIZE(),
ParseDequantize);
}
TfLiteStatus AddEqual() {
return AddBuiltin(BuiltinOperator_EQUAL,
tflite::ops::micro::Register_EQUAL(), ParseEqual);
}
TfLiteStatus AddFloor() {
return AddBuiltin(BuiltinOperator_FLOOR,
tflite::ops::micro::Register_FLOOR(), ParseFloor);
}
TfLiteStatus AddFullyConnected() {
return AddBuiltin(BuiltinOperator_FULLY_CONNECTED,
tflite::ops::micro::Register_FULLY_CONNECTED(),
ParseFullyConnected);
}
TfLiteStatus AddGreater() {
return AddBuiltin(BuiltinOperator_GREATER,
tflite::ops::micro::Register_GREATER(), ParseGreater);
}
TfLiteStatus AddGreaterEqual() {
return AddBuiltin(BuiltinOperator_GREATER_EQUAL,
tflite::ops::micro::Register_GREATER_EQUAL(),
ParseGreaterEqual);
}
TfLiteStatus AddHardSwish() {
return AddBuiltin(BuiltinOperator_HARD_SWISH,
tflite::ops::micro::Register_HARD_SWISH(),
ParseHardSwish);
}
TfLiteStatus AddL2Normalization() {
return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
tflite::ops::micro::Register_L2_NORMALIZATION(),
ParseL2Normalization);
}
TfLiteStatus AddLess() {
return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(),
ParseLess);
}
TfLiteStatus AddLessEqual() {
return AddBuiltin(BuiltinOperator_LESS_EQUAL,
tflite::ops::micro::Register_LESS_EQUAL(),
ParseLessEqual);
}
TfLiteStatus AddLog() {
return AddBuiltin(BuiltinOperator_LOG, tflite::ops::micro::Register_LOG(),
ParseLog);
}
TfLiteStatus AddLogicalAnd() {
return AddBuiltin(BuiltinOperator_LOGICAL_AND,
tflite::ops::micro::Register_LOGICAL_AND(),
ParseLogicalAnd);
}
TfLiteStatus AddLogicalNot() {
return AddBuiltin(BuiltinOperator_LOGICAL_NOT,
tflite::ops::micro::Register_LOGICAL_NOT(),
ParseLogicalNot);
}
TfLiteStatus AddLogicalOr() {
return AddBuiltin(BuiltinOperator_LOGICAL_OR,
tflite::ops::micro::Register_LOGICAL_OR(),
ParseLogicalOr);
}
TfLiteStatus AddLogistic() {
return AddBuiltin(BuiltinOperator_LOGISTIC,
tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
}
TfLiteStatus AddMaximum() {
return AddBuiltin(BuiltinOperator_MAXIMUM,
tflite::ops::micro::Register_MAXIMUM(), ParseMaximum);
}
TfLiteStatus AddMaxPool2D() {
return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
}
TfLiteStatus AddMean() {
return AddBuiltin(BuiltinOperator_MEAN, tflite::ops::micro::Register_MEAN(),
ParseReducer);
}
TfLiteStatus AddMinimum() {
return AddBuiltin(BuiltinOperator_MINIMUM,
tflite::ops::micro::Register_MINIMUM(), ParseMinimum);
}
TfLiteStatus AddMul() {
return AddBuiltin(BuiltinOperator_MUL, tflite::ops::micro::Register_MUL(),
ParseMul);
}
TfLiteStatus AddNeg() {
return AddBuiltin(BuiltinOperator_NEG, tflite::ops::micro::Register_NEG(),
ParseNeg);
}
TfLiteStatus AddNotEqual() {
return AddBuiltin(BuiltinOperator_NOT_EQUAL,
tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual);
}
TfLiteStatus AddPack() {
return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(),
ParsePack);
}
TfLiteStatus AddPad() {
return AddBuiltin(BuiltinOperator_PAD, tflite::ops::micro::Register_PAD(),
ParsePad);
}
TfLiteStatus AddPadV2() {
return AddBuiltin(BuiltinOperator_PADV2,
tflite::ops::micro::Register_PADV2(), ParsePadV2);
}
TfLiteStatus AddPrelu() {
return AddBuiltin(BuiltinOperator_PRELU,
tflite::ops::micro::Register_PRELU(), ParsePrelu);
}
TfLiteStatus AddQuantize() {
return AddBuiltin(BuiltinOperator_QUANTIZE,
tflite::ops::micro::Register_QUANTIZE(), ParseQuantize);
}
TfLiteStatus AddRelu() {
return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
ParseRelu);
}
TfLiteStatus AddRelu6() {
return AddBuiltin(BuiltinOperator_RELU6,
tflite::ops::micro::Register_RELU6(), ParseRelu6);
}
TfLiteStatus AddReshape() {
return AddBuiltin(BuiltinOperator_RESHAPE,
tflite::ops::micro::Register_RESHAPE(), ParseReshape);
}
TfLiteStatus AddResizeNearestNeighbor() {
return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
ParseResizeNearestNeighbor);
}
TfLiteStatus AddRound() {
return AddBuiltin(BuiltinOperator_ROUND,
tflite::ops::micro::Register_ROUND(), ParseRound);
}
TfLiteStatus AddRsqrt() {
return AddBuiltin(BuiltinOperator_RSQRT,
tflite::ops::micro::Register_RSQRT(), ParseRsqrt);
}
TfLiteStatus AddSin() {
return AddBuiltin(BuiltinOperator_SIN, tflite::ops::micro::Register_SIN(),
ParseSin);
}
TfLiteStatus AddSoftmax() {
return AddBuiltin(BuiltinOperator_SOFTMAX,
tflite::ops::micro::Register_SOFTMAX(), ParseSoftmax);
}
TfLiteStatus AddSplit() {
return AddBuiltin(BuiltinOperator_SPLIT,
tflite::ops::micro::Register_SPLIT(), ParseSplit);
}
TfLiteStatus AddSqrt() {
return AddBuiltin(BuiltinOperator_SQRT, tflite::ops::micro::Register_SQRT(),
ParseSqrt);
}
TfLiteStatus AddSquare() {
return AddBuiltin(BuiltinOperator_SQUARE,
tflite::ops::micro::Register_SQUARE(), ParseSquare);
}
TfLiteStatus AddStridedSlice() {
return AddBuiltin(BuiltinOperator_STRIDED_SLICE,
tflite::ops::micro::Register_STRIDED_SLICE(),
ParseStridedSlice);
}
TfLiteStatus AddSub() {
return AddBuiltin(BuiltinOperator_SUB, tflite::ops::micro::Register_SUB(),
ParseSub);
}
TfLiteStatus AddSvdf() {
return AddBuiltin(BuiltinOperator_SVDF, tflite::ops::micro::Register_SVDF(),
ParseSvdf);
}
TfLiteStatus AddTanh() {
return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(),
ParseTanh);
}
TfLiteStatus AddUnpack() {
return AddBuiltin(BuiltinOperator_UNPACK,
tflite::ops::micro::Register_UNPACK(), ParseUnpack);
}
unsigned int GetRegistrationLength() { return registrations_len_; }
private:
TfLiteStatus AddBuiltin(tflite::BuiltinOperator op,
const TfLiteRegistration& registration,
MicroOpResolver::BuiltinParseFunction parser) {
if (op == BuiltinOperator_CUSTOM) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Invalid parameter BuiltinOperator_CUSTOM to the "
"AddBuiltin function.");
}
return kTfLiteError;
}
if (FindOp(op) != nullptr) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Calling AddBuiltin with the same op more than "
"once is not supported (Op: #%d).",
op);
}
return kTfLiteError;
}
if (registrations_len_ >= tOpCount) {
if (error_reporter_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Couldn't register builtin op #%d, resolver size "
"is too small (%d).",
op, tOpCount);
}
return kTfLiteError;
}
registrations_[registrations_len_] = registration;
// Strictly speaking, the builtin_code is not necessary for TFLM, but we fill
// it in regardless.
registrations_[registrations_len_].builtin_code = op;
registrations_len_++;
builtin_codes_[num_builtin_ops_] = op;
builtin_parsers_[num_builtin_ops_] = parser;
num_builtin_ops_++;
return kTfLiteOk;
}
TfLiteRegistration registrations_[tOpCount];
unsigned int registrations_len_ = 0;
// Arrays (and counter) to store the builtin codes and their corresponding
// parse functions as these are registered with the Op Resolver.
BuiltinOperator builtin_codes_[tOpCount];
MicroOpResolver::BuiltinParseFunction builtin_parsers_[tOpCount];
unsigned int num_builtin_ops_ = 0;
ErrorReporter* error_reporter_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
}  // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
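
As a sketch of the fine-grained registration path this header enables: the four-op set below is an arbitrary assumption for illustration, sized exactly by the template parameter so the resolver carries no unused slots.

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

TfLiteStatus RegisterOps(tflite::MicroMutableOpResolver<4>& resolver) {
  if (resolver.AddConv2D() != kTfLiteOk) return kTfLiteError;
  if (resolver.AddMaxPool2D() != kTfLiteOk) return kTfLiteError;
  if (resolver.AddFullyConnected() != kTfLiteOk) return kTfLiteError;
  if (resolver.AddSoftmax() != kTfLiteOk) return kTfLiteError;
  // A fifth Add* call would return kTfLiteError: tOpCount caps the op count.
  return kTfLiteOk;
}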

View File

@@ -0,0 +1,73 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// This is an interface for the OpResolver for TFLiteMicro. The differences from
// the TFLite OpResolver base class are to:
// * explicitly remove support for Op versions
// * allow for finer grained registration of the Builtin Ops to reduce code
// size for TFLiteMicro.
//
// We need an interface class instead of directly using MicroMutableOpResolver
// because MicroMutableOpResolver is a class template with the number of
// registered Ops as the template parameter.
class MicroOpResolver : public OpResolver {
public:
typedef TfLiteStatus (*BuiltinParseFunction)(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
// Returns the Op registration struct corresponding to the enum code from the
// flatbuffer schema. Returns nullptr if the op is not found or if op ==
// BuiltinOperator_CUSTOM.
virtual const TfLiteRegistration* FindOp(BuiltinOperator op) const = 0;
// Returns the Op registration struct corresponding to the custom operator by
// name.
virtual const TfLiteRegistration* FindOp(const char* op) const = 0;
// This implementation exists for compatibility with the OpResolver base class
// and disregards the version parameter.
const TfLiteRegistration* FindOp(BuiltinOperator op,
int version) const final {
return FindOp(op);
}
// This implementation exists for compatibility with the OpResolver base class
// and disregards the version parameter.
const TfLiteRegistration* FindOp(const char* op, int version) const final {
return FindOp(op);
}
// Returns the operator specific parsing function for the OpData for a
// BuiltinOperator (if registered), else nullptr.
virtual BuiltinParseFunction GetOpDataParser(BuiltinOperator op) const = 0;
~MicroOpResolver() override {}
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_

View File

@@ -0,0 +1,30 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Optional debugging functionality. For small sized binaries, these are not
// needed.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#include "tensorflow/lite/micro/micro_interpreter.h"
namespace tflite {
// Helper function to print model flatbuffer data. This function is not called
// by default, so it is not linked into the final binary.
void PrintModelData(const Model* model, ErrorReporter* error_reporter);
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
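
A brief sketch of calling these helpers; because they are only pulled in when referenced, leaving the call out keeps them out of the final binary.

#include "tensorflow/lite/micro/micro_optional_debug_tools.h"

void DumpInterpreter(const tflite::Model* model,
                     tflite::MicroInterpreter* interpreter,
                     tflite::ErrorReporter* error_reporter) {
  tflite::PrintModelData(model, error_reporter);  // model flatbuffer contents
  tflite::PrintInterpreterState(interpreter);     // tensors and nodes
}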

View File

@@ -0,0 +1,71 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// MicroProfiler creates a common way to gain fine-grained insight into runtime
// performance. Bottleneck operators can be identified along with slow code
// sections. This can be used in conjunction with running the relevant micro
// benchmark to evaluate end-to-end performance.
//
// Usage example:
// MicroProfiler profiler(error_reporter);
// {
// ScopedProfile scoped_profile(profiler, tag);
// work_to_profile();
// }
//
// This will call the following methods in order:
// uint32_t event_handle = profiler->BeginEvent(tag, EventType::DEFAULT, 0, 0);
// work_to_profile();
// profiler->EndEvent(event_handle)
class MicroProfiler : public tflite::Profiler {
public:
explicit MicroProfiler(tflite::ErrorReporter* reporter);
~MicroProfiler() override = default;
// AddEvent is unused for Tf Micro.
void AddEvent(const char* tag, EventType event_type, uint64_t start,
uint64_t end, int64_t event_metadata1,
int64_t event_metadata2) override {}
// BeginEvent followed by code followed by EndEvent will profile the code
// enclosed. Multiple concurrent events are unsupported, so the return value
// is always 0. Event_metadata1 and event_metadata2 are unused. The tag
// pointer must be valid until EndEvent is called.
uint32_t BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) override;
// Event_handle is ignored since TF Micro does not support concurrent events.
void EndEvent(uint32_t event_handle) override;
private:
tflite::ErrorReporter* reporter_;
int32_t start_time_;
const char* event_tag_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
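
Expanding the usage comment above into a compilable sketch that pairs BeginEvent with EndEvent directly; the profiled function is a placeholder supplied by the caller.

#include <cstdint>

#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_profiler.h"

void ProfileOnce(void (*work_to_profile)()) {
  tflite::MicroErrorReporter error_reporter;
  tflite::MicroProfiler profiler(&error_reporter);
  uint32_t handle = profiler.BeginEvent(
      "work_to_profile", tflite::Profiler::EventType::DEFAULT,
      /*event_metadata1=*/0, /*event_metadata2=*/0);
  work_to_profile();
  profiler.EndEvent(handle);  // reports the tag and the elapsed time
}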

View File

@@ -0,0 +1,33 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
#define TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
#include <cstdarg>
// Implements simple string formatting for numeric types. Returns the number of
// bytes written to output.
extern "C" {
// Functionally equivalent to vsnprintf, trimmed down for TFLite Micro.
// MicroSnprintf() is implemented using MicroVsnprintf().
int MicroVsnprintf(char* output, int len, const char* format, va_list args);
// Functionally equivalent to snprintf, trimmed down for TFLite Micro.
// For example, MicroSnprintf(buffer, 10, "int %d", 10) will put the string
// "int 10" in the buffer.
// Floating point values are logged in exponent notation (1.XXX*2^N).
int MicroSnprintf(char* output, int len, const char* format, ...);
}
#endif // TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
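
A short sketch following the example in the comment above:

#include "tensorflow/lite/micro/micro_string.h"

int FormatExample(char* buffer, int buffer_len) {
  // Writes "int 10" into the buffer and returns the number of bytes written.
  return MicroSnprintf(buffer, buffer_len, "int %d", 10);
}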

View File

@@ -0,0 +1,31 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
#define TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
#include <stdint.h>
namespace tflite {
// These functions should be implemented by each target platform, and provide an
// accurate tick count along with how many ticks there are per second.
int32_t ticks_per_second();
// Return time in ticks. The meaning of a tick varies per platform.
int32_t GetCurrentTimeTicks();
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
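
A sketch of elapsed-time measurement built on these two functions. The millisecond conversion assumes ticks_per_second() is at least 1000 on the target; slower tick sources need the division reordered.

#include <stdint.h>

#include "tensorflow/lite/micro/micro_time.h"

int32_t TimeInMilliseconds(void (*work)()) {
  const int32_t start = tflite::GetCurrentTimeTicks();
  work();
  const int32_t elapsed_ticks = tflite::GetCurrentTimeTicks() - start;
  return elapsed_ticks / (tflite::ticks_per_second() / 1000);
}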

View File

@@ -0,0 +1,110 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Returns number of elements in the shape array.
int ElementCount(const TfLiteIntArray& dims);
uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
const int zero_point);
uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale);
int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
const int zero_point);
int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
const int zero_point);
int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale);
// Converts a float value into a signed thirty-two-bit quantized value. Note
// that values close to max int and min int may see significant error due to
// a lack of floating point granularity for large values.
int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale);
// Helper methods to quantize arrays of floats to the desired format.
//
// There are several key flavors of quantization in TfLite:
//            asymmetric   symmetric   per channel
//  int8_t  |     X      |     X     |      X      |
//  uint8_t |     X      |     X     |             |
//  int16_t |     X      |           |             |
//  int32_t |            |     X     |      X      |
//
// The per-op quantization spec can be found here:
// https://www.tensorflow.org/lite/performance/quantization_spec
void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
float scale, int zero_point = 0);
void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
float scale, int zero_point = 128);
void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
float scale, int zero_point = 0);
void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
float scale);
void SymmetricPerChannelQuantize(const float* input, int32_t* output,
int num_elements, int num_channels,
float* scales);
void SignedSymmetricPerChannelQuantize(const float* values,
TfLiteIntArray* dims,
int quantized_dimension,
int8_t* quantized_values,
float* scaling_factor);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int8_t* quantized_values, float* scaling_factor);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int16_t* quantized_values, float* scaling_factor);
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
int32_t* quantized_values, float* scaling_factor);
void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
uint8_t* quantized_values, float* scaling_factor);
void SymmetricDequantize(const int8_t* values, const int size,
const float dequantization_scale,
float* dequantized_values);
template <typename T>
void AsymmetricDequantize(const T* values, const int size,
const float dequantization_scale,
int dequantization_zero_point,
float* dequantized_values) {
for (int i = 0; i < size; ++i) {
dequantized_values[i] =
(values[i] - dequantization_zero_point) * dequantization_scale;
}
}
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
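
To make the int8_t asymmetric flavor concrete, a round-trip sketch; the scale and zero point below are illustrative values rather than parameters taken from any model.

#include <stdint.h>

#include "tensorflow/lite/micro/micro_utils.h"

void QuantizeRoundTrip() {
  const float input[4] = {-1.0f, -0.5f, 0.5f, 0.75f};
  int8_t quantized[4];
  float restored[4];
  const float scale = 1.0f / 128.0f;  // maps roughly [-1.0, 1.0) onto int8_t
  const int zero_point = 0;
  tflite::AsymmetricQuantize(input, quantized, 4, scale, zero_point);
  tflite::AsymmetricDequantize(quantized, 4, scale, zero_point, restored);
  // Each restored[i] now approximates input[i] to within scale / 2.
}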

View File

@@ -0,0 +1,120 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
namespace tflite {
// List of buckets currently recorded by this class. Each type keeps a list of
// allocated information during model initialization.
enum class RecordedAllocationType {
kTfLiteEvalTensorData,
kPersistentTfLiteTensorData,
kPersistentTfLiteTensorQuantizationData,
kTfLiteTensorVariableBufferData,
kNodeAndRegistrationArray,
kOpData,
};
// Container for holding information about allocation recordings by a given
// type. Each recording contains the number of bytes requested, the actual bytes
// allocated (can differ from requested due to alignment), and the number of items
// allocated.
struct RecordedAllocation {
size_t requested_bytes;
size_t used_bytes;
size_t count;
};
// Utility subclass of MicroAllocator that records all allocations
// inside the arena. A summary of allocations can be logged through the
// ErrorReporter by invoking PrintAllocations(). This special allocator requires
// an instance of RecordingSimpleMemoryAllocator to capture allocations in the
// head and tail. Arena allocation recording can be retrieved by type through
// the GetRecordedAllocation() function. This class should only be used for
// auditing memory usage or integration testing.
class RecordingMicroAllocator : public MicroAllocator {
public:
static RecordingMicroAllocator* Create(uint8_t* tensor_arena,
size_t arena_size,
ErrorReporter* error_reporter);
// Returns the recorded allocations information for a given allocation type.
RecordedAllocation GetRecordedAllocation(
RecordedAllocationType allocation_type) const;
const RecordingSimpleMemoryAllocator* GetSimpleMemoryAllocator() const;
// Logs out through the ErrorReporter all allocation recordings by type
// defined in RecordedAllocationType.
void PrintAllocations() const;
protected:
TfLiteStatus AllocateNodeAndRegistrations(
const Model* model,
NodeAndRegistration** node_and_registrations) override;
TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) override;
TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) override;
TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors) override;
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this method. It is only used to record TfLiteTensor persistent allocations.
TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors,
int tensor_index) override;
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this function since all allocations for quantized data will take place in
// the temp section.
TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
const SubGraph* subgraph,
TfLiteTensor* tensor,
int tensor_index,
bool allocate_temp) override;
private:
RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
void PrintRecordedAllocation(RecordedAllocationType allocation_type,
const char* allocation_name,
const char* allocation_description) const;
RecordedAllocation SnapshotAllocationUsage() const;
void RecordAllocationUsage(const RecordedAllocation& snapshotted_allocation,
RecordedAllocation& recorded_allocation);
const RecordingSimpleMemoryAllocator* recording_memory_allocator_;
RecordedAllocation recorded_tflite_eval_tensor_data_ = {};
RecordedAllocation recorded_persistent_tflite_tensor_data_ = {};
RecordedAllocation recorded_persistent_tflite_tensor_quantization_data_ = {};
RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {};
RecordedAllocation recorded_node_and_registration_array_data_ = {};
RecordedAllocation recorded_op_data_ = {};
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
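
A sketch of the audit flow once an interpreter has run AllocateTensors() through a recording allocator: query one bucket, then print the full summary.

#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"

void AuditEvalTensorUsage(tflite::RecordingMicroAllocator* allocator,
                          tflite::ErrorReporter* error_reporter) {
  const tflite::RecordedAllocation eval_data =
      allocator->GetRecordedAllocation(
          tflite::RecordedAllocationType::kTfLiteEvalTensorData);
  TF_LITE_REPORT_ERROR(error_reporter,
                       "Eval tensor data: %d bytes requested, %d bytes used",
                       static_cast<int>(eval_data.requested_bytes),
                       static_cast<int>(eval_data.used_bytes));
  allocator->PrintAllocations();  // logs every RecordedAllocationType bucket
}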

View File

@@ -0,0 +1,65 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"
namespace tflite {
// Utility subclass that enables internal recordings of the MicroInterpreter.
// This class should be used to audit and analyze memory arena usage for a given
// model and interpreter.
//
// After construction and the first Invoke() or AllocateTensors() call, the
// memory usage is recorded and available through the GetMicroAllocator()
// function. See RecordingMicroAllocator for more details on what is currently
// recorded from arena allocations.
//
// It is recommended for users to increase the tensor arena size by at least 1kb
// to ensure enough additional memory is available for internal recordings.
class RecordingMicroInterpreter : public MicroInterpreter {
public:
RecordingMicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter)
: MicroInterpreter(model, op_resolver,
RecordingMicroAllocator::Create(
tensor_arena, tensor_arena_size, error_reporter),
error_reporter),
recording_micro_allocator_(
static_cast<const RecordingMicroAllocator&>(allocator())) {}
RecordingMicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
RecordingMicroAllocator* allocator,
ErrorReporter* error_reporter)
: MicroInterpreter(model, op_resolver, allocator, error_reporter),
recording_micro_allocator_(*allocator) {}
const RecordingMicroAllocator& GetMicroAllocator() const {
return recording_micro_allocator_;
}
private:
const RecordingMicroAllocator& recording_micro_allocator_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
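
A sketch putting the recording interpreter to work. The model symbol is a placeholder, and the arena size includes the extra 1kb of headroom recommended above for the internal recordings.

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/recording_micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_model_data[];  // hypothetical model flatbuffer

void MeasureArenaUsage() {
  static tflite::MicroErrorReporter error_reporter;
  static tflite::AllOpsResolver resolver;
  constexpr size_t kArenaSize = 16 * 1024 + 1024;  // assumption + 1kb headroom
  alignas(16) static uint8_t tensor_arena[kArenaSize];
  tflite::RecordingMicroInterpreter interpreter(
      tflite::GetModel(g_model_data), resolver, tensor_arena, kArenaSize,
      &error_reporter);
  interpreter.AllocateTensors();
  interpreter.GetMicroAllocator().PrintAllocations();
}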

View File

@@ -0,0 +1,64 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
// Utility class used to log allocations of a SimpleMemoryAllocator. Should only
// be used in debug/evaluation settings or unit tests to evaluate allocation
// usage.
class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
public:
RecordingSimpleMemoryAllocator(ErrorReporter* error_reporter,
uint8_t* buffer_head, size_t buffer_size);
// TODO(b/157615197): Cleanup constructors/destructor and use factory
// functions.
~RecordingSimpleMemoryAllocator() override;
static RecordingSimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
uint8_t* buffer_head,
size_t buffer_size);
// Returns the number of bytes requested from the head or tail.
size_t GetRequestedBytes() const;
// Returns the number of bytes actually allocated from the head or tail. This
// value will be >= to the number of requested bytes due to padding and
// alignment.
size_t GetUsedBytes() const;
// Returns the number of alloc calls from the head or tail.
size_t GetAllocatedCount() const;
TfLiteStatus EnsureHeadSize(size_t size, size_t alignment) override;
uint8_t* AllocateFromTail(size_t size, size_t alignment) override;
private:
size_t requested_head_bytes_;
size_t requested_tail_bytes_;
size_t used_bytes_;
size_t alloc_count_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_

View File

@@ -0,0 +1,99 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// TODO(petewarden): This allocator never frees up or reuses any memory, even
// though we have enough information about lifetimes of the tensors to do so.
// This makes it pretty wasteful, so we should use a more intelligent method.
class SimpleMemoryAllocator {
public:
// TODO(b/157615197): Cleanup constructors/destructor and use factory
// functions.
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head,
uint8_t* buffer_tail);
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
size_t buffer_size);
virtual ~SimpleMemoryAllocator();
// Creates a new SimpleMemoryAllocator from a given buffer head and size.
static SimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
uint8_t* buffer_head,
size_t buffer_size);
// Ensure that the head (lowest address and moving upwards) memory allocation
// is at least a given size. This function will only increase the head size if
// the passed in value is larger than the current head size. Calls to this
// method will also invalidate all temporary allocation values. This call will
// fail if a chain of allocations through AllocateTemp() have not been cleaned
// up with a call to ResetTempAllocations().
virtual TfLiteStatus EnsureHeadSize(size_t size, size_t alignment);
// Allocates memory starting at the tail of the arena (highest address and
// moving downwards).
virtual uint8_t* AllocateFromTail(size_t size, size_t alignment);
// Allocates a temporary buffer from the head of the arena (lowest address and
// moving upwards) but does not update the actual head allocation size or
// position. The returned buffer is guaranteed to be valid until either
// ResetTempAllocations() is called or the head is adjusted again through
// EnsureHeadSize(). Repeated calls to this function create a chain of temp
// allocations. All calls to AllocateTemp() must end with a call to
// ResetTempAllocations(). If EnsureHeadSize() is called before a call to
// ResetTempAllocations(), it will fail with an error message.
virtual uint8_t* AllocateTemp(size_t size, size_t alignment);
// Resets a chain of temporary allocations back to the current head of the
// arena (lowest address).
virtual void ResetTempAllocations();
uint8_t* GetHead() const;
uint8_t* GetBufferHead() const;
uint8_t* GetTail() const;
size_t GetHeadUsedBytes() const;
size_t GetTailUsedBytes() const;
// Returns the number of bytes available with a given alignment.
size_t GetAvailableMemory(size_t alignment) const;
size_t GetUsedBytes() const;
private:
size_t GetBufferSize() const;
ErrorReporter* error_reporter_;
uint8_t* buffer_head_;
uint8_t* buffer_tail_;
uint8_t* head_;
uint8_t* tail_;
uint8_t* temp_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
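
A sketch of the head/temp/tail discipline documented above; the sizes and alignments are arbitrary.

#include <cstdint>

#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"

void ArenaDiscipline() {
  alignas(16) static uint8_t arena[1024];
  static tflite::MicroErrorReporter error_reporter;
  tflite::SimpleMemoryAllocator* allocator =
      tflite::SimpleMemoryAllocator::Create(&error_reporter, arena,
                                            sizeof(arena));
  uint8_t* scratch = allocator->AllocateTemp(/*size=*/64, /*alignment=*/4);
  // ... use scratch transiently ...
  allocator->ResetTempAllocations();  // must precede any head resize
  allocator->EnsureHeadSize(/*size=*/128, /*alignment=*/4);
  uint8_t* persistent = allocator->AllocateFromTail(/*size=*/32,
                                                    /*alignment=*/4);
  (void)scratch;
  (void)persistent;
}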

View File

@@ -0,0 +1,186 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
// Useful functions for writing tests.
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace testing {
constexpr int kOfflinePlannerHeaderSize = 3;
struct NodeConnection_ {
std::initializer_list<int32_t> input;
std::initializer_list<int32_t> output;
};
typedef struct NodeConnection_ NodeConnection;
// A simple operator that returns the median of the input along with the
// number of times the kernel has been invoked. The implementation below is
// deliberately complicated, just to demonstrate how kernel memory planning
// works.
class SimpleStatefulOp {
static constexpr int kBufferNotAllocated = 0;
// Inputs:
static constexpr int kInputTensor = 0;
// Outputs:
static constexpr int kMedianTensor = 0;
static constexpr int kInvokeCount = 1;
struct OpData {
int invoke_count = 0;
int sorting_buffer = kBufferNotAllocated;
};
public:
static const TfLiteRegistration* getRegistration();
static TfLiteRegistration* GetMutableRegistration();
static void* Init(TfLiteContext* context, const char* buffer, size_t length);
static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);
};
class MockCustom {
public:
static const TfLiteRegistration* getRegistration();
static TfLiteRegistration* GetMutableRegistration();
static void* Init(TfLiteContext* context, const char* buffer, size_t length);
static void Free(TfLiteContext* context, void* buffer);
static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);
static bool freed_;
};
// Returns an Op Resolver that can be used in the testing code.
AllOpsResolver GetOpResolver();
// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input,
// 1 layer of weights, 1 output Tensor, and 1 operator.
const Model* GetSimpleMockModel();
// Returns a flatbuffer TensorFlow Lite model with more inputs, variable
// tensors, and operators.
const Model* GetComplexMockModel();
// Returns a simple flatbuffer model with two branches.
const Model* GetSimpleModelWithBranch();
// Returns a simple flatbuffer model with offline planned tensors
const Model* GetModelWithOfflinePlanning(int num_tensors,
const int32_t* metadata_buffer,
NodeConnection* node_conn,
int num_conns);
// Returns a flatbuffer model with `simple_stateful_op`
const Model* GetSimpleStatefulModel();
// Builds a one-dimensional flatbuffer tensor of the given size.
const Tensor* Create1dFlatbufferTensor(int size, bool is_variable = false);
// Builds a one-dimensional flatbuffer tensor of the given size with
// quantization metadata.
const Tensor* CreateQuantizedFlatbufferTensor(int size);
// Creates a one-dimensional tensor with no quantization metadata.
const Tensor* CreateMissingQuantizationFlatbufferTensor(int size);
// Creates a vector of flatbuffer buffers.
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>*
CreateFlatbufferBuffers();
// Performs a simple string comparison without requiring standard C library.
int TestStrcmp(const char* a, const char* b);
// Wrapper to forward kernel errors to the interpreter's error reporter.
void ReportOpError(struct TfLiteContext* context, const char* format, ...);
void PopulateContext(TfLiteTensor* tensors, int tensors_size,
TfLiteContext* context);
// Create a TfLiteIntArray from an array of ints. The first element in the
// supplied array must be the size of the array expressed as an int.
TfLiteIntArray* IntArrayFromInts(const int* int_array);
// Create a TfLiteFloatArray from an array of floats. The first element in the
// supplied array must be the size of the array expressed as a float.
TfLiteFloatArray* FloatArrayFromFloats(const float* floats);
TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims,
bool is_variable = false);
void PopulateFloatTensor(TfLiteTensor* tensor, float* begin, float* end);
TfLiteTensor CreateBoolTensor(const bool* data, TfLiteIntArray* dims,
bool is_variable = false);
TfLiteTensor CreateInt32Tensor(const int32_t* data, TfLiteIntArray* dims,
bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(const int16_t* data, TfLiteIntArray* dims,
float scale, int zero_point,
bool is_variable = false);
template <typename T>
TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized,
TfLiteIntArray* dims, float scale,
int zero_point, bool is_variable = false) {
int input_size = ElementCount(*dims);
tflite::AsymmetricQuantize(input, quantized, input_size, scale, zero_point);
return CreateQuantizedTensor(quantized, dims, scale, zero_point, is_variable);
}
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
TfLiteIntArray* dims, float input_scale,
float weights_scale,
bool is_variable = false);
// Quantizes int32_t bias tensor with per-channel weights determined by input
// scale multiplied by weight scale for each channel.
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
const float* input, int32_t* quantized, TfLiteIntArray* dims,
float input_scale, float* weight_scales, float* scales, int* zero_points,
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
bool is_variable = false);
TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
int* zero_points, TfLiteAffineQuantization* affine_quant,
int quantized_dimension, bool is_variable = false);
// Returns the number of tensors in the default subgraph for a tflite::Model.
size_t GetModelTensorCount(const Model* model);
} // namespace testing
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
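
A sketch of assembling a float tensor for a kernel test with the helpers above; the shape and values are arbitrary.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/test_helpers.h"

TfLiteTensor MakeTestTensor() {
  // The first element of the array is its length, so this describes a
  // rank-2 tensor of shape {2, 3}.
  static const int dims_data[] = {2, 2, 3};
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);
  static const float values[6] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
  return tflite::testing::CreateFloatTensor(values, dims);
}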

View File

@@ -0,0 +1,241 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// An ultra-lightweight testing framework designed for use with microcontroller
// applications. Its only dependency is on TensorFlow Lite's ErrorReporter
// interface, where log messages are output. This is designed to be usable even
// when no standard C or C++ libraries are available, and without any dynamic
// memory allocation or reliance on global constructors.
//
// To build a test, you use syntax similar to gunit, but with some extra
// decoration to create a hidden 'main' function containing each of the tests to
// be run. Your code should look something like:
// ----------------------------------------------------------------------------
// #include "path/to/this/header"
//
// TF_LITE_MICRO_TESTS_BEGIN
//
// TF_LITE_MICRO_TEST(SomeTest) {
// TF_LITE_MICRO_EXPECT_EQ(true, true);
// }
//
// TF_LITE_MICRO_TESTS_END
// ----------------------------------------------------------------------------
// If you compile this for your platform, you'll get a normal binary that you
// should be able to run. Executing it will output logging information like this
// to stderr (or whatever equivalent is available and written to by
// ErrorReporter):
// ----------------------------------------------------------------------------
// Testing SomeTest
// 1/1 tests passed
// ~~~ALL TESTS PASSED~~~
// ----------------------------------------------------------------------------
// This is designed to be human-readable, so you can just run tests manually,
// but the string "~~~ALL TESTS PASSED~~~" should only appear if all of the
// tests do pass. This makes it possible to integrate with automated test
// systems by scanning the output logs and looking for that magic value.
//
// This framework is intended to be a rudimentary alternative to no testing at
// all on systems that struggle to run more conventional approaches, so use with
// caution!
#ifndef TENSORFLOW_LITE_MICRO_TESTING_MICRO_TEST_H_
#define TENSORFLOW_LITE_MICRO_TESTING_MICRO_TEST_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace micro_test {
extern int tests_passed;
extern int tests_failed;
extern bool is_test_complete;
extern bool did_test_fail;
extern tflite::ErrorReporter* reporter;
} // namespace micro_test
#define TF_LITE_MICRO_TESTS_BEGIN \
namespace micro_test { \
int tests_passed; \
int tests_failed; \
bool is_test_complete; \
bool did_test_fail; \
tflite::ErrorReporter* reporter; \
} \
\
int main(void) { \
micro_test::tests_passed = 0; \
micro_test::tests_failed = 0; \
tflite::MicroErrorReporter error_reporter; \
micro_test::reporter = &error_reporter; \
HAL_Init(); \
SystemClock_Config(); \
board_init(); \
printf("Init Successful");
#define TF_LITE_MICRO_TESTS_END \
micro_test::reporter->Report( \
"%d/%d tests passed", micro_test::tests_passed, \
(micro_test::tests_failed + micro_test::tests_passed)); \
if (micro_test::tests_failed == 0) { \
micro_test::reporter->Report("~~~ALL TESTS PASSED~~~\n"); \
} else { \
micro_test::reporter->Report("~~~SOME TESTS FAILED~~~\n"); \
} \
while(1); \
}
// TODO(petewarden): I'm going to hell for what I'm doing to this poor for loop.
#define TF_LITE_MICRO_TEST(name) \
micro_test::reporter->Report("Testing " #name); \
for (micro_test::is_test_complete = false, \
micro_test::did_test_fail = false; \
!micro_test::is_test_complete; micro_test::is_test_complete = true, \
micro_test::tests_passed += (micro_test::did_test_fail) ? 0 : 1, \
micro_test::tests_failed += (micro_test::did_test_fail) ? 1 : 0)
#define TF_LITE_MICRO_EXPECT(x) \
do { \
if (!(x)) { \
micro_test::reporter->Report(#x " failed at %s:%d", __FILE__, __LINE__); \
micro_test::did_test_fail = true; \
} \
} while (false)
// TODO(b/139142772): this macro is used with types other than ints even though
// the printf specifier is %d.
#define TF_LITE_MICRO_EXPECT_EQ(x, y) \
do { \
auto vx = x; \
auto vy = y; \
if ((vx) != (vy)) { \
micro_test::reporter->Report(#x " == " #y " failed at %s:%d (%d vs %d)", \
__FILE__, __LINE__, static_cast<int>(vx), \
static_cast<int>(vy)); \
micro_test::did_test_fail = true; \
} \
} while (false)
#define TF_LITE_MICRO_EXPECT_NE(x, y) \
do { \
if ((x) == (y)) { \
micro_test::reporter->Report(#x " != " #y " failed at %s:%d", __FILE__, \
__LINE__); \
micro_test::did_test_fail = true; \
} \
} while (false)
// TODO(wangtz): Making it more generic once needed.
#define TF_LITE_MICRO_ARRAY_ELEMENT_EXPECT_NEAR(arr1, idx1, arr2, idx2, \
epsilon) \
do { \
auto delta = ((arr1)[(idx1)] > (arr2)[(idx2)]) \
? ((arr1)[(idx1)] - (arr2)[(idx2)]) \
: ((arr2)[(idx2)] - (arr1)[(idx1)]); \
if (delta > epsilon) { \
micro_test::reporter->Report( \
#arr1 "[%d] (%f) near " #arr2 "[%d] (%f) failed at %s:%d", \
static_cast<int>(idx1), static_cast<float>((arr1)[(idx1)]), \
static_cast<int>(idx2), static_cast<float>((arr2)[(idx2)]), \
__FILE__, __LINE__); \
micro_test::did_test_fail = true; \
} \
} while (false)
#define TF_LITE_MICRO_EXPECT_NEAR(x, y, epsilon) \
do { \
auto vx = (x); \
auto vy = (y); \
auto delta = ((vx) > (vy)) ? ((vx) - (vy)) : ((vy) - (vx)); \
if (delta > epsilon) { \
micro_test::reporter->Report( \
#x " (%f) near " #y " (%f) failed at %s:%d", \
static_cast<double>(vx), static_cast<double>(vy), __FILE__, \
__LINE__); \
micro_test::did_test_fail = true; \
} \
} while (false)
#define TF_LITE_MICRO_EXPECT_GT(x, y) \
do { \
if ((x) <= (y)) { \
micro_test::reporter->Report(#x " > " #y " failed at %s:%d", __FILE__, \
__LINE__); \
micro_test::did_test_fail = true; \
} \
} while (false)
#define TF_LITE_MICRO_EXPECT_LT(x, y) \
do { \
if ((x) >= (y)) { \
micro_test::reporter->Report(#x " < " #y " failed at %s:%d", __FILE__, \
__LINE__); \
micro_test::did_test_fail = true; \
} \
} while (false)
#define TF_LITE_MICRO_EXPECT_GE(x, y) \
do { \
if ((x) < (y)) { \
micro_test::reporter->Report(#x " >= " #y " failed at %s:%d", __FILE__, \
__LINE__); \
micro_test::did_test_fail = true; \
} \
} while (false)
#define TF_LITE_MICRO_EXPECT_LE(x, y) \
do { \
if ((x) > (y)) { \
micro_test::reporter->Report(#x " <= " #y " failed at %s:%d", __FILE__, \
__LINE__); \
micro_test::did_test_fail = true; \
} \
} while (false)
#define TF_LITE_MICRO_EXPECT_TRUE(x) \
do { \
if (!(x)) { \
micro_test::reporter->Report(#x " was not true failed at %s:%d", \
__FILE__, __LINE__); \
micro_test::did_test_fail = true; \
} \
} while (false)
#define TF_LITE_MICRO_EXPECT_FALSE(x) \
do { \
if (x) { \
micro_test::reporter->Report(#x " was not false failed at %s:%d", \
__FILE__, __LINE__); \
micro_test::did_test_fail = true; \
} \
} while (false)
#define TF_LITE_MICRO_FAIL(msg) \
do { \
micro_test::reporter->Report("FAIL: %s", msg, __FILE__, __LINE__); \
micro_test::did_test_fail = true; \
} while (false)
#define TF_LITE_MICRO_EXPECT_STRING_EQ(string1, string2) \
do { \
for (int i = 0; string1[i] != '\0' && string2[i] != '\0'; i++) { \
if (string1[i] != string2[i]) { \
micro_test::reporter->Report("FAIL: %s did not match %s", string1, \
string2, __FILE__, __LINE__); \
micro_test::did_test_fail = true; \
} \
} \
} while (false)
#endif // TENSORFLOW_LITE_MICRO_TESTING_MICRO_TEST_H_

View File

@@ -0,0 +1,23 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_TESTING_TEST_CONV_MODEL_H_
#define TENSORFLOW_LITE_MICRO_TESTING_TEST_CONV_MODEL_H_
// See generate_test_models.py for updating the contents of this model:
extern const unsigned char kTestConvModelData[];
extern const unsigned int kTestConvModelDataSize;
#endif // TENSORFLOW_LITE_MICRO_TESTING_TEST_CONV_MODEL_H_

View File

@@ -0,0 +1,116 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_TESTING_TEST_UTILS_H_
#define TENSORFLOW_LITE_MICRO_TESTING_TEST_UTILS_H_
#include <cmath>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/micro_utils.h"
#include "tensorflow/lite/micro/test_helpers.h"
#include "tensorflow/lite/micro/testing/micro_test.h"
namespace tflite {
namespace testing {
// Note: These methods are deprecated; do not use them. See b/141332970.
// Derives the quantization range max from scaling factor and zero point.
template <typename T>
inline float MaxFromZeroPointScale(const int zero_point, const float scale) {
return (std::numeric_limits<T>::max() - zero_point) * scale;
}
// Derives the quantization range min from scaling factor and zero point.
template <typename T>
inline float MinFromZeroPointScale(const int zero_point, const float scale) {
return (std::numeric_limits<T>::min() - zero_point) * scale;
}
// Derives the quantization scaling factor from a min and max range.
template <typename T>
inline float ScaleFromMinMax(const float min, const float max) {
return (max - min) /
static_cast<float>((std::numeric_limits<T>::max() * 1.0) -
std::numeric_limits<T>::min());
}
// Derives the quantization zero point from a min and max range.
template <typename T>
inline int ZeroPointFromMinMax(const float min, const float max) {
return static_cast<int>(std::numeric_limits<T>::min()) +
static_cast<int>(-min / ScaleFromMinMax<T>(min, max) + 0.5f);
}
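// Worked example (illustrative, T = uint8_t, range [-1.0f, 1.0f]):
//   ScaleFromMinMax<uint8_t>(-1.0f, 1.0f)     = 2.0f / 255       ~= 0.00784f
//   ZeroPointFromMinMax<uint8_t>(-1.0f, 1.0f) = 0 + round(127.5) =  128
// so 0.0f quantizes to 128 and +1.0f to the top of the range, 255.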
// Converts a float value into an unsigned eight-bit quantized value.
uint8_t F2Q(float value, float min, float max);
// Converts a float value into a signed eight-bit quantized value.
int8_t F2QS(const float value, const float min, const float max);
// Converts a float value into a signed thirty-two-bit quantized value. Note
// that values close to max int and min int may see significant error due to
// a lack of floating point granularity for large values.
int32_t F2Q32(const float value, const float scale);
// TODO(b/141330728): Move this method elsewhere as part of cleanup.
void PopulateContext(TfLiteTensor* tensors, int tensors_size,
ErrorReporter* error_reporter, TfLiteContext* context);
TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims,
float min, float max,
bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims,
float min, float max,
bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(float* data, uint8_t* quantized_data,
TfLiteIntArray* dims,
bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(float* data, int8_t* quantized_data,
TfLiteIntArray* dims,
bool is_variable = false);
TfLiteTensor CreateQuantizedTensor(float* data, int16_t* quantized_data,
TfLiteIntArray* dims,
bool is_variable = false);
TfLiteTensor CreateQuantized32Tensor(const int32_t* data, TfLiteIntArray* dims,
float scale, bool is_variable = false);
template <typename input_type = int32_t,
TfLiteType tensor_input_type = kTfLiteInt32>
inline TfLiteTensor CreateTensor(const input_type* data, TfLiteIntArray* dims,
bool is_variable = false) {
TfLiteTensor result;
result.type = tensor_input_type;
result.data.raw = reinterpret_cast<char*>(const_cast<input_type*>(data));
result.dims = dims;
result.allocation_type = kTfLiteMemNone;
result.bytes = ElementCount(*dims) * sizeof(input_type);
result.is_variable = is_variable;
return result;
}
} // namespace testing
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_TESTING_TEST_UTILS_H_
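
A short, hypothetical sketch of the CreateTensor helper above. It assumes the IntArrayFromInts helper declared in test_helpers.h (included by this header); the shape {2, 2, 3} is illustrative.

#include "tensorflow/lite/micro/testing/test_utils.h"

void CreateTensorExample() {
  static const int32_t kData[6] = {1, 2, 3, 4, 5, 6};
  // TfLiteIntArray layout: the first entry is the rank, then each dimension.
  int dims_storage[] = {2, 2, 3};
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_storage);
  TfLiteTensor t = tflite::testing::CreateTensor(kData, dims);
  // t.type == kTfLiteInt32 and t.bytes == 6 * sizeof(int32_t).
}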

View File

@@ -0,0 +1,528 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_common_tables.h
* Description: Extern declaration for common tables
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _ARM_COMMON_TABLES_H
#define _ARM_COMMON_TABLES_H
#include "cmsis/CMSIS/DSP/Include/arm_math.h"
#ifdef __cplusplus
extern "C"
{
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
/* Double Precision Float CFFT twiddles */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
extern const uint16_t armBitRevTable[1024];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_16)
extern const uint64_t twiddleCoefF64_16[32];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_32)
extern const uint64_t twiddleCoefF64_32[64];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_64)
extern const uint64_t twiddleCoefF64_64[128];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_128)
extern const uint64_t twiddleCoefF64_128[256];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_256)
extern const uint64_t twiddleCoefF64_256[512];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_512)
extern const uint64_t twiddleCoefF64_512[1024];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_1024)
extern const uint64_t twiddleCoefF64_1024[2048];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_2048)
extern const uint64_t twiddleCoefF64_2048[4096];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_4096)
extern const uint64_t twiddleCoefF64_4096[8192];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_16)
extern const float32_t twiddleCoef_16[32];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_32)
extern const float32_t twiddleCoef_32[64];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_64)
extern const float32_t twiddleCoef_64[128];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_128)
extern const float32_t twiddleCoef_128[256];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_256)
extern const float32_t twiddleCoef_256[512];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_512)
extern const float32_t twiddleCoef_512[1024];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_1024)
extern const float32_t twiddleCoef_1024[2048];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_2048)
extern const float32_t twiddleCoef_2048[4096];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096)
extern const float32_t twiddleCoef_4096[8192];
#define twiddleCoef twiddleCoef_4096
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
/* Q31 */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_16)
extern const q31_t twiddleCoef_16_q31[24];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_32)
extern const q31_t twiddleCoef_32_q31[48];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_64)
extern const q31_t twiddleCoef_64_q31[96];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_128)
extern const q31_t twiddleCoef_128_q31[192];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_256)
extern const q31_t twiddleCoef_256_q31[384];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_512)
extern const q31_t twiddleCoef_512_q31[768];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_1024)
extern const q31_t twiddleCoef_1024_q31[1536];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_2048)
extern const q31_t twiddleCoef_2048_q31[3072];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096)
extern const q31_t twiddleCoef_4096_q31[6144];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_16)
extern const q15_t twiddleCoef_16_q15[24];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_32)
extern const q15_t twiddleCoef_32_q15[48];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_64)
extern const q15_t twiddleCoef_64_q15[96];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_128)
extern const q15_t twiddleCoef_128_q15[192];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_256)
extern const q15_t twiddleCoef_256_q15[384];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_512)
extern const q15_t twiddleCoef_512_q15[768];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_1024)
extern const q15_t twiddleCoef_1024_q15[1536];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_2048)
extern const q15_t twiddleCoef_2048_q15[3072];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096)
extern const q15_t twiddleCoef_4096_q15[6144];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
/* Double Precision Float RFFT twiddles */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_32)
extern const uint64_t twiddleCoefF64_rfft_32[32];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_64)
extern const uint64_t twiddleCoefF64_rfft_64[64];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_128)
extern const uint64_t twiddleCoefF64_rfft_128[128];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_256)
extern const uint64_t twiddleCoefF64_rfft_256[256];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_512)
extern const uint64_t twiddleCoefF64_rfft_512[512];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_1024)
extern const uint64_t twiddleCoefF64_rfft_1024[1024];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_2048)
extern const uint64_t twiddleCoefF64_rfft_2048[2048];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_4096)
extern const uint64_t twiddleCoefF64_rfft_4096[4096];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32)
extern const float32_t twiddleCoef_rfft_32[32];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64)
extern const float32_t twiddleCoef_rfft_64[64];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128)
extern const float32_t twiddleCoef_rfft_128[128];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256)
extern const float32_t twiddleCoef_rfft_256[256];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512)
extern const float32_t twiddleCoef_rfft_512[512];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024)
extern const float32_t twiddleCoef_rfft_1024[1024];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048)
extern const float32_t twiddleCoef_rfft_2048[2048];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096)
extern const float32_t twiddleCoef_rfft_4096[4096];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
/* Double precision floating-point bit reversal tables */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_16)
#define ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH ((uint16_t)12)
extern const uint16_t armBitRevIndexTableF64_16[ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_32)
#define ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH ((uint16_t)24)
extern const uint16_t armBitRevIndexTableF64_32[ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_64)
#define ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH ((uint16_t)56)
extern const uint16_t armBitRevIndexTableF64_64[ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_128)
#define ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH ((uint16_t)112)
extern const uint16_t armBitRevIndexTableF64_128[ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_256)
#define ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH ((uint16_t)240)
extern const uint16_t armBitRevIndexTableF64_256[ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_512)
#define ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH ((uint16_t)480)
extern const uint16_t armBitRevIndexTableF64_512[ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_1024)
#define ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH ((uint16_t)992)
extern const uint16_t armBitRevIndexTableF64_1024[ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_2048)
#define ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH ((uint16_t)1984)
extern const uint16_t armBitRevIndexTableF64_2048[ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_4096)
#define ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH ((uint16_t)4032)
extern const uint16_t armBitRevIndexTableF64_4096[ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
/* floating-point bit reversal tables */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_16)
#define ARMBITREVINDEXTABLE_16_TABLE_LENGTH ((uint16_t)20)
extern const uint16_t armBitRevIndexTable16[ARMBITREVINDEXTABLE_16_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_32)
#define ARMBITREVINDEXTABLE_32_TABLE_LENGTH ((uint16_t)48)
extern const uint16_t armBitRevIndexTable32[ARMBITREVINDEXTABLE_32_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_64)
#define ARMBITREVINDEXTABLE_64_TABLE_LENGTH ((uint16_t)56)
extern const uint16_t armBitRevIndexTable64[ARMBITREVINDEXTABLE_64_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_128)
#define ARMBITREVINDEXTABLE_128_TABLE_LENGTH ((uint16_t)208)
extern const uint16_t armBitRevIndexTable128[ARMBITREVINDEXTABLE_128_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_256)
#define ARMBITREVINDEXTABLE_256_TABLE_LENGTH ((uint16_t)440)
extern const uint16_t armBitRevIndexTable256[ARMBITREVINDEXTABLE_256_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_512)
#define ARMBITREVINDEXTABLE_512_TABLE_LENGTH ((uint16_t)448)
extern const uint16_t armBitRevIndexTable512[ARMBITREVINDEXTABLE_512_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_1024)
#define ARMBITREVINDEXTABLE_1024_TABLE_LENGTH ((uint16_t)1800)
extern const uint16_t armBitRevIndexTable1024[ARMBITREVINDEXTABLE_1024_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_2048)
#define ARMBITREVINDEXTABLE_2048_TABLE_LENGTH ((uint16_t)3808)
extern const uint16_t armBitRevIndexTable2048[ARMBITREVINDEXTABLE_2048_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_4096)
#define ARMBITREVINDEXTABLE_4096_TABLE_LENGTH ((uint16_t)4032)
extern const uint16_t armBitRevIndexTable4096[ARMBITREVINDEXTABLE_4096_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
/* fixed-point bit reversal tables */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16)
#define ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH ((uint16_t)12)
extern const uint16_t armBitRevIndexTable_fixed_16[ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_32)
#define ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH ((uint16_t)24)
extern const uint16_t armBitRevIndexTable_fixed_32[ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64)
#define ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH ((uint16_t)56)
extern const uint16_t armBitRevIndexTable_fixed_64[ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_128)
#define ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH ((uint16_t)112)
extern const uint16_t armBitRevIndexTable_fixed_128[ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256)
#define ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH ((uint16_t)240)
extern const uint16_t armBitRevIndexTable_fixed_256[ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_512)
#define ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH ((uint16_t)480)
extern const uint16_t armBitRevIndexTable_fixed_512[ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024)
#define ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH ((uint16_t)992)
extern const uint16_t armBitRevIndexTable_fixed_1024[ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_2048)
#define ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH ((uint16_t)1984)
extern const uint16_t armBitRevIndexTable_fixed_2048[ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
#define ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH ((uint16_t)4032)
extern const uint16_t armBitRevIndexTable_fixed_4096[ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_F32)
extern const float32_t realCoefA[8192];
extern const float32_t realCoefB[8192];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_Q31)
extern const q31_t realCoefAQ31[8192];
extern const q31_t realCoefBQ31[8192];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_Q15)
extern const q15_t realCoefAQ15[8192];
extern const q15_t realCoefBQ15[8192];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_128)
extern const float32_t Weights_128[256];
extern const float32_t cos_factors_128[128];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_512)
extern const float32_t Weights_512[1024];
extern const float32_t cos_factors_512[512];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_2048)
extern const float32_t Weights_2048[4096];
extern const float32_t cos_factors_2048[2048];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_8192)
extern const float32_t Weights_8192[16384];
extern const float32_t cos_factors_8192[8192];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_128)
extern const q15_t WeightsQ15_128[256];
extern const q15_t cos_factorsQ15_128[128];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_512)
extern const q15_t WeightsQ15_512[1024];
extern const q15_t cos_factorsQ15_512[512];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_2048)
extern const q15_t WeightsQ15_2048[4096];
extern const q15_t cos_factorsQ15_2048[2048];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_8192)
extern const q15_t WeightsQ15_8192[16384];
extern const q15_t cos_factorsQ15_8192[8192];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_128)
extern const q31_t WeightsQ31_128[256];
extern const q31_t cos_factorsQ31_128[128];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_512)
extern const q31_t WeightsQ31_512[1024];
extern const q31_t cos_factorsQ31_512[512];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_2048)
extern const q31_t WeightsQ31_2048[4096];
extern const q31_t cos_factorsQ31_2048[2048];
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_8192)
extern const q31_t WeightsQ31_8192[16384];
extern const q31_t cos_factorsQ31_8192[8192];
#endif
#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_ALLOW_TABLES)
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_RECIP_Q15)
extern const q15_t armRecipTableQ15[64];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_RECIP_Q31)
extern const q31_t armRecipTableQ31[64];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) */
/* Tables for Fast Math Sine and Cosine */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_F32)
extern const float32_t sinTable_f32[FAST_MATH_TABLE_SIZE + 1];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q31)
extern const q31_t sinTable_q31[FAST_MATH_TABLE_SIZE + 1];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q15)
extern const q15_t sinTable_q15[FAST_MATH_TABLE_SIZE + 1];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) */
#if defined(ARM_MATH_MVEI)
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE)
extern const q31_t sqrtTable_Q31[256];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) */
#endif
#if defined(ARM_MATH_MVEI)
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q15_MVE)
extern const q15_t sqrtTable_Q15[256];
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) */
#endif
#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_ALLOW_TABLES) */
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
extern const float32_t exp_tab[8];
extern const float32_t __logf_lut_f32[8];
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */
#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM))
extern const unsigned char hwLUT[256];
#endif /* (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) */
#ifdef __cplusplus
}
#endif
#endif /* ARM_COMMON_TABLES_H */
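
A hedged illustration of the table-gating scheme above: when ARM_DSP_CONFIG_TABLES is defined, only the tables named explicitly as build flags are declared (and, correspondingly, compiled into the library); the selection below is one possible example.

/* Assumed build flags:
 *   -DARM_DSP_CONFIG_TABLES -DARM_FFT_ALLOW_TABLES
 *   -DARM_TABLE_TWIDDLECOEF_F32_256 -DARM_TABLE_BITREVIDX_FLT_256
 */
#include "cmsis/CMSIS/DSP/Include/arm_common_tables.h"

const float32_t *twiddles = twiddleCoef_256;   /* declared: requested above */
/* const q31_t *tw = twiddleCoef_256_q31; */   /* not requested: undeclared */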

View File

@@ -0,0 +1,283 @@
/**************************************************************************//**
* @file cmsis_compiler.h
* @brief CMSIS compiler generic header file
* @version V5.1.0
* @date 09. October 2018
******************************************************************************/
/*
* Copyright (c) 2009-2018 Arm Limited. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __CMSIS_COMPILER_H
#define __CMSIS_COMPILER_H
#include <stdint.h>
/*
* Arm Compiler 4/5
*/
#if defined ( __CC_ARM )
#include "cmsis_armcc.h"
/*
* Arm Compiler 6.6 LTM (armclang)
*/
#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) && (__ARMCC_VERSION < 6100100)
#include "cmsis_armclang_ltm.h"
/*
* Arm Compiler above 6.10.1 (armclang)
*/
#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100)
#include "cmsis_armclang.h"
/*
* GNU Compiler
*/
#elif defined ( __GNUC__ )
#include "cmsis_gcc.h"
/*
* IAR Compiler
*/
#elif defined ( __ICCARM__ )
#include <cmsis_iccarm.h>
/*
* TI Arm Compiler
*/
#elif defined ( __TI_ARM__ )
#include <cmsis_ccs.h>
#ifndef __ASM
#define __ASM __asm
#endif
#ifndef __INLINE
#define __INLINE inline
#endif
#ifndef __STATIC_INLINE
#define __STATIC_INLINE static inline
#endif
#ifndef __STATIC_FORCEINLINE
#define __STATIC_FORCEINLINE __STATIC_INLINE
#endif
#ifndef __NO_RETURN
#define __NO_RETURN __attribute__((noreturn))
#endif
#ifndef __USED
#define __USED __attribute__((used))
#endif
#ifndef __WEAK
#define __WEAK __attribute__((weak))
#endif
#ifndef __PACKED
#define __PACKED __attribute__((packed))
#endif
#ifndef __PACKED_STRUCT
#define __PACKED_STRUCT struct __attribute__((packed))
#endif
#ifndef __PACKED_UNION
#define __PACKED_UNION union __attribute__((packed))
#endif
#ifndef __UNALIGNED_UINT32 /* deprecated */
struct __attribute__((packed)) T_UINT32 { uint32_t v; };
#define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v)
#endif
#ifndef __UNALIGNED_UINT16_WRITE
__PACKED_STRUCT T_UINT16_WRITE { uint16_t v; };
#define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void*)(addr))->v) = (val))
#endif
#ifndef __UNALIGNED_UINT16_READ
__PACKED_STRUCT T_UINT16_READ { uint16_t v; };
#define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v)
#endif
#ifndef __UNALIGNED_UINT32_WRITE
__PACKED_STRUCT T_UINT32_WRITE { uint32_t v; };
#define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val))
#endif
#ifndef __UNALIGNED_UINT32_READ
__PACKED_STRUCT T_UINT32_READ { uint32_t v; };
#define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v)
#endif
#ifndef __ALIGNED
#define __ALIGNED(x) __attribute__((aligned(x)))
#endif
#ifndef __RESTRICT
#define __RESTRICT __restrict
#endif
#ifndef __COMPILER_BARRIER
#warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored.
#define __COMPILER_BARRIER() (void)0
#endif
/*
* TASKING Compiler
*/
#elif defined ( __TASKING__ )
/*
* The CMSIS functions have been implemented as intrinsics in the compiler.
* Please use "carm -?i" to get an up-to-date list of all intrinsics,
* including the CMSIS ones.
*/
#ifndef __ASM
#define __ASM __asm
#endif
#ifndef __INLINE
#define __INLINE inline
#endif
#ifndef __STATIC_INLINE
#define __STATIC_INLINE static inline
#endif
#ifndef __STATIC_FORCEINLINE
#define __STATIC_FORCEINLINE __STATIC_INLINE
#endif
#ifndef __NO_RETURN
#define __NO_RETURN __attribute__((noreturn))
#endif
#ifndef __USED
#define __USED __attribute__((used))
#endif
#ifndef __WEAK
#define __WEAK __attribute__((weak))
#endif
#ifndef __PACKED
#define __PACKED __packed__
#endif
#ifndef __PACKED_STRUCT
#define __PACKED_STRUCT struct __packed__
#endif
#ifndef __PACKED_UNION
#define __PACKED_UNION union __packed__
#endif
#ifndef __UNALIGNED_UINT32 /* deprecated */
struct __packed__ T_UINT32 { uint32_t v; };
#define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v)
#endif
#ifndef __UNALIGNED_UINT16_WRITE
__PACKED_STRUCT T_UINT16_WRITE { uint16_t v; };
#define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val))
#endif
#ifndef __UNALIGNED_UINT16_READ
__PACKED_STRUCT T_UINT16_READ { uint16_t v; };
#define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v)
#endif
#ifndef __UNALIGNED_UINT32_WRITE
__PACKED_STRUCT T_UINT32_WRITE { uint32_t v; };
#define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val))
#endif
#ifndef __UNALIGNED_UINT32_READ
__PACKED_STRUCT T_UINT32_READ { uint32_t v; };
#define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v)
#endif
#ifndef __ALIGNED
#define __ALIGNED(x) __align(x)
#endif
#ifndef __RESTRICT
#warning No compiler specific solution for __RESTRICT. __RESTRICT is ignored.
#define __RESTRICT
#endif
#ifndef __COMPILER_BARRIER
#warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored.
#define __COMPILER_BARRIER() (void)0
#endif
/*
* COSMIC Compiler
*/
#elif defined ( __CSMC__ )
#include <cmsis_csm.h>
#ifndef __ASM
#define __ASM _asm
#endif
#ifndef __INLINE
#define __INLINE inline
#endif
#ifndef __STATIC_INLINE
#define __STATIC_INLINE static inline
#endif
#ifndef __STATIC_FORCEINLINE
#define __STATIC_FORCEINLINE __STATIC_INLINE
#endif
#ifndef __NO_RETURN
// NO RETURN is automatically detected, hence no warning here
#define __NO_RETURN
#endif
#ifndef __USED
#warning No compiler specific solution for __USED. __USED is ignored.
#define __USED
#endif
#ifndef __WEAK
#define __WEAK __weak
#endif
#ifndef __PACKED
#define __PACKED @packed
#endif
#ifndef __PACKED_STRUCT
#define __PACKED_STRUCT @packed struct
#endif
#ifndef __PACKED_UNION
#define __PACKED_UNION @packed union
#endif
#ifndef __UNALIGNED_UINT32 /* deprecated */
@packed struct T_UINT32 { uint32_t v; };
#define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v)
#endif
#ifndef __UNALIGNED_UINT16_WRITE
__PACKED_STRUCT T_UINT16_WRITE { uint16_t v; };
#define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val))
#endif
#ifndef __UNALIGNED_UINT16_READ
__PACKED_STRUCT T_UINT16_READ { uint16_t v; };
#define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v)
#endif
#ifndef __UNALIGNED_UINT32_WRITE
__PACKED_STRUCT T_UINT32_WRITE { uint32_t v; };
#define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val))
#endif
#ifndef __UNALIGNED_UINT32_READ
__PACKED_STRUCT T_UINT32_READ { uint32_t v; };
#define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v)
#endif
#ifndef __ALIGNED
#warning No compiler specific solution for __ALIGNED. __ALIGNED is ignored.
#define __ALIGNED(x)
#endif
#ifndef __RESTRICT
#warning No compiler specific solution for __RESTRICT. __RESTRICT is ignored.
#define __RESTRICT
#endif
#ifndef __COMPILER_BARRIER
#warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored.
#define __COMPILER_BARRIER() (void)0
#endif
#else
#error Unknown compiler.
#endif
#endif /* __CMSIS_COMPILER_H */
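
A small usage sketch of the portability macros defined above; each expands to the matching toolchain-specific construct, so the code compiles identically under any of the supported compilers.

#include "cmsis_compiler.h"

__PACKED_STRUCT Header {                  /* packed: no padding between fields */
  uint8_t  tag;
  uint32_t length;
};

__ALIGNED(8) static uint8_t buffer[64];   /* placed on an 8-byte boundary */

__STATIC_FORCEINLINE uint32_t ReadLength(const uint8_t *raw)
{
  /* Read a 32-bit value from a possibly unaligned address. */
  return __UNALIGNED_UINT32_READ(raw + 1);
}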

View File

@@ -0,0 +1,56 @@
/* ----------------------------------------------------------------------
* Project: CMSIS NN Library
* Title: arm_nn_tables.h
* Description: Extern declaration for NN tables
*
* $Date: 17. January 2018
* $Revision: V.1.0.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _ARM_NN_TABLES_H
#define _ARM_NN_TABLES_H
#include "cmsis/CMSIS/DSP/Include/arm_math.h"
/**
* @brief tables for various activation functions
*
*/
extern const q15_t sigmoidTable_q15[256];
extern const q7_t sigmoidTable_q7[256];
extern const q7_t tanhTable_q7[256];
extern const q15_t tanhTable_q15[256];
/**
* @brief 2-way tables for various activation functions
*
* Two-way tables: the L table covers inputs smaller than 1/4 and the
* H table covers the remaining (larger) inputs. We have these only for
* the q15_t version; a q7_t variant would not make sense.
*/
extern const q15_t sigmoidHTable_q15[192];
extern const q15_t sigmoidLTable_q15[128];
#endif /* ARM_NN_TABLES_H */
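
A rough illustration of a direct lookup against the tables above; the actual CMSIS-NN activation kernels also handle the input integer-width shift, so this is a deliberate simplification.

/* Interpret the q7 input bits as an unsigned index into the 256-entry
 * table of precomputed sigmoid values. */
static inline q7_t sigmoid_q7_lookup(q7_t in)
{
    return sigmoidTable_q7[(uint8_t)in];
}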

View File

@@ -0,0 +1,120 @@
/*
* Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* ----------------------------------------------------------------------
* Project: CMSIS NN Library
* Title: arm_nn_types.h
* Description: Public header file to contain the CMSIS-NN structs for the
* TensorFlowLite micro compliant functions
*
* $Date: April 23, 2020
* $Revision: V.0.5.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
#ifndef _ARM_NN_TYPES_H
#define _ARM_NN_TYPES_H
/** CMSIS-NN object to contain the width and height of a tile */
typedef struct
{
int32_t w; /**< Width */
int32_t h; /**< Height */
} cmsis_nn_tile;
/** CMSIS-NN object used for the function context. */
typedef struct
{
void *buf; /**< Pointer to a buffer needed for the optimization */
int32_t size; /**< Buffer size */
} cmsis_nn_context;
/** CMSIS-NN object to contain the dimensions of the tensors */
typedef struct
{
int32_t n; /**< Generic dimension to contain either the batch size or output channels. Please refer to the function documentation for more information */
int32_t h; /**< Height */
int32_t w; /**< Width */
int32_t c; /**< Input channels */
} cmsis_nn_dims;
/** CMSIS-NN object for the per-channel quantization parameters */
typedef struct
{
int32_t *multiplier; /**< Multiplier values */
int32_t *shift; /**< Shift values */
} cmsis_nn_per_channel_quant_params;
/** CMSIS-NN object for the per-tensor quantization parameters */
typedef struct
{
int32_t multiplier; /**< Multiplier value */
int32_t shift; /**< Shift value */
} cmsis_nn_per_tensor_quant_params;
/** CMSIS-NN object for the quantized Relu activation */
typedef struct
{
int32_t min; /**< Min value used to clamp the result */
int32_t max; /**< Max value used to clamp the result */
} cmsis_nn_activation;
/** CMSIS-NN object for the convolution layer parameters */
typedef struct
{
int32_t input_offset; /**< Zero value for the input tensor */
int32_t output_offset; /**< Zero value for the output tensor */
cmsis_nn_tile stride;
cmsis_nn_tile padding;
cmsis_nn_tile dilation;
cmsis_nn_activation activation;
} cmsis_nn_conv_params;
/** CMSIS-NN object for Depthwise convolution layer parameters */
typedef struct
{
int32_t input_offset; /**< Zero value for the input tensor */
int32_t output_offset; /**< Zero value for the output tensor */
int32_t ch_mult; /**< Channel Multiplier. ch_mult * in_ch = out_ch */
cmsis_nn_tile stride;
cmsis_nn_tile padding;
cmsis_nn_tile dilation;
cmsis_nn_activation activation;
} cmsis_nn_dw_conv_params;
/** CMSIS-NN object for pooling layer parameters */
typedef struct
{
cmsis_nn_tile stride;
cmsis_nn_tile padding;
cmsis_nn_activation activation;
} cmsis_nn_pool_params;
/** CMSIS-NN object for Fully Connected layer parameters */
typedef struct
{
int32_t input_offset; /**< Zero value for the input tensor */
int32_t filter_offset; /**< Zero value for the filter tensor */
int32_t output_offset; /**< Zero value for the output tensor */
cmsis_nn_activation activation;
} cmsis_nn_fc_params;
#endif // _ARM_NN_TYPES_H
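
A hedged sketch of how these structs are typically populated before calling a CMSIS-NN kernel. In TensorFlow Lite Micro usage, input_offset is the negated input zero point; all values below are illustrative, and the include path is assumed.

#include <stdint.h>
#include "arm_nn_types.h"

static void fill_conv_params(cmsis_nn_conv_params *p, cmsis_nn_dims *input_dims)
{
    p->input_offset  = 128;    /* == -(input zero point) for int8 data */
    p->output_offset = -128;   /* == output zero point                 */
    p->stride     = (cmsis_nn_tile){ .w = 1, .h = 1 };
    p->padding    = (cmsis_nn_tile){ .w = 0, .h = 0 };
    p->dilation   = (cmsis_nn_tile){ .w = 1, .h = 1 };
    p->activation = (cmsis_nn_activation){ .min = -128, .max = 127 };

    *input_dims = (cmsis_nn_dims){ .n = 1, .h = 28, .w = 28, .c = 8 };
}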

View File

@@ -0,0 +1,905 @@
/*
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* ----------------------------------------------------------------------
* Project: CMSIS NN Library
* Title: arm_nnsupportfunctions.h
* Description: Public header file of support functions for CMSIS NN Library
*
* $Date: May 11, 2020
* $Revision: V.4.0.4
*
* Target Processor: Cortex-M CPUs
* -------------------------------------------------------------------- */
#ifndef _ARM_NNSUPPORTFUNCTIONS_H_
#define _ARM_NNSUPPORTFUNCTIONS_H_
#include "cmsis/CMSIS/DSP/Include/arm_math.h"
#include "cmsis/CMSIS/DSP/Include/arm_common_tables.h"
#ifdef __cplusplus
extern "C"
{
#endif
#define LEFT_SHIFT(_shift)  ((_shift) > 0 ? (_shift) : 0)
#define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))
#define MASK_IF_ZERO(x)     ((x) == 0 ? ~0 : 0)
#define MASK_IF_NON_ZERO(x) ((x) != 0 ? ~0 : 0)
#define SELECT_USING_MASK(mask, a, b) (((mask) & (a)) ^ (~(mask) & (b)))
#define MAX(A,B) ((A) > (B) ? (A) : (B))
#define MIN(A,B) ((A) < (B) ? (A) : (B))
#define CLAMP(x, h, l) MAX(MIN((x), (h)), (l))
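/* Illustrative use of the shift helpers above: a signed "net shift" is split
 * so that exactly one component is non-zero, e.g.
 *
 *   acc = (acc << LEFT_SHIFT(shift)) >> RIGHT_SHIFT(shift);
 *
 * (The library kernels pair this with a saturating multiply to implement the
 * full requantization; the line above shows only the power-of-two part.)
 */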
/**
* @brief Union for SIMD access of q31/q15/q7 types
*/
union arm_nnword
{
    q31_t word;          /**< q31 type */
    q15_t half_words[2]; /**< q15 type */
    q7_t  bytes[4];      /**< q7 type */
};
/**
* @brief Struct for specifying activation function types
*
*/
typedef enum
{
ARM_SIGMOID = 0,
/**< Sigmoid activation function */
ARM_TANH = 1,
/**< Tanh activation function */
} arm_nn_activation_type;
/**
* @defgroup nndata_convert Neural Network Data Conversion Functions
*
* Perform data type conversion in-between neural network operations
*
*/
/**
* @brief Converts the elements of the q7 vector to q15 vector without left-shift
* @param[in] *pSrc points to the q7 input vector
* @param[out] *pDst points to the q15 output vector
* @param[in] blockSize length of the input vector
*
*/
void arm_q7_to_q15_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize);
/**
* @brief Non-saturating addition of elements of a q7 vector
* @param[in] *input Pointer to the q7 input vector
* @param[out] *output Pointer to the q31 output variable.
* @param[in] block_size length of the input vector
* \par Description:
*
* 2^24 samples can be added without saturating the result.
*
* The equation used for the conversion process is:
*
* <pre>
* sum = input[0] + input[1] + .. + input[block_size -1]
* </pre>
*
* */
void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size);
/**
* @brief Converts the elements of the q7 vector to reordered q15 vector without left-shift
* @param[in] *pSrc points to the q7 input vector
* @param[out] *pDst points to the q15 output vector
* @param[in] blockSize length of the input vector
* @return none.
*
*/
void arm_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize);
/**
* @brief Converts the elements from a q7 vector to a q15 vector with an added offset
* @param[in] src pointer to the q7 input vector
* @param[out] dst pointer to the q15 output vector
* @param[in] block_size length of the input vector
* @param[in] offset q7 offset to be added to each input vector element.
*
* \par Description:
*
* The equation used for the conversion process is:
*
* <pre>
* dst[n] = (q15_t) src[n] + offset; 0 <= n < block_size.
* </pre>
*
*/
void arm_q7_to_q15_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset);
/**
* @brief Converts the elements of the q7 vector to reordered q15 vector with an added offset
* @param[in] src pointer to the q7 input vector
* @param[out] dst pointer to the q15 output vector
* @param[in] block_size length of the input vector
* @param[in] offset offset to be added to each input vector element.
* @return none.
*
* @details This function does the q7 to q15 expansion with re-ordering of bytes. Re-ordering is a consequence of
* the sign extension intrinsic (DSP extension). The tail (i.e., the last (N % 4) elements) retains its original
* order.
*
*/
void arm_q7_to_q15_reordered_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset);
/**
* @brief Converts the elements from a q7 vector and accumulate to a q15 vector
* @param[in] *src points to the q7 input vector
* @param[out] *dst points to the q15 output vector
* @param[in] block_size length of the input vector
*
* \par Description:
*
* The equation used for the conversion process is:
*
* <pre>
* dst[n] += (q15_t) src[n] ; 0 <= n < block_size.
* </pre>
*
*/
void arm_nn_accumulate_q7_to_q15(q15_t *dst, const q7_t *src, uint32_t block_size);
/**
* @brief Depthwise conv on an im2col buffer where the input channel equals output channel.
* @param[in] row pointer to row
* @param[in] col pointer to im2col buffer, always consists of 2 columns.
* @param[in] num_ch number of channels
* @param[in] out_shift pointer to per output channel requantization shift parameter.
* @param[in] out_mult pointer to per output channel requantization multiplier parameter.
* @param[in] out_offset output tensor offset.
* @param[in] activation_min minimum value to clamp the output to. Range : int8
* @param[in] activation_max maximum value to clamp the output to. Range : int8
* @param[in] kernel_size number of elements in one column.
* @param[in] output_bias per output channel bias. Range : int32
* @param[out] out pointer to output
* @return The function returns one of the two
* 1. The incremented output pointer for a successful operation or
* 2. NULL if implementation is not available.
*
* @details Supported framework: TensorFlow Lite micro.
*/
q7_t *arm_nn_depthwise_conv_s8_core(const q7_t *row,
const q15_t *col,
const uint16_t num_ch,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t out_offset,
const int32_t activation_min,
const int32_t activation_max,
const uint16_t kernel_size,
const int32_t *const output_bias,
q7_t *out);
/**
* @brief General Matrix-multiplication function with per-channel requantization.
* @param[in] input_row pointer to row operand
* @param[in] input_col pointer to col operand
* @param[in] output_ch number of rows of input_row
* @param[in] col_batches number of column batches. Range: 1 to 4
* @param[in] output_shift pointer to per output channel requantization shift parameter.
* @param[in] output_mult pointer to per output channel requantization multiplier parameter.
* @param[in] out_offset output tensor offset.
* @param[in] col_offset input tensor(col) offset.
* @param[in] row_offset kernel offset(row). Not used.
* @param[in] out_activation_min minimum value to clamp the output to. Range : int8
* @param[in] out_activation_max maximum value to clamp the output to. Range : int8
* @param[in] row_len number of elements in each row
* @param[in] bias per output channel bias. Range : int32
* @param[in,out] out pointer to output
* @return The function returns one of the two
* 1. The incremented output pointer for a successful operation or
* 2. NULL if implementation is not available.
*
* @details Supported framework: TensorFlow Lite
*/
q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
const q7_t *input_col,
const uint16_t output_ch,
const uint16_t col_batches,
const int32_t *output_shift,
const int32_t *output_mult,
const int32_t out_offset,
const int32_t col_offset,
const int32_t row_offset,
const int16_t out_activation_min,
const int16_t out_activation_max,
const uint16_t row_len,
const int32_t *const bias,
q7_t *out);
/**
* @brief General Matrix-multiplication without requantization for one row & one column
* @param[in] row_elements number of row elements
* @param[in] row_base pointer to row operand
* @param[in] col_base pointer to col operand
* @param[out] sum_col pointer to store sum of column elements
* @param[out] output pointer to store result of multiply-accumulate
* @return The function returns the multiply-accumulated result of the row by column.
*
* @details Pseudo-code
* *output = 0
* sum_col = 0
* for (i = 0; i < row_elements; i++)
* *output += row_base[i] * col_base[i]
* sum_col += col_base[i]
*
*/
arm_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements,
const int8_t *row_base,
const int8_t *col_base,
int32_t *const sum_col,
int32_t *const output);
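/* A portable reference version matching the pseudo-code above (the name is
 * hypothetical; the library's optimized builds use DSP/MVE intrinsics): */
static inline arm_status arm_nn_mat_mul_core_1x_s8_ref(int32_t row_elements,
                                                       const int8_t *row_base,
                                                       const int8_t *col_base,
                                                       int32_t *const sum_col,
                                                       int32_t *const output)
{
    int32_t acc = 0;
    int32_t col_sum = 0;
    for (int32_t i = 0; i < row_elements; i++)
    {
        acc += row_base[i] * col_base[i]; /* multiply-accumulate       */
        col_sum += col_base[i];           /* running sum of the column */
    }
    *output = acc;
    *sum_col = col_sum;
    return ARM_MATH_SUCCESS;
}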
/**
* @brief General Matrix-multiplication without requantization for four rows and one column
* @param[in] row_elements number of row elements
* @param[in] offset offset between rows. Can be the same as row_elements.
* For example, in a 1x1 convolution with a stride of 1.
* @param[in] row_base pointer to row operand
* @param[in] col_base pointer to col operand
* @param[out] sum_col pointer to store sum of column elements
* @param[out] output pointer to store result(4 int32's) of multiply-accumulate
* @return The function returns the multiply-accumulated result of the row by column
*
* @details Pseudo-code
* output[0] = 0
* ..
* output[3] = 0
* sum_col = 0
* for (i = 0; i < row_elements; i++)
* output[0] += row_base[i] * col_base[i]
* ..
* output[3] += row_base[i + (row_elements * 3)] * col_base[i]
* sum_col += col_base[i]
*/
arm_status arm_nn_mat_mul_core_4x_s8(const int32_t row_elements,
const int32_t offset,
const int8_t *row_base,
const int8_t *col_base,
int32_t *const sum_col,
int32_t *const output);
/**
* @brief General Matrix-multiplication function with per-channel requantization.
* This function assumes:
* - LHS input matrix NOT transposed (nt)
* - RHS input matrix transposed (t)
*
* @note This operation also performs the broadcast bias addition before the requantization
*
* @param[in] lhs Pointer to the LHS input matrix
* @param[in] rhs Pointer to the RHS input matrix
* @param[in] bias Pointer to the bias vector. The length of this vector is equal to the number of output columns (or RHS input rows)
* @param[out] dst Pointer to the output matrix with "m" rows and "n" columns
* @param[in] dst_multipliers Pointer to the multipliers vector needed for the per-channel requantization. The length of this vector is equal to
* the number of output columns (or RHS input rows)
* @param[in] dst_shifts Pointer to the shifts vector needed for the per-channel requantization. The length of this vector is equal to
* the number of output columns (or RHS input rows)
* @param[in] lhs_rows Number of LHS input rows
* @param[in] rhs_rows Number of RHS input rows
* @param[in] rhs_cols Number of LHS/RHS input columns
* @param[in] lhs_offset Offset to be applied to the LHS input value
 * @param[in]  dst_offset      Offset to be applied to the output result
 * @param[in]  activation_min  Minimum value to clamp the output to. Range : int8
 * @param[in]  activation_max  Maximum value to clamp the output to. Range : int8
*
* @return The function returns <code>ARM_MATH_SUCCESS</code>
*
*/
arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
const q7_t *rhs,
const q31_t *bias,
q7_t *dst,
const int32_t *dst_multipliers,
const int32_t *dst_shifts,
const int32_t lhs_rows,
const int32_t rhs_rows,
const int32_t rhs_cols,
const int32_t lhs_offset,
const int32_t dst_offset,
const int32_t activation_min,
const int32_t activation_max);
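/*
 * Usage sketch (the dimensions m, n, k and the quantization parameters are
 * assumed): computes an m x n output from an m x k LHS and an n x k transposed
 * RHS with per-column requantization, as in fully connected style kernels:
 *
 *   arm_nn_mat_mult_nt_t_s8(lhs, rhs, bias, dst, dst_multipliers, dst_shifts,
 *                           m, n, k, lhs_offset, dst_offset, -128, 127);
 */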
/**
* @brief s8 Vector by Matrix (transposed) multiplication
*
* @param[in] lhs Input left-hand side vector
* @param[in] rhs Input right-hand side matrix (transposed)
* @param[in] bias Input bias
* @param[out] dst Output vector
* @param[in] lhs_offset Offset to be added to the input values of the left-hand side vector. Range: -127 to 128
* @param[in] rhs_offset Offset to be added to the input values of the right-hand side matrix. Range: -127 to 128
* @param[in] dst_offset Offset to be added to the output values. Range: -127 to 128
* @param[in] dst_multiplier Output multiplier
* @param[in] dst_shift Output shift
* @param[in] rhs_cols Number of columns in the right-hand side input matrix
* @param[in] rhs_rows Number of rows in the right-hand side input matrix
* @param[in] activation_min Minimum value to clamp the output to. Range: int8
* @param[in] activation_max Maximum value to clamp the output to. Range: int8
*
* @return The function returns <code>ARM_MATH_SUCCESS</code>
*
*/
arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
const q7_t *rhs,
const q31_t *bias,
q7_t *dst,
const int32_t lhs_offset,
const int32_t rhs_offset,
const int32_t dst_offset,
const int32_t dst_multiplier,
const int32_t dst_shift,
const int32_t rhs_cols,
const int32_t rhs_rows,
const int32_t activation_min,
const int32_t activation_max);
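/*
 * Usage sketch (parameter names assumed): one output value is produced per RHS
 * row, so dst must have room for rhs_rows elements:
 *
 *   arm_nn_vec_mat_mult_t_s8(vec, mat, bias, dst, lhs_offset, 0, dst_offset,
 *                            dst_multiplier, dst_shift, rhs_cols, rhs_rows,
 *                            -128, 127);
 */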
/**
* @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where
 * the padding is -lhs_offset (Range: int8). Dimensions are the same for lhs and rhs.
*
* @param[in] lhs Input left-hand side matrix
* @param[in] rhs Input right-hand side matrix (transposed)
 * @param[in] lhs_offset LHS matrix offset (input offset). Range: -127 to 128
* @param[in] num_ch Number of channels in LHS/RHS
* @param[in] out_shift Per channel output shift. Length of vector is equal to number of channels
* @param[in] out_mult Per channel output multiplier. Length of vector is equal to number of channels
* @param[in] out_offset Offset to be added to the output values. Range: -127 to 128
* @param[in] activation_min Minimum value to clamp the output to. Range: int8
* @param[in] activation_max Maximum value to clamp the output to. Range: int8
* @param[in] row_x_col (row_dimension * col_dimension) of LHS/RHS matrix
* @param[in] output_bias Per channel output bias. Length of vector is equal to number of channels
* @param[in] out Output pointer
*
 * @return The function returns one of the following:
 * - Updated output pointer if an implementation is available
 * - NULL if no implementation is available.
 *
 * @note If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read
 * for the following:
* - Output shift
* - Output multiplier
* - Output bias
* - rhs
*/
q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
const q7_t *rhs,
const int32_t lhs_offset,
const uint16_t num_ch,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t out_offset,
const int32_t activation_min,
const int32_t activation_max,
const uint16_t row_x_col,
const int32_t *const output_bias,
q7_t *out);
/**
* @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases.
* Dimensions are the same for lhs and rhs.
*
* @param[in] lhs Input left-hand side matrix
* @param[in] rhs Input right-hand side matrix (transposed)
 * @param[in] lhs_offset LHS matrix offset (input offset). Range: -127 to 128
* @param[in] num_ch Number of channels in LHS/RHS
* @param[in] out_shift Per channel output shift. Length of vector is equal to number of channels.
* @param[in] out_mult Per channel output multiplier. Length of vector is equal to number of channels.
* @param[in] out_offset Offset to be added to the output values. Range: -127 to 128
* @param[in] activation_min Minimum value to clamp the output to. Range: int8
* @param[in] activation_max Maximum value to clamp the output to. Range: int8
* @param[in] row_x_col (row_dimension * col_dimension) of LHS/RHS matrix
* @param[in] output_bias Per channel output bias. Length of vector is equal to number of channels.
* @param[in] out Output pointer
*
 * @return The function returns one of the following:
 * - Updated output pointer if an implementation is available
 * - NULL if no implementation is available.
 *
 * @note If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read
 * for the following:
* - Output shift
* - Output multiplier
* - Output bias
* - rhs
*/
q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
const q7_t *rhs,
const int32_t lhs_offset,
const uint16_t num_ch,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t out_offset,
const int32_t activation_min,
const int32_t activation_max,
const uint16_t row_x_col,
const int32_t *const output_bias,
q7_t *out);
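/*
 * Usage sketch (names assumed), applicable to both the padded and non-padded
 * variants: since up to 3 elements past the end of out_shift, out_mult,
 * output_bias and rhs may be read when num_ch is not a multiple of 4, callers
 * typically round those buffers up to a multiple of 4 channels.
 *
 *   q7_t *next_out = arm_nn_depthwise_conv_nt_t_s8(lhs, rhs, lhs_offset, num_ch,
 *                                                  out_shift, out_mult, out_offset,
 *                                                  -128, 127, row_x_col,
 *                                                  output_bias, out);
 *   if (next_out == NULL)
 *   {
 *       // No optimized implementation available; fall back to a reference kernel.
 *   }
 */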
/**
  @brief Read 2 q15 elements and post-increment the pointer.
@param[in] in_q15 Pointer to pointer that holds address of input.
@return q31 value
*/
__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2_ia(const q15_t **in_q15)
{
q31_t val;
memcpy(&val, *in_q15, 4);
*in_q15 += 2;
return (val);
}
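/*
 * Example (buffer contents assumed): the memcpy-based load keeps the access
 * well-defined even for pointers without 4-byte alignment.
 *
 *   const q15_t buf[4] = {0x1111, 0x2222, 0x3333, 0x4444};
 *   const q15_t *p = buf;
 *   q31_t packed = arm_nn_read_q15x2_ia(&p);  // packed holds buf[0] and buf[1]
 *   // p now points at buf[2]
 */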
/**
  @brief Read 4 q7 values from a q7 pointer and post-increment the pointer.
@param[in] in_q7 Pointer to pointer that holds address of input.
@return q31 value
*/
__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4_ia(const q7_t **in_q7)
{
q31_t val;
memcpy(&val, *in_q7, 4);
*in_q7 += 4;
return (val);
}
/**
  @brief Read 2 q15 values from a q15 pointer.
@param[in] in_q15 pointer to address of input.
@return q31 value
*/
__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2(const q15_t *in_q15)
{
q31_t val;
memcpy(&val, in_q15, 4);
return (val);
}
/**
@brief Read 4 q7 values.
@param[in] in_q7 pointer to address of input.
@return q31 value
*/
__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4(const q7_t *in_q7)
{
q31_t val;
memcpy(&val, in_q7, 4);
return (val);
}
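/*
 * Example (buffer assumed): all four read helpers use memcpy, so they are safe
 * on targets without hardware unaligned-load support.
 *
 *   const q7_t data[5] = {1, 2, 3, 4, 5};
 *   q31_t word = arm_nn_read_q7x4(&data[1]);  // packs data[1]..data[4]
 */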
/**
* @brief memset optimized for MVE
* @param[in, out] dst Destination pointer
* @param[in] val Value to set
 * @param[in]      val         Value to set
 * @param[in]      block_size  Number of bytes to set.
*
*/
__STATIC_FORCEINLINE void arm_memset_q7(q7_t *dst,
const q7_t val,
uint32_t block_size)
{
#if defined(ARM_MATH_MVEI)
__asm volatile (
" vdup.8 q0, %[set_val] \n"
" wlstp.8 lr, %[cnt], 1f \n"
"2: \n"
" vstrb.8 q0, [%[in]], 16 \n"
" letp lr, 2b \n"
"1: \n"
:[in] "+r"(dst)
:[cnt] "r"(block_size), [set_val] "r"(val)
:"q0", "memory", "r14");
#else
memset(dst, val, block_size);
#endif
}
#if defined (ARM_MATH_DSP)
/**
* @brief read and expand one q7 word into two q15 words
*/
__STATIC_FORCEINLINE const q7_t *read_and_pad(const q7_t *source, q31_t * out1, q31_t * out2)
{
q31_t inA = arm_nn_read_q7x4_ia(&source);
q31_t inAbuf1 = __SXTB16(__ROR((uint32_t)inA, 8));
q31_t inAbuf2 = __SXTB16(inA);
#ifndef ARM_MATH_BIG_ENDIAN
*out2 = (int32_t) (__PKHTB (inAbuf1, inAbuf2, 16));
*out1 = (int32_t) (__PKHBT (inAbuf2, inAbuf1, 16));
#else
*out1 = (int32_t) (__PKHTB(inAbuf1, inAbuf2, 16));
*out2 = (int32_t) (__PKHBT(inAbuf2, inAbuf1, 16));
#endif
return source;
}
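/*
 * Worked example (little-endian, input bytes assumed): for source bytes
 * {0x01, 0x02, 0x03, 0x04}, read_and_pad() yields out1 == 0x00020001
 * (elements 0 and 1 as a packed q15 pair) and out2 == 0x00040003 (elements 2
 * and 3), preserving element order. The "reordered" variant below skips the
 * __PKH* repacking and returns the interleaved pairs {0, 2} and {1, 3}, which
 * is cheaper when the consuming kernel does not care about element order.
 */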
/**
* @brief read and expand one q7 word into two q15 words with reordering
*/
__STATIC_FORCEINLINE const q7_t *read_and_pad_reordered(const q7_t *source, q31_t * out1, q31_t * out2)
{
q31_t inA = arm_nn_read_q7x4_ia(&source);
#ifndef ARM_MATH_BIG_ENDIAN
*out2 = __SXTB16(__ROR((uint32_t)inA, 8));
*out1 = __SXTB16(inA);
#else
*out1 = __SXTB16(__ROR((uint32_t)inA, 8));
*out2 = __SXTB16(inA);
#endif
return source;
}
/**
* @brief read and expand one q7 word into two q15 words with reordering and add an offset
*/
__STATIC_FORCEINLINE const q7_t *read_and_pad_reordered_with_offset(const q7_t *source, q31_t * out1, q31_t * out2, q31_t offset)
{
q31_t inA = arm_nn_read_q7x4_ia(&source);
#ifndef ARM_MATH_BIG_ENDIAN
*out2 = __SXTB16(__ROR((uint32_t)inA, 8));
*out1 = __SXTB16(inA);
#else
*out1 = __SXTB16(__ROR((uint32_t)inA, 8));
*out2 = __SXTB16(inA);
#endif
    *out1 = __QADD16(*out1, offset);
    *out2 = __QADD16(*out2, offset);
return source;
}
#endif
/**
* @defgroup NNBasicMath Basic Math Functions for Neural Network Computation
*
* Basic Math Functions for Neural Network Computation
*
*/
/**
 * @brief q15 vector multiplication with variable output shifts
* @param[in] *pSrcA pointer to the first input vector
* @param[in] *pSrcB pointer to the second input vector
* @param[out] *pDst pointer to the output vector
* @param[in] out_shift amount of right-shift for output
* @param[in] blockSize number of samples in each vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
 * Results outside of the allowable q15 range [0x8000, 0x7FFF] will be saturated.
*/
void arm_nn_mult_q15(
q15_t * pSrcA,
q15_t * pSrcB,
q15_t * pDst,
const uint16_t out_shift,
uint32_t blockSize);
/**
* @brief q7 vector multiplication with variable output shifts
* @param[in] *pSrcA pointer to the first input vector
* @param[in] *pSrcB pointer to the second input vector
* @param[out] *pDst pointer to the output vector
* @param[in] out_shift amount of right-shift for output
* @param[in] blockSize number of samples in each vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
 * Results outside of the allowable q7 range [0x80, 0x7F] will be saturated.
*/
void arm_nn_mult_q7(
q7_t * pSrcA,
q7_t * pSrcB,
q7_t * pDst,
const uint16_t out_shift,
uint32_t blockSize);
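/*
 * Example (values assumed): an elementwise q7 multiply with out_shift == 7
 * implements standard Q7 x Q7 -> Q7 multiplication:
 *
 *   q7_t a[2] = {64, -64};  // 0.5 and -0.5 in Q7
 *   q7_t b[2] = {64, 64};   // 0.5 in Q7
 *   q7_t r[2];
 *   arm_nn_mult_q7(a, b, r, 7, 2);  // r == {32, -32}, i.e. 0.25 and -0.25
 */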
/**
* @brief macro for adding rounding offset
*/
#ifndef ARM_NN_TRUNCATE
#define NN_ROUND(out_shift) ( (0x1u << out_shift) >> 1 )
#else
#define NN_ROUND(out_shift) 0
#endif
// Macros for shortening quantization functions' names and avoiding long lines
#define MUL_SAT(a, b) arm_nn_sat_doubling_high_mult((a), (b))
#define MUL_SAT_MVE(a, b) arm_sat_doubling_high_mult_mve_32x4((a), (b))
#define MUL_POW2(a, b) arm_nn_mult_by_power_of_two((a), (b))
#define DIV_POW2(a, b) arm_nn_divide_by_power_of_two((a), (b))
#define DIV_POW2_MVE(a, b) arm_divide_by_power_of_two_mve((a), (b))
#define EXP_ON_NEG(x) arm_nn_exp_on_negative_values((x))
#define ONE_OVER1(x) arm_nn_one_over_one_plus_x_for_x_in_0_1((x))
/**
* @brief Saturating doubling high multiply. Result matches
* NEON instruction VQRDMULH.
* @param[in] m1 Multiplicand
* @param[in] m2 Multiplier
* @return Result of multiplication.
*
*/
__STATIC_FORCEINLINE q31_t arm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
{
q31_t result = 0;
// Rounding offset to add for a right shift of 31
q63_t mult = 1 << 30;
if ((m1 < 0) ^ (m2 < 0))
{
mult = 1 - mult;
}
// Gets resolved as a SMLAL instruction
mult = mult + (q63_t)m1 * m2;
// Utilize all of the upper 32 bits. This is the doubling step
// as well.
result = (int32_t) (mult / (1ll << 31));
if ((m1 == m2) && (m1 == (int32_t)Q31_MIN))
{
result = Q31_MAX;
}
return result;
}
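/*
 * Worked example: in Q31 terms the function computes round((m1 * m2) / 2^31),
 * i.e. the product of two Q31 fixed-point values. For instance
 * arm_nn_sat_doubling_high_mult(1 << 30, 1 << 30) == 1 << 29 (0.5 * 0.5 == 0.25),
 * and the single overflowing input pair m1 == m2 == Q31_MIN saturates to Q31_MAX.
 */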
/**
* @brief Rounding divide by power of two.
* @param[in] dividend - Dividend
* @param[in] exponent - Divisor = power(2, exponent)
* Range: [0, 31]
* @return Rounded result of division. Midpoint is rounded away from zero.
*
*/
__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
{
q31_t result = 0;
const q31_t remainder_mask = (1 << exponent) - 1;
int32_t remainder = remainder_mask & dividend;
// Basic division
result = dividend >> exponent;
// Adjust 'result' for rounding (mid point away from zero)
q31_t threshold = remainder_mask >> 1;
if (result < 0)
{
threshold++;
}
if (remainder > threshold)
{
result++;
}
return result;
}
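/*
 * Worked examples of the midpoint-away-from-zero rounding:
 *
 *   arm_nn_divide_by_power_of_two(5, 1)   ==  3  //  2.5 rounds to  3
 *   arm_nn_divide_by_power_of_two(-5, 1)  == -3  // -2.5 rounds to -3
 *   arm_nn_divide_by_power_of_two(6, 2)   ==  2  //  1.5 rounds to  2
 */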
/**
* @brief Requantize a given value.
* @param[in] val Value to be requantized
* @param[in] multiplier multiplier
 * @param[in] shift signed shift: applied as a left shift to 'val' when positive,
 *                  and as a rounding right shift to the product when negative
 *
 * @return Returns round((val * 2^shift * multiplier) / 2^31)
*
*/
__STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
{
return arm_nn_divide_by_power_of_two(arm_nn_sat_doubling_high_mult(val * (1 << LEFT_SHIFT(shift)), multiplier),
RIGHT_SHIFT(shift));
}
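/*
 * Example (values assumed): following the TFLite convention where the
 * multiplier is a Q31 value in [0.5, 1) and the shift is signed, a multiplier
 * of 0x40000000 (0.5) with shift == -1 scales by 0.25:
 *
 *   // arm_nn_requantize(1000, 0x40000000, -1) == 250
 */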
/**
* @brief memcpy optimized for MVE
* @param[in, out] dst Destination pointer
* @param[in] src Source pointer.
* @param[in] block_size Number of bytes to copy.
*
*/
__STATIC_FORCEINLINE void arm_memcpy_q7(q7_t *__RESTRICT dst,
const q7_t *__RESTRICT src,
uint32_t block_size)
{
#if defined(ARM_MATH_MVEI)
__asm volatile (
" wlstp.8 lr, %[cnt], 1f \n"
"2: \n"
" vldrb.8 q0, [%[in]], 16 \n"
" vstrb.8 q0, [%[out]], 16 \n"
" letp lr, 2b \n"
"1: \n"
:[in] "+r"(src)
,[out] "+r"(dst)
:[cnt] "r"(block_size)
:"q0", "memory", "r14");
#else
memcpy(dst, src, block_size);
#endif
}
#if defined(ARM_MATH_MVEI)
/**
* @brief Vector saturating doubling high multiply returning high half.
* @param[in] m1 Multiplicand
* @param[in] m2 Multiplier
* @return Result of multiplication.
*
*/
__STATIC_FORCEINLINE int32x4_t arm_sat_doubling_high_mult_mve(const int32x4_t m1, const q31_t m2)
{
return vqrdmulhq_n_s32(m1, m2);
}
/**
* @brief Vector rounding divide by power of two.
* @param[in] dividend - Dividend vector
* @param[in] exponent - Divisor = power(2, exponent)
* Range: [0, 31]
* @return Rounded result of division. Midpoint is rounded away from zero.
*
*/
__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve(const int32x4_t dividend, const q31_t exponent)
{
const int32x4_t shift = vdupq_n_s32(-exponent);
const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31);
const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup);
return vrshlq_s32(fixed_up_dividend, shift);
}
/**
* @brief Requantize a given vector.
* @param[in] val Vector to be requantized
* @param[in] multiplier multiplier
* @param[in] shift shift
*
* @return Returns (val * multiplier)/(2 ^ shift)
*
*/
__STATIC_FORCEINLINE int32x4_t arm_requantize_mve(const int32x4_t val, const q31_t multiplier, const q31_t shift)
{
return arm_divide_by_power_of_two_mve(
arm_sat_doubling_high_mult_mve(vshlq_s32(val, vdupq_n_s32(LEFT_SHIFT(shift))), multiplier),
RIGHT_SHIFT(shift));
}
__STATIC_FORCEINLINE int32x4_t arm_sat_doubling_high_mult_mve_32x4(const int32x4_t m1, const int32x4_t m2)
{
return vqrdmulhq_s32(m1, m2);
}
__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve_32x4(const int32x4_t dividend, const int32x4_t exponent)
{
const int32x4_t shift = -exponent;
const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31);
const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup);
return vrshlq_s32(fixed_up_dividend, shift);
}
__STATIC_FORCEINLINE int32x4_t arm_requantize_mve_32x4(const int32x4_t val, const int32x4_t multiplier, const int32x4_t shift)
{
const int32x4_t zz = vdupq_n_s32(0);
const mve_pred16_t p = vcmpgtq_n_s32(shift, 0);
const int32x4_t left_shift = vpselq_s32(shift, zz, p);
const int32x4_t right_shift = -vpselq_s32(zz, shift, p);
return arm_divide_by_power_of_two_mve_32x4(arm_sat_doubling_high_mult_mve_32x4(vshlq_s32(val, left_shift), multiplier), right_shift);
}
#endif
// @note The following functions are used only by the softmax layer; scaled bits = 5 is assumed.
__STATIC_FORCEINLINE int32_t arm_nn_exp_on_negative_values(int32_t val)
{
int32_t mask = 0;
int32_t shift = 24;
const int32_t val_mod_minus_quarter = (val & ((1 << shift) - 1)) - (1 << shift);
const int32_t remainder = val_mod_minus_quarter - val;
const int32_t x = (val_mod_minus_quarter << 5) + (1 << 28);
const int32_t x2 = MUL_SAT(x, x);
int32_t result = 1895147668 + MUL_SAT(1895147668, x +
DIV_POW2(MUL_SAT(DIV_POW2(MUL_SAT(x2, x2), 2) + MUL_SAT(x2, x), 715827883) + x2, 1));
#define SELECT_IF_NON_ZERO(x) \
{ \
mask = MASK_IF_NON_ZERO(remainder & (1 << shift++)); \
result = SELECT_USING_MASK(mask, MUL_SAT(result, x), result); \
}
SELECT_IF_NON_ZERO(1672461947)
SELECT_IF_NON_ZERO(1302514674)
SELECT_IF_NON_ZERO(790015084)
SELECT_IF_NON_ZERO(290630308)
SELECT_IF_NON_ZERO(39332535)
SELECT_IF_NON_ZERO(720401)
SELECT_IF_NON_ZERO(242)
#undef SELECT_IF_NON_ZERO
mask = MASK_IF_ZERO(val);
return SELECT_USING_MASK(mask, Q31_MAX, result);
}
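/*
 * arm_nn_exp_on_negative_values() expects val <= 0 in Q5.26 (the "scaled
 * bits = 5" note above) and returns exp(val) in Q0.31. It reduces val modulo
 * -1/4, approximates exp on [-1/4, 0) with a small polynomial centered on
 * exp(-1/8) (the constant 1895147668), then multiplies in the precomputed
 * factors exp(-1/4), exp(-1/2), ..., exp(-16) selected by the remainder bits;
 * val == 0 maps to Q31_MAX, i.e. ~1.0.
 */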
__STATIC_FORCEINLINE q31_t arm_nn_mult_by_power_of_two(const int32_t val, const int32_t exp)
{
const int32_t thresh = ((1 << (31 - exp)) - 1);
int32_t result = val << exp;
result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val > thresh), Q31_MAX, result);
result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val < -thresh), Q31_MIN, result);
return result;
}
__STATIC_FORCEINLINE int32_t arm_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val)
{
const int64_t sum = (int64_t)val + (int64_t)Q31_MAX;
const int32_t half_denominator = (int32_t)((sum + (sum >= 0 ? 1 : -1)) / 2L);
int32_t x = 1515870810 + MUL_SAT(half_denominator, -1010580540);
const int32_t shift = (1 << 29);
x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2);
x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2);
x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2);
return MUL_POW2(x, 1);
}
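/*
 * The three identical update lines above are Newton-Raphson iterations
 * x <- x + x * (1 - half_denominator * x) for the reciprocal, evaluated in
 * fixed point where (1 << 29) represents 1.0. The starting value is the
 * classic linear estimate 48/17 - (32/17) * half_denominator (constants
 * 1515870810 and -1010580540 in Q29), and the final MUL_POW2(x, 1) doubles
 * the Q2.29 reciprocal to produce 1/(1+x) in Q0.31.
 */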
#ifdef __cplusplus
}
#endif
#endif