tflite micro integrate repo
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
|
||||
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// The magic number in the template parameter is the maximum number of ops that
|
||||
// can be added to AllOpsResolver. It can be increased if needed. And most
|
||||
// applications that care about the memory footprint will want to directly use
|
||||
// MicroMutableOpResolver and have an application specific template parameter.
|
||||
// The examples directory has sample code for this.
|
||||
class AllOpsResolver : public MicroMutableOpResolver<128> {
 public:
  // Presumably registers all available built-in ops with the resolver;
  // the implementation is out-of-line and not visible here — confirm in
  // the corresponding .cc file.
  AllOpsResolver();

 private:
  // Avoids linking in the global ::operator delete() when
  // TF_LITE_STATIC_MEMORY is defined (see compatibility.h).
  TF_LITE_REMOVE_VIRTUAL_DELETE
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
|
@@ -0,0 +1,22 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
|
||||
#define TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
|
||||
|
||||
extern const unsigned char g_keyword_scrambled_model_data[];
|
||||
extern const unsigned int g_keyword_scrambled_model_data_length;
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
|
@@ -0,0 +1,32 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
|
||||
#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
|
||||
|
||||
// C++ will automatically create class-specific delete operators for virtual
|
||||
// objects, which by default call the global delete function. For embedded
|
||||
// applications we want to avoid this, and won't be calling new/delete on these
|
||||
// objects, so we need to override the default implementation with one that does
|
||||
// nothing to avoid linking in ::delete().
|
||||
// This macro needs to be included in all subclasses of a virtual base class in
|
||||
// the private section.
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
#define TF_LITE_REMOVE_VIRTUAL_DELETE \
|
||||
void operator delete(void* p) {}
|
||||
#else
|
||||
#define TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
#endif
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
|
@@ -0,0 +1,23 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
|
||||
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
|
||||
|
||||
// This function should be implemented by each target platform, and provide a
|
||||
// way for strings to be output to some text stream. For more information, see
|
||||
// tensorflow/lite/micro/debug_log.cc.
|
||||
extern "C" void DebugLog(const char* s);
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
|
@@ -0,0 +1,34 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Provides an interface to take an action based on the output from the person
|
||||
// detection model.
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
|
||||
// Called every time the results of a person detection run are available. The
|
||||
// `person_score` has the numerical confidence that the captured image contains
|
||||
// a person, and `no_person_score` has the numerical confidence that the image
|
||||
// does not contain a person. Typically if person_score > no person score, the
|
||||
// image is considered to contain a person. This threshold may be adjusted for
|
||||
// particular applications.
|
||||
void RespondToDetection(tflite::ErrorReporter* error_reporter,
|
||||
int8_t person_score, int8_t no_person_score);
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
|
@@ -0,0 +1,40 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
|
||||
// This is an abstraction around an image source like a camera, and is
|
||||
// expected to return 8-bit sample data. The assumption is that this will be
|
||||
// called in a low duty-cycle fashion in a low-power application. In these
|
||||
// cases, the imaging sensor need not be run in a streaming mode, but rather can
|
||||
// be idled in a relatively low-power mode between calls to GetImage(). The
|
||||
// assumption is that the overhead and time of bringing the low-power sensor out
|
||||
// of this standby mode is commensurate with the expected duty cycle of the
|
||||
// application. The underlying sensor may actually be put into a streaming
|
||||
// configuration, but the image buffer provided to GetImage should not be
|
||||
// overwritten by the driver code until the next call to GetImage();
|
||||
//
|
||||
// The reference implementation can have no platform-specific dependencies, so
|
||||
// it just returns a static image. For real applications, you should
|
||||
// ensure there's a specialized implementation that accesses hardware APIs.
|
||||
TfLiteStatus GetImage(tflite::ErrorReporter* error_reporter, int image_width,
|
||||
int image_height, int channels, int8_t* image_data,
|
||||
uint8_t * hardware_input);
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
|
@@ -0,0 +1,30 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
// Initializes all data needed for the example. The name is important, and needs
|
||||
// to be setup() for Arduino compatibility.
|
||||
extern "C" void person_detect_init();
|
||||
|
||||
// Runs one iteration of data gathering and inference. This should be called
|
||||
// repeatedly from the application code. The name needs to be loop() for Arduino
|
||||
// compatibility.
|
||||
extern "C" int person_detect(uint8_t * hardware_input);
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
|
@@ -0,0 +1,35 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
|
||||
|
||||
// Keeping these as constant expressions allow us to allocate fixed-sized arrays
|
||||
// on the stack for our working memory.
|
||||
|
||||
// All of these values are derived from the values used during model training,
|
||||
// if you change your model you'll need to update these constants.
|
||||
constexpr int kNumCols = 96;     // input image width in pixels
constexpr int kNumRows = 96;     // input image height in pixels
constexpr int kNumChannels = 1;  // values per pixel (single channel)

// Total number of elements in one input image buffer.
constexpr int kMaxImageSize = kNumCols * kNumRows * kNumChannels;

constexpr int kCategoryCount = 2;    // number of model output classes
constexpr int kPersonIndex = 1;      // output index of the "person" score
constexpr int kNotAPersonIndex = 0;  // output index of the "not a person" score
// One label string per category; defined elsewhere.
extern const char* kCategoryLabels[kCategoryCount];
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
|
@@ -0,0 +1,27 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// This is a standard TensorFlow Lite model file that has been converted into a
|
||||
// C data array, so it can be easily compiled into a binary for devices that
|
||||
// don't have a file system. It was created using the command:
|
||||
// xxd -i person_detect.tflite > person_detect_model_data.cc
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_
|
||||
#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_
|
||||
|
||||
extern const unsigned char g_person_detect_model_data[];
|
||||
extern const int g_person_detect_model_data_len;
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_
|
@@ -0,0 +1,57 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/max.h"
|
||||
#include "tensorflow/lite/kernels/internal/min.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Returns the floating point value for a fused activation:
|
||||
// Applies the fused activation `act` to the floating point value `a` and
// returns the result. Unhandled activation kinds (e.g. a value newly added
// to the enum) yield 0.0f.
inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
  if (act == kTfLiteActNone) {
    return a;
  }
  if (act == kTfLiteActRelu) {
    return TfLiteMax(0.0f, a);
  }
  if (act == kTfLiteActReluN1To1) {
    return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
  }
  if (act == kTfLiteActRelu6) {
    return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
  }
  if (act == kTfLiteActTanh) {
    return std::tanh(a);
  }
  if (act == kTfLiteActSignBit) {
    // std::signbit returns bool; make the bool->float conversion explicit.
    return std::signbit(a) ? 1.0f : 0.0f;
  }
  if (act == kTfLiteActSigmoid) {
    return 1.0f / (1.0f + std::exp(-a));
  }
  // Unsupported activation kind.
  return 0.0f;
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
|
@@ -0,0 +1,83 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/micro/simple_memory_allocator.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
// Helper class to perform a simulated kernel (i.e. TfLiteRegistration)
// lifecycle (init, prepare, invoke). All internal allocations are handled by
// this class. Simply pass in the registration, list of required tensors,
// inputs array, outputs array, and any builtin data. Calling Invoke() will
// automatically walk the kernel and outputs will be ready on the
// TfLiteTensor output provided during construction.
class KernelRunner {
 public:
  KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
               int tensors_size, TfLiteIntArray* inputs,
               TfLiteIntArray* outputs, void* builtin_data,
               ErrorReporter* error_reporter);

  // Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct.
  // Any exceptions will be reported through the error_reporter and returned
  // as a status code here.
  TfLiteStatus InitAndPrepare(const char* init_data = nullptr);

  // Calls init, prepare, and invoke on a given TfLiteRegistration pointer.
  // After successful invoke, results will be available in the output tensor
  // as passed into the constructor of this class.
  TfLiteStatus Invoke();

 protected:
  // Static helpers whose signatures match the TfLiteContext callback
  // function pointers; presumably installed on context_ in the .cc file —
  // confirm there.
  static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
                                 int tensor_index);
  static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
                                         int tensor_index);
  static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
  static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
                                                  size_t bytes,
                                                  int* buffer_index);
  static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
  static void ReportOpError(struct TfLiteContext* context, const char* format,
                            ...);

 private:
  // Maximum number of scratch buffers a kernel under test may request.
  static constexpr int kNumScratchBuffers_ = 5;

  // Static backing storage handed to the SimpleMemoryAllocator.
  static constexpr int kKernelRunnerBufferSize_ = 10000;
  static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];

  SimpleMemoryAllocator* allocator_ = nullptr;
  const TfLiteRegistration& registration_;
  TfLiteTensor* tensors_ = nullptr;
  ErrorReporter* error_reporter_ = nullptr;

  TfLiteContext context_ = {};
  TfLiteNode node_ = {};

  int scratch_buffer_count_ = 0;
  uint8_t* scratch_buffers_[kNumScratchBuffers_];
};
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
|
@@ -0,0 +1,83 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
// Returns a mutable tensor for a given input index. is_variable must be checked
|
||||
// during prepare when the full TfLiteTensor is available.
|
||||
inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
int index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
return context->GetEvalTensor(context, node->inputs->data[index]);
|
||||
}
|
||||
|
||||
// Returns the TfLiteEvalTensor struct for a given input index in a node.
|
||||
inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return GetMutableEvalInput(context, node, index);
|
||||
}
|
||||
|
||||
// Returns the TfLiteEvalTensor struct for a given output index in a node.
|
||||
inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
return context->GetEvalTensor(context, node->outputs->data[index]);
|
||||
}
|
||||
|
||||
// Returns data for a TfLiteEvalTensor struct.
|
||||
template <typename T>
|
||||
T* GetTensorData(TfLiteEvalTensor* tensor) {
|
||||
return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
|
||||
}
|
||||
|
||||
// Returns a typed const pointer to the data of a TfLiteEvalTensor.
// Unlike the non-const overload, a null tensor is treated as a programmer
// error here rather than yielding nullptr.
template <typename T>
const T* GetTensorData(const TfLiteEvalTensor* tensor) {
  TFLITE_DCHECK(tensor != nullptr);
  return reinterpret_cast<const T*>(tensor->data.raw);
}
|
||||
|
||||
// Returns the shape of a TfLiteEvalTensor struct.
|
||||
inline const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) {
|
||||
if (tensor == nullptr) {
|
||||
return RuntimeShape();
|
||||
}
|
||||
TfLiteIntArray* dims = tensor->dims;
|
||||
const int dims_size = dims->size;
|
||||
const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
|
||||
return RuntimeShape(dims_size, dims_data);
|
||||
}
|
||||
|
||||
// Return true if the given tensors have the same shape.
|
||||
bool HaveSameShapes(const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2);
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
|
@@ -0,0 +1,92 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Forward declaration of all micro op kernel registration methods. These
|
||||
// registrations are included with the standard `BuiltinOpResolver`.
|
||||
//
|
||||
// This header is particularly useful in cases where only a subset of ops are
|
||||
// needed. In such cases, the client can selectively add only the registrations
|
||||
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
|
||||
// registration in turn allows the linker to strip unused kernels.
|
||||
|
||||
TfLiteRegistration Register_ABS();
|
||||
TfLiteRegistration Register_ADD();
|
||||
TfLiteRegistration Register_ARG_MAX();
|
||||
TfLiteRegistration Register_ARG_MIN();
|
||||
TfLiteRegistration Register_AVERAGE_POOL_2D();
|
||||
TfLiteRegistration Register_CEIL();
|
||||
// TODO(b/160234179): Change custom OPs to also return by value.
|
||||
TfLiteRegistration* Register_CIRCULAR_BUFFER();
|
||||
TfLiteRegistration Register_CONV_2D();
|
||||
TfLiteRegistration Register_CONCATENATION();
|
||||
TfLiteRegistration Register_COS();
|
||||
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
|
||||
TfLiteRegistration Register_DEQUANTIZE();
|
||||
TfLiteRegistration Register_EQUAL();
|
||||
TfLiteRegistration Register_FLOOR();
|
||||
TfLiteRegistration Register_FULLY_CONNECTED();
|
||||
TfLiteRegistration Register_GREATER();
|
||||
TfLiteRegistration Register_GREATER_EQUAL();
|
||||
TfLiteRegistration Register_HARD_SWISH();
|
||||
TfLiteRegistration Register_LESS();
|
||||
TfLiteRegistration Register_LESS_EQUAL();
|
||||
TfLiteRegistration Register_LOG();
|
||||
TfLiteRegistration Register_LOGICAL_AND();
|
||||
TfLiteRegistration Register_LOGICAL_NOT();
|
||||
TfLiteRegistration Register_LOGICAL_OR();
|
||||
TfLiteRegistration Register_LOGISTIC();
|
||||
TfLiteRegistration Register_MAXIMUM();
|
||||
TfLiteRegistration Register_MAX_POOL_2D();
|
||||
TfLiteRegistration Register_MEAN();
|
||||
TfLiteRegistration Register_MINIMUM();
|
||||
TfLiteRegistration Register_MUL();
|
||||
TfLiteRegistration Register_NEG();
|
||||
TfLiteRegistration Register_NOT_EQUAL();
|
||||
TfLiteRegistration Register_PACK();
|
||||
TfLiteRegistration Register_PAD();
|
||||
TfLiteRegistration Register_PADV2();
|
||||
TfLiteRegistration Register_PRELU();
|
||||
TfLiteRegistration Register_QUANTIZE();
|
||||
TfLiteRegistration Register_RELU();
|
||||
TfLiteRegistration Register_RELU6();
|
||||
TfLiteRegistration Register_RESHAPE();
|
||||
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
|
||||
TfLiteRegistration Register_ROUND();
|
||||
TfLiteRegistration Register_RSQRT();
|
||||
TfLiteRegistration Register_SIN();
|
||||
TfLiteRegistration Register_SOFTMAX();
|
||||
TfLiteRegistration Register_SPLIT();
|
||||
TfLiteRegistration Register_SQRT();
|
||||
TfLiteRegistration Register_SQUARE();
|
||||
TfLiteRegistration Register_STRIDED_SLICE();
|
||||
TfLiteRegistration Register_SUB();
|
||||
TfLiteRegistration Register_SVDF();
|
||||
TfLiteRegistration Register_UNPACK();
|
||||
TfLiteRegistration Register_L2_NORMALIZATION();
|
||||
TfLiteRegistration Register_TANH();
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
|
@@ -0,0 +1,37 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Functor returning lhs > rhs. Same as gtl::Greater, but defined locally to
// keep dependencies and binary size small in the micro environment.
struct Greater {
  template <typename T>
  bool operator()(const T& lhs, const T& rhs) const {
    return lhs > rhs;
  }
};
|
||||
|
||||
// Functor returning lhs < rhs; local stand-in for gtl::Less to avoid extra
// dependencies in the micro environment.
struct Less {
  template <typename T>
  bool operator()(const T& lhs, const T& rhs) const {
    return lhs < rhs;
  }
};
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
|
@@ -0,0 +1,59 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// Returns the next pointer address aligned to the given alignment.
|
||||
uint8_t* AlignPointerUp(uint8_t* data, size_t alignment);
|
||||
|
||||
// Returns the previous pointer address aligned to the given alignment.
|
||||
uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
|
||||
|
||||
// Returns an increased size that's a multiple of alignment.
|
||||
size_t AlignSizeUp(size_t size, size_t alignment);
|
||||
|
||||
// Returns size in bytes for a given TfLiteType.
|
||||
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);
|
||||
|
||||
// How many bytes are needed to hold a tensor's contents.
|
||||
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
|
||||
size_t* bytes, size_t* type_size,
|
||||
ErrorReporter* error_reporter);
|
||||
|
||||
// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
|
||||
// returned in out_bytes.
|
||||
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
|
||||
size_t* out_bytes);
|
||||
|
||||
// Deduce output dimensions from input and allocate given size.
|
||||
// Useful for operators with two inputs where the largest input should equal the
|
||||
// output dimension.
|
||||
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
TfLiteTensor* output);
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
|
@@ -0,0 +1,163 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
|
||||
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// Sentinel value for an `offline_offset` field/argument meaning "no
// precomputed offset": such a buffer is placed online by the greedy
// algorithm rather than at a fixed offline-planned position.
constexpr int kOnlinePlannedBuffer = -1;

// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
// The algorithm works like this:
//  - The client enters the buffer information through AddBuffer().
//  - When a function like GetOffsetForBuffer() is called, the
//    CalculateOffsetsIfNeeded() method is invoked.
//  - If an up to date plan is not already present, one will be calculated.
//  - The buffers are sorted in descending order of size.
//  - The largest buffer is placed at offset zero.
//  - The rest of the buffers are looped through in descending size order.
//  - The other buffers that need to be in memory at the same time are found.
//  - The first gap between simultaneously active buffers that the current
//    buffer fits into will be used.
//  - If no large-enough gap is found, the current buffer is placed after the
//    last buffer that's simultaneously active.
//  - This continues until all buffers are placed, and the offsets stored.
//
// This is not guaranteed to produce the best placement, since that's an
// NP-Complete problem, but in practice it should produce one that's decent.
class GreedyMemoryPlanner : public MemoryPlanner {
 public:
  // You need to pass in an area of memory to be used for planning. This memory
  // needs to have a lifetime as long as the planner, but isn't owned by this
  // object, so management should be handled by the client. This is so it can be
  // stack or globally allocated if necessary on devices without dynamic memory
  // allocation. How many buffers can be planned for will depend on the size of
  // this scratch memory, so you should enlarge it if you see an error when
  // calling AddBuffer(). The memory can be reused once you're done with the
  // planner, as long as you copy the calculated offsets to another location.
  // Each buffer requires about 36 bytes of scratch (see per_buffer_size()).
  GreedyMemoryPlanner(unsigned char* scratch_buffer, int scratch_buffer_size);
  ~GreedyMemoryPlanner() override;

  // Record details of a buffer we want to place. Lifetime is expressed as the
  // inclusive [first_time_used, last_time_used] range of timesteps.
  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used) override;

  // Record details of an offline planned buffer offset we want to place.
  // offline_offset is the buffer offset from the start of the arena, or
  // kOnlinePlannedBuffer to let the planner choose the offset itself.
  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used,
                         int offline_offset);

  // Returns the high-water mark of used memory. This is the minimum size of a
  // memory arena you'd need to allocate to hold these buffers.
  size_t GetMaximumMemorySize() override;

  // How many buffers have been recorded so far.
  int GetBufferCount() override;

  // Where a given buffer should be placed in the memory arena.
  // This information is stored in the memory arena itself, so once the arena
  // is used for inference, it will be overwritten.
  TfLiteStatus GetOffsetForBuffer(ErrorReporter* error_reporter,
                                  int buffer_index, int* offset) override;

  // Prints an ascii-art diagram of the buffer layout plan for debugging.
  void PrintMemoryPlan(ErrorReporter* error_reporter);

  // Debug method to check whether any buffer allocations are overlapping. This
  // is an O(N^2) complexity operation, so only use for testing.
  bool DoAnyBuffersOverlap(ErrorReporter* error_reporter);

  // Used to store a list of buffers ordered by their offset.
  struct ListEntry {
    // Arena offset chosen for this buffer.
    int offset;
    // Index into requirements_ for this buffer's size/lifetime info.
    int requirements_index;
    // Index of the next entry in offset order within
    // buffers_sorted_by_offset_.
    int next_entry_index;
  };

  // Number of scratch bytes required in order to plan one buffer; multiply by
  // the expected buffer count to size the scratch_buffer constructor argument.
  static size_t per_buffer_size() {
    const int per_buffer_size =
        sizeof(BufferRequirements) +  // requirements_
        sizeof(int) +                 // buffer_sizes_sorted_
        sizeof(int) +                 // buffer_ids_sorted_
        sizeof(ListEntry) +           // buffers_sorted_by_offset_
        sizeof(int);                  // buffer_offsets_;
    return per_buffer_size;
  }

 private:
  // Whether a buffer is active in a given time range.
  bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
                              const int last_time_used) const;

  // Walks the list to return the next buffer that is active in a given time
  // range, or a null pointer if there are none.
  ListEntry* NextSimultaneouslyActiveBuffer(const ListEntry* start,
                                            const int first_time_used,
                                            const int last_time_used);

  // If there isn't an up to date plan, calculate a new one.
  void CalculateOffsetsIfNeeded();

  // How many buffers we can plan for, based on the arena size we're given in
  // the constructor.
  int max_buffer_count_;

  // The number of buffers added so far.
  int buffer_count_;

  // Records the client-provided information about each buffer.
  struct BufferRequirements {
    int size;
    // Arena offset fixed ahead of time, or kOnlinePlannedBuffer.
    int offline_offset;
    int first_time_used;
    int last_time_used;
  };

  // Working arrays used during the layout algorithm; all point into the
  // client-provided scratch buffer.
  BufferRequirements* requirements_;
  // buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
  // {
  //   offline planned buffers,
  //   online planned buffers sorted by size
  // }
  int* buffer_sizes_sorted_;
  int* buffer_ids_sorted_;
  ListEntry* buffers_sorted_by_offset_;
  int next_free_entry_;    // Index of the next free entry of
                           // buffers_sorted_by_offset_
  int first_entry_index_;  // Index of the first entry (smallest offset) of
                           // buffers_sorted_by_offset_

  // Stores the outcome of the plan, the location of each buffer in the arena.
  int* buffer_offsets_;

  // Whether buffers have been added since the last plan was calculated.
  bool need_to_calculate_offsets_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
|
@@ -0,0 +1,50 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
|
||||
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// The simplest possible memory planner that just lays out all buffers at
// increasing offsets without trying to reuse memory.
class LinearMemoryPlanner : public MemoryPlanner {
 public:
  LinearMemoryPlanner();
  ~LinearMemoryPlanner() override;

  // Records a buffer of `size` bytes. The lifetime arguments are accepted for
  // interface compatibility but cannot enable any reuse, since this planner
  // never overlaps buffers.
  TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used) override;

  // Total arena bytes needed for all buffers recorded so far.
  size_t GetMaximumMemorySize() override;
  // Number of buffers recorded via AddBuffer().
  int GetBufferCount() override;
  // Returns (via *offset) the arena offset assigned to the buffer_index-th
  // recorded buffer.
  TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
                                  int buffer_index, int* offset) override;

 private:
  // Fixed capacity: at most this many buffers can be planned.
  static constexpr int kMaxBufferCount = 1024;
  // Offset assigned to each recorded buffer, indexed by insertion order.
  size_t buffer_offsets_[kMaxBufferCount];
  // How many buffers have been recorded so far.
  int current_buffer_count_;
  // Offset just past the last laid-out buffer; next buffer starts here.
  size_t next_free_offset_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
|
@@ -0,0 +1,71 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// Interface class for planning the layout of memory buffers during the
// execution of a graph.
// It's designed to be used by a client that iterates in any order through the
// buffers it wants to lay out, and then calls the getter functions for
// information about the calculated layout. For example:
//
//  SomeMemoryPlanner planner;
//  planner.AddBuffer(reporter, 100, 0, 1);  // Buffer 0
//  planner.AddBuffer(reporter, 50, 2, 3);   // Buffer 1
//  planner.AddBuffer(reporter, 50, 2, 3);   // Buffer 2
//
//  int offset0;
//  TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 0, &offset0));
//  int offset1;
//  TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 1, &offset1));
//  int offset2;
//  TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 2, &offset2));
//  const int arena_size_needed = planner.GetMaximumMemorySize();
//
// The goal is for applications to be able to experiment with different layout
// strategies without changing their client code, by swapping out classes that
// implement this interface.
class MemoryPlanner {
 public:
  MemoryPlanner() {}
  virtual ~MemoryPlanner() {}

  // Pass information about a buffer's size and lifetime to the layout
  // algorithm. The order this is called implicitly assigns an index to the
  // result, so the buffer information that's passed into the N-th call of
  // this method will be used as the buffer_index argument to
  // GetOffsetForBuffer().
  virtual TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter,
                                 int size, int first_time_used,
                                 int last_time_used) = 0;

  // The largest contiguous block of memory that's needed to hold the layout.
  virtual size_t GetMaximumMemorySize() = 0;

  // How many buffers have been added to the planner.
  virtual int GetBufferCount() = 0;

  // Calculated layout offset for the N-th buffer added to the planner,
  // returned via *offset.
  virtual TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
                                          int buffer_index, int* offset) = 0;
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
|
@@ -0,0 +1,250 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
||||
#include "tensorflow/lite/micro/simple_memory_allocator.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// Namespace used for unittests.
|
||||
namespace internal {
|
||||
|
||||
// Sets up all of the data structure members for a TfLiteTensor based on the
|
||||
// contents of a serialized tensor in the flatbuffer.
|
||||
// TODO(b/160894903): Once all kernels have been updated to the new
|
||||
// TfLiteEvalTensor API - drop the allocate_temp flag. This enables internal
|
||||
// flatbuffer quantization or dimension allocations to take place in either the
|
||||
// temp or tail section of the arena.
|
||||
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
|
||||
SimpleMemoryAllocator* allocator, bool allocate_temp,
|
||||
const tflite::Tensor& flatbuffer_tensor,
|
||||
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
|
||||
ErrorReporter* error_reporter, TfLiteTensor* result);
|
||||
|
||||
// A handle tracking a scratch buffer allocation. This handle is created by
// `RequestScratchBufferInArena`. The `data` field is populated in
// `FinishModelAllocation` after static memory planning.
// TODO(b/150257460) As a future optimization, this struct could be replaced by
// a union, since once `data` is populated, `bytes` and `node_idx` are not
// needed.
typedef struct {
  // Pointer to the scratch buffer; null until memory planning completes.
  uint8_t* data;
  // Number of bytes required by the buffer. The actual allocated size might be
  // greater than `bytes` due to buffer alignment.
  size_t bytes;
  // Index of the node this buffer is allocated for. This provides useful
  // information to determine the lifetime of the buffer. In AllocationInfo,
  // this buffer will have `before` = node_idx and `after` = node_idx.
  int node_idx;
} ScratchBufferHandle;
|
||||
} // namespace internal
|
||||
|
||||
// Pairs a graph node with the op registration (kernel implementation) used to
// execute it. Instances are allocated by MicroAllocator::StartModelAllocation
// for the operators in the model.
typedef struct {
  // The node's inputs, outputs, and op-specific data.
  TfLiteNode node;
  // Kernel entry points for this node's op; not owned by this struct.
  const TfLiteRegistration* registration;
} NodeAndRegistration;
|
||||
|
||||
// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are required
// to standup a model for inference in TF Micro. This class currently relies on
// an additional allocator - SimpleMemoryAllocator - for all allocations from an
// arena. These allocations are divided into head (non-persistent) and tail
// (persistent) regions:
//
// Memory layout to help understand how it works
// (this information could change in a future version):
// ************** .memory_allocator->GetBuffer()
// Tensors/Scratch buffers (head)
// ************** .head_watermark
// unused memory
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
//                                               - ->GetDataSize()
// persistent area (tail)
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
class MicroAllocator {
 public:
  // Creates a MicroAllocator instance from a given tensor arena. This arena
  // will be managed by the created instance.
  // Note: Please use __declspec(align(16)) to make sure tensor_arena is 16
  // bytes aligned, otherwise some head room will be wasted.
  // TODO(b/157615197): Cleanup constructor + factory usage.
  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
                                ErrorReporter* error_reporter);

  // Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
  // instance. This allocator instance will use the SimpleMemoryAllocator
  // instance to manage allocations internally.
  static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
                                ErrorReporter* error_reporter);

  // Begin allocating internal resources required for model inference.
  // This method will run through the flatbuffer data supplied in the model to
  // properly allocate tensor, node, and op registration data. This method is
  // expected to be followed with a call to FinishModelAllocation() before
  // resuming allocation with another model. All persistent tensor buffers are
  // stored in the out-param eval_tensors. This value is allocated from the
  // persistent memory arena and will be used to host runtime tensor buffers.
  TfLiteStatus StartModelAllocation(
      const Model* model, const MicroOpResolver& op_resolver,
      NodeAndRegistration** node_and_registrations,
      TfLiteEvalTensor** eval_tensors);

  // Finish allocating internal resources required for model inference.
  // This method will plan non-persistent buffers and commit a memory plan to
  // the 'head' section of the memory arena. All variable tensor data will also
  // be allocated. This method should be called after assigning model resources
  // in StartModelAllocation(). The eval_tensors pointer should be the value
  // passed into this class during StartModelAllocation().
  TfLiteStatus FinishModelAllocation(const Model* model,
                                     TfLiteEvalTensor* eval_tensors);

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // persistent arena memory and is only guaranteed for the lifetime of the
  // application. The eval_tensors pointer should be the value passed into this
  // class during StartModelAllocation() and contains the source-of-truth for
  // buffers.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // temporary arena memory and is only guaranteed until a call is made to
  // ResetTempAllocations(). The eval_tensors pointer should be the value passed
  // into this class during StartModelAllocation() and contains the
  // source-of-truth for buffers.
  virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
                                                 TfLiteEvalTensor* eval_tensors,
                                                 int tensor_index);

  // Resets all temporary allocations. This method should be called after a
  // chain of temp allocations (e.g. chain of TfLiteTensor objects via
  // AllocateTempTfLiteTensor()).
  virtual void ResetTempAllocations();

  // Allocates a persistent buffer which has the same life time as the
  // allocator. The memory is immediately available and is allocated from the
  // tail of the arena.
  void* AllocatePersistentBuffer(size_t bytes);

  // Register a scratch buffer of size `bytes` for Node with `node_id`.
  // This method only allocates a BufferHandle holding information for memory
  // planning. The buffer ptr is ready after `FinishModelAllocation` and can
  // be retrieved by the `GetScratchBuffer` method using the returned
  // buffer_idx. Note that there should be no tail allocation between two
  // consecutive `RequestScratchBufferInArena` calls.
  TfLiteStatus RequestScratchBufferInArena(int node_id, size_t bytes,
                                           int* buffer_idx);

  // Returns the pointer to the planned scratch buffer, only valid after
  // `FinishModelAllocation` has run (see RequestScratchBufferInArena).
  void* GetScratchBuffer(int buffer_idx) const;

  // Returns the arena usage in bytes, only available after
  // `FinishModelAllocation`. Otherwise, it will return 0.
  size_t used_bytes() const;

 protected:
  // Does not take ownership of memory_allocator or error_reporter; both must
  // outlive this object. Use the Create() factories instead of constructing
  // directly.
  MicroAllocator(SimpleMemoryAllocator* memory_allocator,
                 ErrorReporter* error_reporter);
  virtual ~MicroAllocator();

  // Allocates an array in the arena to hold pointers to the node and
  // registration pointers required to represent the inference graph of the
  // model.
  virtual TfLiteStatus AllocateNodeAndRegistrations(
      const Model* model, NodeAndRegistration** node_and_registrations);

  // Populates node and registration pointers representing the inference graph
  // of the model from values inside the flatbuffer (loaded from the TfLiteModel
  // instance). Persistent data (e.g. operator data) is allocated from the
  // arena.
  virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
      const Model* model, const MicroOpResolver& op_resolver,
      NodeAndRegistration* node_and_registrations);

  // Allocates the list of persistent TfLiteEvalTensors that are used for the
  // "eval" phase of model inference. These structs will be the source of truth
  // for all tensor buffers. Allocation results are stored in the out-param
  // eval_tensors.
  virtual TfLiteStatus AllocateTfLiteEvalTensors(
      const Model* model, TfLiteEvalTensor** eval_tensors);

  // Allocates persistent tensor buffers for variable tensors in the subgraph.
  virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
                                         TfLiteEvalTensor* eval_tensors);

  // TODO(b/160894903): Once all kernels have been updated to the new API drop
  // this method. It is only used to record TfLiteTensor persistent allocations.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);

  // Populates a TfLiteTensor struct with data from the model flatbuffer. Any
  // quantization data is allocated from either the tail (persistent) or temp
  // sections of the arena based on the allocation flag.
  // TODO(b/160894903): Once all kernels have been updated to the new API drop
  // this function since all allocations for quantized data will take place in
  // the temp section.
  virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
      const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
      int tensor_index, bool allocate_temp);

  // Accessor for subclasses; returns the reporter passed at construction.
  ErrorReporter* error_reporter() const;

  // Returns the first subgraph from the model.
  const SubGraph* GetSubGraphFromModel(const Model* model);

 private:
  // Commits a memory plan for all non-persistent buffer allocations in the
  // 'head' section of the memory arena. The eval_tensors pointer is the list of
  // pre-allocated TfLiteEvalTensor structs that will point to the buffers that
  // will be allocated into the head section in this function call.
  virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
                                              const SubGraph* subgraph,
                                              TfLiteEvalTensor* eval_tensors);

  // A simple memory allocator that always allocates from the arena tail or
  // head. Not owned by this object.
  SimpleMemoryAllocator* memory_allocator_;

  ErrorReporter* error_reporter_;
  // True between StartModelAllocation() and FinishModelAllocation().
  bool model_is_allocating_;

  // In reverse order for efficiency.
  // i.e. scratch_buffer_handles_[0] is the handle for the last buffer,
  // corresponding to the last RequestScratchBufferInArena call.
  internal::ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
  // How many scratch buffers have been allocated.
  size_t scratch_buffer_count_ = 0;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};
|
||||
|
||||
} // namespace tflite
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
|
@@ -0,0 +1,36 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
|
||||
|
||||
#include <cstdarg>
|
||||
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// ErrorReporter implementation for micro targets. Report() receives a
// printf-style format string plus va_list; the output sink is
// platform-specific (defined in the corresponding .cc file, not visible
// here — presumably the DebugLog hook; confirm against the implementation).
class MicroErrorReporter : public ErrorReporter {
 public:
  ~MicroErrorReporter() override {}
  int Report(const char* format, va_list args) override;

 private:
  TF_LITE_REMOVE_VIRTUAL_DELETE
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
|
@@ -0,0 +1,208 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/profiler.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/micro/micro_allocator.h"
|
||||
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
#include "tensorflow/lite/type_to_tflitetype.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// A helper class to encapsulate the implementation of APIs in Context.
// context->impl_ points to an instance of this class.
// Check tensorflow/lite/c/common.h for detailed descriptions.
// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
class ContextHelper {
 public:
  // Keeps raw pointers to all three arguments; they must outlive this object.
  explicit ContextHelper(ErrorReporter* error_reporter,
                         MicroAllocator* allocator, const Model* model);

  // Functions that will be assigned to function pointers on TfLiteContext.
  // Each expects ctx->impl_ to point at a ContextHelper instance.
  static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
  static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
                                                  size_t bytes,
                                                  int* buffer_idx);
  static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
  static void ReportOpError(struct TfLiteContext* context, const char* format,
                            ...);
  static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
                                 int tensor_idx);
  static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
                                         int tensor_idx);

  // Sets the current node index to assist with scratch buffer allocations.
  void SetNodeIndex(int idx);

  // Sets the pointer to a list of TfLiteEvalTensor instances.
  void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);

 private:
  MicroAllocator* allocator_;
  ErrorReporter* error_reporter_;
  const Model* model_;
  TfLiteEvalTensor* eval_tensors_;
  // Index of the node currently being processed; -1 when no node is active.
  // NOTE(review): presumably used to attribute scratch buffer requests to the
  // active node — confirm in the interpreter implementation.
  int current_node_idx_ = -1;
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
class MicroInterpreter {
|
||||
public:
|
||||
// The lifetime of the model, op resolver, tensor arena, error reporter and
|
||||
// profiler must be at least as long as that of the interpreter object, since
|
||||
// the interpreter may need to access them at any time. This means that you
|
||||
// should usually create them with the same scope as each other, for example
|
||||
// having them all allocated on the stack as local variables through a
|
||||
// top-level function. The interpreter doesn't do any deallocation of any of
|
||||
// the pointed-to objects, ownership remains with the caller.
|
||||
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
|
||||
uint8_t* tensor_arena, size_t tensor_arena_size,
|
||||
ErrorReporter* error_reporter,
|
||||
tflite::Profiler* profiler = nullptr);
|
||||
|
||||
// Create an interpreter instance using an existing MicroAllocator instance.
|
||||
// This constructor should be used when creating an allocator that needs to
|
||||
// have allocation handled in more than one interpreter or for recording
|
||||
// allocations inside the interpreter. The lifetime of the allocator must be
|
||||
// as long as that of the interpreter object.
|
||||
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
|
||||
MicroAllocator* allocator, ErrorReporter* error_reporter,
|
||||
tflite::Profiler* profiler = nullptr);
|
||||
|
||||
~MicroInterpreter();
|
||||
|
||||
// Runs through the model and allocates all necessary input, output and
|
||||
// intermediate tensors.
|
||||
TfLiteStatus AllocateTensors();
|
||||
|
||||
// In order to support partial graph runs for strided models, this can return
|
||||
// values other than kTfLiteOk and kTfLiteError.
|
||||
// TODO(b/149795762): Add this to the TfLiteStatus enum.
|
||||
TfLiteStatus Invoke();
|
||||
|
||||
size_t tensors_size() const { return context_.tensors_size; }
|
||||
TfLiteTensor* tensor(size_t tensor_index);
|
||||
template <class T>
|
||||
T* typed_tensor(int tensor_index) {
|
||||
if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) {
|
||||
if (tensor_ptr->type == typeToTfLiteType<T>()) {
|
||||
return GetTensorData<T>(tensor_ptr);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
TfLiteTensor* input(size_t index);
|
||||
size_t inputs_size() const { return subgraph_->inputs()->Length(); }
|
||||
const flatbuffers::Vector<int32_t>& inputs() const {
|
||||
return *subgraph_->inputs();
|
||||
}
|
||||
TfLiteTensor* input_tensor(size_t index) { return input(index); }
|
||||
template <class T>
|
||||
T* typed_input_tensor(int tensor_index) {
|
||||
if (TfLiteTensor* tensor_ptr = input_tensor(tensor_index)) {
|
||||
if (tensor_ptr->type == typeToTfLiteType<T>()) {
|
||||
return GetTensorData<T>(tensor_ptr);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
TfLiteTensor* output(size_t index);
|
||||
size_t outputs_size() const { return subgraph_->outputs()->Length(); }
|
||||
const flatbuffers::Vector<int32_t>& outputs() const {
|
||||
return *subgraph_->outputs();
|
||||
}
|
||||
TfLiteTensor* output_tensor(size_t index) { return output(index); }
|
||||
template <class T>
|
||||
T* typed_output_tensor(int tensor_index) {
|
||||
if (TfLiteTensor* tensor_ptr = output_tensor(tensor_index)) {
|
||||
if (tensor_ptr->type == typeToTfLiteType<T>()) {
|
||||
return GetTensorData<T>(tensor_ptr);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Reset all variable tensors to the default value.
|
||||
TfLiteStatus ResetVariableTensors();
|
||||
|
||||
TfLiteStatus initialization_status() const { return initialization_status_; }
|
||||
|
||||
size_t operators_size() const { return subgraph_->operators()->size(); }
|
||||
|
||||
// For debugging only.
|
||||
const NodeAndRegistration node_and_registration(int node_index) const {
|
||||
return node_and_registrations_[node_index];
|
||||
}
|
||||
|
||||
// For debugging only.
|
||||
// Returns the actual used arena in bytes. This method gives the optimal arena
|
||||
// size. It's only available after `AllocateTensors` has been called.
|
||||
// Note that normally `tensor_arena` requires 16 bytes alignment to fully
|
||||
// utilize the space. If it's not the case, the optimial arena size would be
|
||||
// arena_used_bytes() + 16.
|
||||
size_t arena_used_bytes() const { return allocator_.used_bytes(); }
|
||||
|
||||
protected:
|
||||
const MicroAllocator& allocator() const { return allocator_; }
|
||||
const TfLiteContext& context() const { return context_; }
|
||||
|
||||
private:
|
||||
// TODO(b/158263161): Consider switching to Create() function to enable better
|
||||
// error reporting during initialization.
|
||||
void Init(tflite::Profiler* profiler);
|
||||
|
||||
void CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr);
|
||||
|
||||
template <class T>
|
||||
void CorrectTensorDataEndianness(T* data, int32_t size);
|
||||
|
||||
NodeAndRegistration* node_and_registrations_ = nullptr;
|
||||
|
||||
const Model* model_;
|
||||
const MicroOpResolver& op_resolver_;
|
||||
ErrorReporter* error_reporter_;
|
||||
TfLiteContext context_ = {};
|
||||
MicroAllocator& allocator_;
|
||||
bool tensors_allocated_;
|
||||
|
||||
TfLiteStatus initialization_status_;
|
||||
|
||||
const SubGraph* subgraph_;
|
||||
TfLiteEvalTensor* eval_tensors_;
|
||||
internal::ContextHelper context_helper_;
|
||||
|
||||
// TODO(b/160894903): Clean these pointers up when all APIs are updated to new
|
||||
// TfLiteEvalTensor buffers.
|
||||
TfLiteTensor* input_tensor_;
|
||||
TfLiteTensor* output_tensor_;
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
|
@@ -0,0 +1,458 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cstring>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/kernels/micro_ops.h"
|
||||
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
template <unsigned int tOpCount>
|
||||
class MicroMutableOpResolver : public MicroOpResolver {
|
||||
public:
|
||||
explicit MicroMutableOpResolver(ErrorReporter* error_reporter = nullptr)
|
||||
: error_reporter_(error_reporter) {}
|
||||
|
||||
const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override {
|
||||
if (op == BuiltinOperator_CUSTOM) return nullptr;
|
||||
|
||||
for (unsigned int i = 0; i < registrations_len_; ++i) {
|
||||
const TfLiteRegistration& registration = registrations_[i];
|
||||
if (registration.builtin_code == op) {
|
||||
return ®istration;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const TfLiteRegistration* FindOp(const char* op) const override {
|
||||
for (unsigned int i = 0; i < registrations_len_; ++i) {
|
||||
const TfLiteRegistration& registration = registrations_[i];
|
||||
if ((registration.builtin_code == BuiltinOperator_CUSTOM) &&
|
||||
(strcmp(registration.custom_name, op) == 0)) {
|
||||
return ®istration;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
MicroOpResolver::BuiltinParseFunction GetOpDataParser(
|
||||
BuiltinOperator op) const override {
|
||||
TFLITE_DCHECK(num_buitin_ops_ <= tOpCount);
|
||||
for (unsigned int i = 0; i < num_buitin_ops_; ++i) {
|
||||
if (builtin_codes_[i] == op) return builtin_parsers_[i];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Registers a Custom Operator with the MicroOpResolver.
|
||||
//
|
||||
// Only the first call for a given name will be successful. i.e. if this
|
||||
// function is called again for a previously added Custom Operator, the
|
||||
// MicroOpResolver will be unchanged and this function will return
|
||||
// kTfLiteError.
|
||||
TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) {
|
||||
if (registrations_len_ >= tOpCount) {
|
||||
if (error_reporter_) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter_,
|
||||
"Couldn't register custom op '%s', resolver size is too small (%d)",
|
||||
name, tOpCount);
|
||||
}
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
if (FindOp(name) != nullptr) {
|
||||
if (error_reporter_ != nullptr) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Calling AddCustom for the same op more than once "
|
||||
"is not supported (Op: %s).",
|
||||
name);
|
||||
}
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
TfLiteRegistration* new_registration = ®istrations_[registrations_len_];
|
||||
registrations_len_ += 1;
|
||||
|
||||
*new_registration = *registration;
|
||||
new_registration->builtin_code = BuiltinOperator_CUSTOM;
|
||||
new_registration->custom_name = name;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// The Add* functions below add the various Builtin operators to the
|
||||
// MicroMutableOpResolver object.
|
||||
|
||||
TfLiteStatus AddAbs() {
|
||||
return AddBuiltin(BuiltinOperator_ABS, tflite::ops::micro::Register_ABS(),
|
||||
ParseAbs);
|
||||
}
|
||||
|
||||
TfLiteStatus AddAdd() {
|
||||
return AddBuiltin(BuiltinOperator_ADD, tflite::ops::micro::Register_ADD(),
|
||||
ParseAdd);
|
||||
}
|
||||
|
||||
TfLiteStatus AddArgMax() {
|
||||
return AddBuiltin(BuiltinOperator_ARG_MAX,
|
||||
tflite::ops::micro::Register_ARG_MAX(), ParseArgMax);
|
||||
}
|
||||
|
||||
TfLiteStatus AddArgMin() {
|
||||
return AddBuiltin(BuiltinOperator_ARG_MIN,
|
||||
tflite::ops::micro::Register_ARG_MIN(), ParseArgMin);
|
||||
}
|
||||
|
||||
TfLiteStatus AddAveragePool2D() {
|
||||
return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
|
||||
tflite::ops::micro::Register_AVERAGE_POOL_2D(),
|
||||
ParsePool);
|
||||
}
|
||||
|
||||
TfLiteStatus AddCeil() {
|
||||
return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(),
|
||||
ParseCeil);
|
||||
}
|
||||
|
||||
TfLiteStatus AddCircularBuffer() {
|
||||
return AddCustom("CIRCULAR_BUFFER",
|
||||
tflite::ops::micro::Register_CIRCULAR_BUFFER());
|
||||
}
|
||||
|
||||
TfLiteStatus AddConcatenation() {
|
||||
return AddBuiltin(BuiltinOperator_CONCATENATION,
|
||||
tflite::ops::micro::Register_CONCATENATION(),
|
||||
ParseConcatenation);
|
||||
}
|
||||
|
||||
TfLiteStatus AddConv2D() {
|
||||
return AddBuiltin(BuiltinOperator_CONV_2D,
|
||||
tflite::ops::micro::Register_CONV_2D(), ParseConv2D);
|
||||
}
|
||||
|
||||
TfLiteStatus AddCos() {
|
||||
return AddBuiltin(BuiltinOperator_COS, tflite::ops::micro::Register_COS(),
|
||||
ParseCos);
|
||||
}
|
||||
|
||||
TfLiteStatus AddDepthwiseConv2D() {
|
||||
return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
|
||||
tflite::ops::micro::Register_DEPTHWISE_CONV_2D(),
|
||||
ParseDepthwiseConv2D);
|
||||
}
|
||||
|
||||
TfLiteStatus AddDequantize() {
|
||||
return AddBuiltin(BuiltinOperator_DEQUANTIZE,
|
||||
tflite::ops::micro::Register_DEQUANTIZE(),
|
||||
ParseDequantize);
|
||||
}
|
||||
|
||||
TfLiteStatus AddEqual() {
|
||||
return AddBuiltin(BuiltinOperator_EQUAL,
|
||||
tflite::ops::micro::Register_EQUAL(), ParseEqual);
|
||||
}
|
||||
|
||||
TfLiteStatus AddFloor() {
|
||||
return AddBuiltin(BuiltinOperator_FLOOR,
|
||||
tflite::ops::micro::Register_FLOOR(), ParseFloor);
|
||||
}
|
||||
|
||||
TfLiteStatus AddFullyConnected() {
|
||||
return AddBuiltin(BuiltinOperator_FULLY_CONNECTED,
|
||||
tflite::ops::micro::Register_FULLY_CONNECTED(),
|
||||
ParseFullyConnected);
|
||||
}
|
||||
|
||||
TfLiteStatus AddGreater() {
|
||||
return AddBuiltin(BuiltinOperator_GREATER,
|
||||
tflite::ops::micro::Register_GREATER(), ParseGreater);
|
||||
}
|
||||
|
||||
TfLiteStatus AddGreaterEqual() {
|
||||
return AddBuiltin(BuiltinOperator_GREATER_EQUAL,
|
||||
tflite::ops::micro::Register_GREATER_EQUAL(),
|
||||
ParseGreaterEqual);
|
||||
}
|
||||
|
||||
TfLiteStatus AddHardSwish() {
|
||||
return AddBuiltin(BuiltinOperator_HARD_SWISH,
|
||||
tflite::ops::micro::Register_HARD_SWISH(),
|
||||
ParseHardSwish);
|
||||
}
|
||||
|
||||
TfLiteStatus AddL2Normalization() {
|
||||
return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
|
||||
tflite::ops::micro::Register_L2_NORMALIZATION(),
|
||||
ParseL2Normalization);
|
||||
}
|
||||
|
||||
TfLiteStatus AddLess() {
|
||||
return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(),
|
||||
ParseLess);
|
||||
}
|
||||
|
||||
TfLiteStatus AddLessEqual() {
|
||||
return AddBuiltin(BuiltinOperator_LESS_EQUAL,
|
||||
tflite::ops::micro::Register_LESS_EQUAL(),
|
||||
ParseLessEqual);
|
||||
}
|
||||
|
||||
TfLiteStatus AddLog() {
|
||||
return AddBuiltin(BuiltinOperator_LOG, tflite::ops::micro::Register_LOG(),
|
||||
ParseLog);
|
||||
}
|
||||
|
||||
TfLiteStatus AddLogicalAnd() {
|
||||
return AddBuiltin(BuiltinOperator_LOGICAL_AND,
|
||||
tflite::ops::micro::Register_LOGICAL_AND(),
|
||||
ParseLogicalAnd);
|
||||
}
|
||||
|
||||
TfLiteStatus AddLogicalNot() {
|
||||
return AddBuiltin(BuiltinOperator_LOGICAL_NOT,
|
||||
tflite::ops::micro::Register_LOGICAL_NOT(),
|
||||
ParseLogicalNot);
|
||||
}
|
||||
|
||||
TfLiteStatus AddLogicalOr() {
|
||||
return AddBuiltin(BuiltinOperator_LOGICAL_OR,
|
||||
tflite::ops::micro::Register_LOGICAL_OR(),
|
||||
ParseLogicalOr);
|
||||
}
|
||||
|
||||
TfLiteStatus AddLogistic() {
|
||||
return AddBuiltin(BuiltinOperator_LOGISTIC,
|
||||
tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
|
||||
}
|
||||
|
||||
TfLiteStatus AddMaximum() {
|
||||
return AddBuiltin(BuiltinOperator_MAXIMUM,
|
||||
tflite::ops::micro::Register_MAXIMUM(), ParseMaximum);
|
||||
}
|
||||
|
||||
TfLiteStatus AddMaxPool2D() {
|
||||
return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
|
||||
tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
|
||||
}
|
||||
|
||||
TfLiteStatus AddMean() {
|
||||
return AddBuiltin(BuiltinOperator_MEAN, tflite::ops::micro::Register_MEAN(),
|
||||
ParseReducer);
|
||||
}
|
||||
|
||||
TfLiteStatus AddMinimum() {
|
||||
return AddBuiltin(BuiltinOperator_MINIMUM,
|
||||
tflite::ops::micro::Register_MINIMUM(), ParseMinimum);
|
||||
}
|
||||
|
||||
TfLiteStatus AddMul() {
|
||||
return AddBuiltin(BuiltinOperator_MUL, tflite::ops::micro::Register_MUL(),
|
||||
ParseMul);
|
||||
}
|
||||
|
||||
TfLiteStatus AddNeg() {
|
||||
return AddBuiltin(BuiltinOperator_NEG, tflite::ops::micro::Register_NEG(),
|
||||
ParseNeg);
|
||||
}
|
||||
|
||||
TfLiteStatus AddNotEqual() {
|
||||
return AddBuiltin(BuiltinOperator_NOT_EQUAL,
|
||||
tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual);
|
||||
}
|
||||
|
||||
TfLiteStatus AddPack() {
|
||||
return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(),
|
||||
ParsePack);
|
||||
}
|
||||
|
||||
TfLiteStatus AddPad() {
|
||||
return AddBuiltin(BuiltinOperator_PAD, tflite::ops::micro::Register_PAD(),
|
||||
ParsePad);
|
||||
}
|
||||
|
||||
TfLiteStatus AddPadV2() {
|
||||
return AddBuiltin(BuiltinOperator_PADV2,
|
||||
tflite::ops::micro::Register_PADV2(), ParsePadV2);
|
||||
}
|
||||
|
||||
TfLiteStatus AddPrelu() {
|
||||
return AddBuiltin(BuiltinOperator_PRELU,
|
||||
tflite::ops::micro::Register_PRELU(), ParsePrelu);
|
||||
}
|
||||
|
||||
TfLiteStatus AddQuantize() {
|
||||
return AddBuiltin(BuiltinOperator_QUANTIZE,
|
||||
tflite::ops::micro::Register_QUANTIZE(), ParseQuantize);
|
||||
}
|
||||
|
||||
TfLiteStatus AddRelu() {
|
||||
return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
|
||||
ParseRelu);
|
||||
}
|
||||
|
||||
TfLiteStatus AddRelu6() {
|
||||
return AddBuiltin(BuiltinOperator_RELU6,
|
||||
tflite::ops::micro::Register_RELU6(), ParseRelu6);
|
||||
}
|
||||
|
||||
TfLiteStatus AddReshape() {
|
||||
return AddBuiltin(BuiltinOperator_RESHAPE,
|
||||
tflite::ops::micro::Register_RESHAPE(), ParseReshape);
|
||||
}
|
||||
|
||||
TfLiteStatus AddResizeNearestNeighbor() {
|
||||
return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
|
||||
tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
|
||||
ParseResizeNearestNeighbor);
|
||||
}
|
||||
|
||||
TfLiteStatus AddRound() {
|
||||
return AddBuiltin(BuiltinOperator_ROUND,
|
||||
tflite::ops::micro::Register_ROUND(), ParseRound);
|
||||
}
|
||||
|
||||
TfLiteStatus AddRsqrt() {
|
||||
return AddBuiltin(BuiltinOperator_RSQRT,
|
||||
tflite::ops::micro::Register_RSQRT(), ParseRsqrt);
|
||||
}
|
||||
|
||||
TfLiteStatus AddSin() {
|
||||
return AddBuiltin(BuiltinOperator_SIN, tflite::ops::micro::Register_SIN(),
|
||||
ParseSin);
|
||||
}
|
||||
|
||||
TfLiteStatus AddSoftmax() {
|
||||
return AddBuiltin(BuiltinOperator_SOFTMAX,
|
||||
tflite::ops::micro::Register_SOFTMAX(), ParseSoftmax);
|
||||
}
|
||||
|
||||
TfLiteStatus AddSplit() {
|
||||
return AddBuiltin(BuiltinOperator_SPLIT,
|
||||
tflite::ops::micro::Register_SPLIT(), ParseSplit);
|
||||
}
|
||||
|
||||
TfLiteStatus AddSqrt() {
|
||||
return AddBuiltin(BuiltinOperator_SQRT, tflite::ops::micro::Register_SQRT(),
|
||||
ParseSqrt);
|
||||
}
|
||||
|
||||
TfLiteStatus AddSquare() {
|
||||
return AddBuiltin(BuiltinOperator_SQUARE,
|
||||
tflite::ops::micro::Register_SQUARE(), ParseSquare);
|
||||
}
|
||||
|
||||
TfLiteStatus AddStridedSlice() {
|
||||
return AddBuiltin(BuiltinOperator_STRIDED_SLICE,
|
||||
tflite::ops::micro::Register_STRIDED_SLICE(),
|
||||
ParseStridedSlice);
|
||||
}
|
||||
|
||||
TfLiteStatus AddSub() {
|
||||
return AddBuiltin(BuiltinOperator_SUB, tflite::ops::micro::Register_SUB(),
|
||||
ParseSub);
|
||||
}
|
||||
|
||||
TfLiteStatus AddSvdf() {
|
||||
return AddBuiltin(BuiltinOperator_SVDF, tflite::ops::micro::Register_SVDF(),
|
||||
ParseSvdf);
|
||||
}
|
||||
|
||||
TfLiteStatus AddTanh() {
|
||||
return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(),
|
||||
ParseTanh);
|
||||
}
|
||||
|
||||
TfLiteStatus AddUnpack() {
|
||||
return AddBuiltin(BuiltinOperator_UNPACK,
|
||||
tflite::ops::micro::Register_UNPACK(), ParseUnpack);
|
||||
}
|
||||
|
||||
unsigned int GetRegistrationLength() { return registrations_len_; }
|
||||
|
||||
private:
|
||||
TfLiteStatus AddBuiltin(tflite::BuiltinOperator op,
|
||||
const TfLiteRegistration& registration,
|
||||
MicroOpResolver::BuiltinParseFunction parser) {
|
||||
if (op == BuiltinOperator_CUSTOM) {
|
||||
if (error_reporter_ != nullptr) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Invalid parameter BuiltinOperator_CUSTOM to the "
|
||||
"AddBuiltin function.");
|
||||
}
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
if (FindOp(op) != nullptr) {
|
||||
if (error_reporter_ != nullptr) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Calling AddBuiltin with the same op more than "
|
||||
"once is not supported (Op: #%d).",
|
||||
op);
|
||||
}
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
if (registrations_len_ >= tOpCount) {
|
||||
if (error_reporter_) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Couldn't register builtin op #%d, resolver size "
|
||||
"is too small (%d).",
|
||||
op, tOpCount);
|
||||
}
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
registrations_[registrations_len_] = registration;
|
||||
// Strictly speaking, the builtin_code is not necessary for TFLM but filling
|
||||
// it in regardless.
|
||||
registrations_[registrations_len_].builtin_code = op;
|
||||
registrations_len_++;
|
||||
|
||||
builtin_codes_[num_buitin_ops_] = op;
|
||||
builtin_parsers_[num_buitin_ops_] = parser;
|
||||
num_buitin_ops_++;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteRegistration registrations_[tOpCount];
|
||||
unsigned int registrations_len_ = 0;
|
||||
|
||||
// Arrays (and counter) to store the builtin codes and their corresponding
|
||||
// parse functions as these are registered with the Op Resolver.
|
||||
BuiltinOperator builtin_codes_[tOpCount];
|
||||
MicroOpResolver::BuiltinParseFunction builtin_parsers_[tOpCount];
|
||||
unsigned int num_buitin_ops_ = 0;
|
||||
|
||||
ErrorReporter* error_reporter_;
|
||||
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
|
||||
}; // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
|
@@ -0,0 +1,73 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
|
||||
#include "tensorflow/lite/core/api/op_resolver.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// This is an interface for the OpResolver for TFLiteMicro. The differences from
|
||||
// the TFLite OpResolver base class are to:
|
||||
// * explicitly remove support for Op versions
|
||||
// * allow for finer grained registration of the Builtin Ops to reduce code
|
||||
// size for TFLiteMicro.
|
||||
//
|
||||
// We need an interface class instead of directly using MicroMutableOpResolver
|
||||
// because MicroMutableOpResolver is a class template with the number of
|
||||
// registered Ops as the template parameter.
|
||||
class MicroOpResolver : public OpResolver {
 public:
  // Signature of the per-op parse functions that convert a flatbuffer
  // Operator into op-specific builtin data, allocating through `allocator`
  // and returning the result via `builtin_data`.
  typedef TfLiteStatus (*BuiltinParseFunction)(const Operator* op,
                                               ErrorReporter* error_reporter,
                                               BuiltinDataAllocator* allocator,
                                               void** builtin_data);

  // Returns the Op registration struct corresponding to the enum code from the
  // flatbuffer schema. Returns nullptr if the op is not found or if op ==
  // BuiltinOperator_CUSTOM.
  virtual const TfLiteRegistration* FindOp(BuiltinOperator op) const = 0;

  // Returns the Op registration struct corresponding to the custom operator by
  // name.
  virtual const TfLiteRegistration* FindOp(const char* op) const = 0;

  // This implementation exists for compatibility with the OpResolver base
  // class and disregards the version parameter.
  const TfLiteRegistration* FindOp(BuiltinOperator op,
                                   int version) const final {
    return FindOp(op);
  }

  // This implementation exists for compatibility with the OpResolver base
  // class and disregards the version parameter.
  const TfLiteRegistration* FindOp(const char* op, int version) const final {
    return FindOp(op);
  }

  // Returns the operator specific parsing function for the OpData for a
  // BuiltinOperator (if registered), else nullptr.
  virtual BuiltinParseFunction GetOpDataParser(BuiltinOperator op) const = 0;

  ~MicroOpResolver() override {}
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
|
@@ -0,0 +1,30 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
// Optional debugging functionality. For small sized binaries, these are not
|
||||
// needed.
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
|
||||
|
||||
#include "tensorflow/lite/micro/micro_interpreter.h"
|
||||
|
||||
namespace tflite {
// Helper function to print model flatbuffer data. This function is not called
// by default. Hence it's not linked in to the final binary code.
void PrintModelData(const Model* model, ErrorReporter* error_reporter);
// Prints a dump of what tensors and what nodes are in the interpreter.
// Intended for interactive debugging only; pulls in formatting code.
void PrintInterpreterState(MicroInterpreter* interpreter);
}  // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
|
@@ -0,0 +1,71 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
|
||||
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/profiler.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// MicroProfiler creates a common way to gain fine-grained insight into runtime
|
||||
// performance. Bottleneck operators can be identified along with slow code
|
||||
// sections. This can be used in conjunction with running the relevant micro
|
||||
// benchmark to evaluate end-to-end performance.
|
||||
//
|
||||
// Usage example:
|
||||
// MicroProfiler profiler(error_reporter);
|
||||
// {
|
||||
// ScopedProfile scoped_profile(profiler, tag);
|
||||
// work_to_profile();
|
||||
// }
|
||||
//
|
||||
// This will call the following methods in order:
|
||||
// int event_handle = profiler->BeginEvent(op_name, EventType::DEFAULT, 0)
|
||||
// work_to_profile();
|
||||
// profiler->EndEvent(event_handle)
|
||||
class MicroProfiler : public tflite::Profiler {
|
||||
public:
|
||||
explicit MicroProfiler(tflite::ErrorReporter* reporter);
|
||||
~MicroProfiler() override = default;
|
||||
|
||||
// AddEvent is unused for Tf Micro.
|
||||
void AddEvent(const char* tag, EventType event_type, uint64_t start,
|
||||
uint64_t end, int64_t event_metadata1,
|
||||
int64_t event_metadata2) override{};
|
||||
|
||||
// BeginEvent followed by code followed by EndEvent will profile the code
|
||||
// enclosed. Multiple concurrent events are unsupported, so the return value
|
||||
// is always 0. Event_metadata1 and event_metadata2 are unused. The tag
|
||||
// pointer must be valid until EndEvent is called.
|
||||
uint32_t BeginEvent(const char* tag, EventType event_type,
|
||||
int64_t event_metadata1,
|
||||
int64_t event_metadata2) override;
|
||||
|
||||
// Event_handle is ignored since TF Micro does not support concurrent events.
|
||||
void EndEvent(uint32_t event_handle) override;
|
||||
|
||||
private:
|
||||
tflite::ErrorReporter* reporter_;
|
||||
int32_t start_time_;
|
||||
const char* event_tag_;
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
|
@@ -0,0 +1,33 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
|
||||
|
||||
#include <cstdarg>
|
||||
|
||||
// Implements simple string formatting for numeric types. Returns the number of
// bytes written to output.
extern "C" {
// Functionally equivalent to vsnprintf, trimmed down for TFLite Micro.
// MicroSnprintf() is implemented using MicroVsnprintf().
int MicroVsnprintf(char* output, int len, const char* format, va_list args);
// Functionally equivalent to snprintf, trimmed down for TFLite Micro.
// For example, MicroSnprintf(buffer, 10, "int %d", 10) will put the string
// "int 10" in the buffer.
// Floating point values are logged in exponent notation (1.XXX*2^N).
int MicroSnprintf(char* output, int len, const char* format, ...);
}
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
|
@@ -0,0 +1,31 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace tflite {

// These functions should be implemented by each target platform, and provide
// an accurate tick count along with how many ticks there are per second.
int32_t ticks_per_second();

// Return time in ticks. The meaning of a tick varies per platform; convert
// using ticks_per_second() when wall-clock durations are needed.
int32_t GetCurrentTimeTicks();

}  // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
|
@@ -0,0 +1,110 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// Returns number of elements in the shape array.
|
||||
|
||||
int ElementCount(const TfLiteIntArray& dims);
|
||||
|
||||
uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
|
||||
const int zero_point);
|
||||
|
||||
uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale);
|
||||
|
||||
int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
|
||||
const int zero_point);
|
||||
|
||||
int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
|
||||
const int zero_point);
|
||||
|
||||
int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale);
|
||||
|
||||
// Converts a float value into a signed thirty-two-bit quantized value. Note
|
||||
// that values close to max int and min int may see significant error due to
|
||||
// a lack of floating point granularity for large values.
|
||||
int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale);
|
||||
|
||||
// Helper methods to quantize arrays of floats to the desired format.
|
||||
//
|
||||
// There are several key flavors of quantization in TfLite:
|
||||
// asymmetric symmetric per channel
|
||||
// int8_t | X | X | X |
|
||||
// uint8_t | X | X | |
|
||||
// int16_t | X | | |
|
||||
// int32_t | | X | X |
|
||||
//
|
||||
// The per-op quantization spec can be found here:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
|
||||
void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
|
||||
float scale, int zero_point = 0);
|
||||
|
||||
void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
|
||||
float scale, int zero_point = 128);
|
||||
|
||||
void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
|
||||
float scale, int zero_point = 0);
|
||||
|
||||
void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
|
||||
float scale);
|
||||
|
||||
void SymmetricPerChannelQuantize(const float* input, int32_t* output,
|
||||
int num_elements, int num_channels,
|
||||
float* scales);
|
||||
|
||||
void SignedSymmetricPerChannelQuantize(const float* values,
|
||||
TfLiteIntArray* dims,
|
||||
int quantized_dimension,
|
||||
int8_t* quantized_values,
|
||||
float* scaling_factor);
|
||||
|
||||
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
|
||||
int8_t* quantized_values, float* scaling_factor);
|
||||
|
||||
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
|
||||
int16_t* quantized_values, float* scaling_factor);
|
||||
|
||||
void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
|
||||
int32_t* quantized_values, float* scaling_factor);
|
||||
|
||||
void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
|
||||
uint8_t* quantized_values, float* scaling_factor);
|
||||
|
||||
void SymmetricDequantize(const int8_t* values, const int size,
|
||||
const float dequantization_scale,
|
||||
float* dequantized_values);
|
||||
|
||||
template <typename T>
|
||||
void AsymmetricDequantize(const T* values, const int size,
|
||||
const float dequantization_scale,
|
||||
int dequantization_zero_point,
|
||||
float* dequantized_values) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
dequantized_values[i] =
|
||||
(values[i] - dequantization_zero_point) * dequantization_scale;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
|
@@ -0,0 +1,120 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
|
||||
#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
|
||||
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/micro_allocator.h"
|
||||
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// List of buckets currently recorded by this class. Each type keeps a list of
|
||||
// allocated information during model initialization.
|
||||
enum class RecordedAllocationType {
|
||||
kTfLiteEvalTensorData,
|
||||
kPersistentTfLiteTensorData,
|
||||
kPersistentTfLiteTensorQuantizationData,
|
||||
kTfLiteTensorVariableBufferData,
|
||||
kNodeAndRegistrationArray,
|
||||
kOpData,
|
||||
};
|
||||
|
||||
// Container for holding information about allocation recordings by a given
|
||||
// type. Each recording contains the number of bytes requested, the actual bytes
|
||||
// allocated (can defer from requested by alignment), and the number of items
|
||||
// allocated.
|
||||
struct RecordedAllocation {
|
||||
size_t requested_bytes;
|
||||
size_t used_bytes;
|
||||
size_t count;
|
||||
};
|
||||
|
||||
// Utility subclass of MicroAllocator that records all allocations
|
||||
// inside the arena. A summary of allocations can be logged through the
|
||||
// ErrorReporter by invoking LogAllocations(). This special allocator requires
|
||||
// an instance of RecordingSimpleMemoryAllocator to capture allocations in the
|
||||
// head and tail. Arena allocation recording can be retrieved by type through
|
||||
// the GetRecordedAllocation() function. This class should only be used for
|
||||
// auditing memory usage or integration testing.
|
||||
class RecordingMicroAllocator : public MicroAllocator {
|
||||
public:
|
||||
static RecordingMicroAllocator* Create(uint8_t* tensor_arena,
|
||||
size_t arena_size,
|
||||
ErrorReporter* error_reporter);
|
||||
|
||||
// Returns the recorded allocations information for a given allocation type.
|
||||
RecordedAllocation GetRecordedAllocation(
|
||||
RecordedAllocationType allocation_type) const;
|
||||
|
||||
const RecordingSimpleMemoryAllocator* GetSimpleMemoryAllocator() const;
|
||||
|
||||
// Logs out through the ErrorReporter all allocation recordings by type
|
||||
// defined in RecordedAllocationType.
|
||||
void PrintAllocations() const;
|
||||
|
||||
protected:
|
||||
TfLiteStatus AllocateNodeAndRegistrations(
|
||||
const Model* model,
|
||||
NodeAndRegistration** node_and_registrations) override;
|
||||
TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
|
||||
const Model* model, const MicroOpResolver& op_resolver,
|
||||
NodeAndRegistration* node_and_registrations) override;
|
||||
TfLiteStatus AllocateTfLiteEvalTensors(
|
||||
const Model* model, TfLiteEvalTensor** eval_tensors) override;
|
||||
TfLiteStatus AllocateVariables(const SubGraph* subgraph,
|
||||
TfLiteEvalTensor* eval_tensors) override;
|
||||
// TODO(b/160894903): Once all kernels have been updated to the new API drop
|
||||
// this method. It is only used to record TfLiteTensor persistent allocations.
|
||||
TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
|
||||
const Model* model, TfLiteEvalTensor* eval_tensors,
|
||||
int tensor_index) override;
|
||||
// TODO(b/160894903): Once all kernels have been updated to the new API drop
|
||||
// this function since all allocations for quantized data will take place in
|
||||
// the temp section.
|
||||
TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
|
||||
const SubGraph* subgraph,
|
||||
TfLiteTensor* tensor,
|
||||
int tensor_index,
|
||||
bool allocate_temp) override;
|
||||
|
||||
private:
|
||||
RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator,
|
||||
ErrorReporter* error_reporter);
|
||||
|
||||
void PrintRecordedAllocation(RecordedAllocationType allocation_type,
|
||||
const char* allocation_name,
|
||||
const char* allocation_description) const;
|
||||
|
||||
RecordedAllocation SnapshotAllocationUsage() const;
|
||||
void RecordAllocationUsage(const RecordedAllocation& snapshotted_allocation,
|
||||
RecordedAllocation& recorded_allocation);
|
||||
|
||||
const RecordingSimpleMemoryAllocator* recording_memory_allocator_;
|
||||
|
||||
RecordedAllocation recorded_tflite_eval_tensor_data_ = {};
|
||||
RecordedAllocation recorded_persistent_tflite_tensor_data_ = {};
|
||||
RecordedAllocation recorded_persistent_tflite_tensor_quantization_data_ = {};
|
||||
RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {};
|
||||
RecordedAllocation recorded_node_and_registration_array_data_ = {};
|
||||
RecordedAllocation recorded_op_data_ = {};
|
||||
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
|
@@ -0,0 +1,65 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
|
||||
|
||||
#include "tensorflow/lite/micro/micro_interpreter.h"
|
||||
#include "tensorflow/lite/micro/recording_micro_allocator.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// Utility subclass that enables internal recordings of the MicroInterpreter.
|
||||
// This class should be used to audit and analyze memory arena usage for a given
|
||||
// model and interpreter.
|
||||
//
|
||||
// After construction and the first Invoke() or AllocateTensors() call - the
|
||||
// memory usage is recorded and available through the GetMicroAllocator()
|
||||
// function. See RecordingMicroAlloctor for more details on what is currently
|
||||
// recorded from arena allocations.
|
||||
//
|
||||
// It is recommended for users to increase the tensor arena size by at least 1kb
|
||||
// to ensure enough additional memory is available for internal recordings.
|
||||
class RecordingMicroInterpreter : public MicroInterpreter {
|
||||
public:
|
||||
RecordingMicroInterpreter(const Model* model,
|
||||
const MicroOpResolver& op_resolver,
|
||||
uint8_t* tensor_arena, size_t tensor_arena_size,
|
||||
ErrorReporter* error_reporter)
|
||||
: MicroInterpreter(model, op_resolver,
|
||||
RecordingMicroAllocator::Create(
|
||||
tensor_arena, tensor_arena_size, error_reporter),
|
||||
error_reporter),
|
||||
recording_micro_allocator_(
|
||||
static_cast<const RecordingMicroAllocator&>(allocator())) {}
|
||||
|
||||
RecordingMicroInterpreter(const Model* model,
|
||||
const MicroOpResolver& op_resolver,
|
||||
RecordingMicroAllocator* allocator,
|
||||
ErrorReporter* error_reporter)
|
||||
: MicroInterpreter(model, op_resolver, allocator, error_reporter),
|
||||
recording_micro_allocator_(*allocator) {}
|
||||
|
||||
const RecordingMicroAllocator& GetMicroAllocator() const {
|
||||
return recording_micro_allocator_;
|
||||
}
|
||||
|
||||
private:
|
||||
const RecordingMicroAllocator& recording_micro_allocator_;
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
|
@@ -0,0 +1,64 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
|
||||
#define TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
|
||||
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/simple_memory_allocator.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// Utility class used to log allocations of a SimpleMemoryAllocator. Should only
|
||||
// be used in debug/evaluation settings or unit tests to evaluate allocation
|
||||
// usage.
|
||||
class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
|
||||
public:
|
||||
RecordingSimpleMemoryAllocator(ErrorReporter* error_reporter,
|
||||
uint8_t* buffer_head, size_t buffer_size);
|
||||
// TODO(b/157615197): Cleanup constructors/destructor and use factory
|
||||
// functions.
|
||||
~RecordingSimpleMemoryAllocator() override;
|
||||
|
||||
static RecordingSimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
|
||||
uint8_t* buffer_head,
|
||||
size_t buffer_size);
|
||||
|
||||
// Returns the number of bytes requested from the head or tail.
|
||||
size_t GetRequestedBytes() const;
|
||||
|
||||
// Returns the number of bytes actually allocated from the head or tail. This
|
||||
// value will be >= to the number of requested bytes due to padding and
|
||||
// alignment.
|
||||
size_t GetUsedBytes() const;
|
||||
|
||||
// Returns the number of alloc calls from the head or tail.
|
||||
size_t GetAllocatedCount() const;
|
||||
|
||||
TfLiteStatus EnsureHeadSize(size_t size, size_t alignment) override;
|
||||
uint8_t* AllocateFromTail(size_t size, size_t alignment) override;
|
||||
|
||||
private:
|
||||
size_t requested_head_bytes_;
|
||||
size_t requested_tail_bytes_;
|
||||
size_t used_bytes_;
|
||||
size_t alloc_count_;
|
||||
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
|
@@ -0,0 +1,99 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
|
||||
#define TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// TODO(petewarden): This allocator never frees up or reuses any memory, even
|
||||
// though we have enough information about lifetimes of the tensors to do so.
|
||||
// This makes it pretty wasteful, so we should use a more intelligent method.
|
||||
class SimpleMemoryAllocator {
|
||||
public:
|
||||
// TODO(b/157615197): Cleanup constructors/destructor and use factory
|
||||
// functions.
|
||||
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head,
|
||||
uint8_t* buffer_tail);
|
||||
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
|
||||
size_t buffer_size);
|
||||
virtual ~SimpleMemoryAllocator();
|
||||
|
||||
// Creates a new SimpleMemoryAllocator from a given buffer head and size.
|
||||
static SimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
|
||||
uint8_t* buffer_head,
|
||||
size_t buffer_size);
|
||||
|
||||
// Ensure that the head (lowest address and moving upwards) memory allocation
|
||||
// is at least a given size. This function will only increase the head size if
|
||||
// the passed in value is larger than the current head size. Calls to this
|
||||
// method will also invalidate all temporary allocation values. This call will
|
||||
// fail if a chain of allocations through AllocateTemp() have not been cleaned
|
||||
// up with a call to ResetTempAllocations().
|
||||
virtual TfLiteStatus EnsureHeadSize(size_t size, size_t alignment);
|
||||
|
||||
// Allocates memory starting at the tail of the arena (highest address and
|
||||
// moving downwards).
|
||||
virtual uint8_t* AllocateFromTail(size_t size, size_t alignment);
|
||||
|
||||
// Allocates a temporary buffer from the head of the arena (lowest address and
|
||||
// moving upwards) but does not update the actual head allocation size or
|
||||
// position. The returned buffer is guaranteed until either
|
||||
// ResetTempAllocations() is called or another call to AllocateFromHead().
|
||||
// Repeat calls to this function will create a chain of temp allocations. All
|
||||
// calls to AllocateTemp() must end with a call to ResetTempAllocations(). If
|
||||
// AllocateFromHead() is called before a call to ResetTempAllocations(), it
|
||||
// will fail with an error message.
|
||||
virtual uint8_t* AllocateTemp(size_t size, size_t alignment);
|
||||
|
||||
// Resets a chain of temporary allocations back to the current head of the
|
||||
// arena (lowest address).
|
||||
virtual void ResetTempAllocations();
|
||||
|
||||
uint8_t* GetHead() const;
|
||||
uint8_t* GetBufferHead() const;
|
||||
uint8_t* GetTail() const;
|
||||
|
||||
size_t GetHeadUsedBytes() const;
|
||||
size_t GetTailUsedBytes() const;
|
||||
|
||||
// Returns the number of bytes available with a given alignment.
|
||||
size_t GetAvailableMemory(size_t alignment) const;
|
||||
|
||||
size_t GetUsedBytes() const;
|
||||
|
||||
private:
|
||||
size_t GetBufferSize() const;
|
||||
|
||||
ErrorReporter* error_reporter_;
|
||||
uint8_t* buffer_head_;
|
||||
uint8_t* buffer_tail_;
|
||||
uint8_t* head_;
|
||||
uint8_t* tail_;
|
||||
uint8_t* temp_;
|
||||
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
|
@@ -0,0 +1,186 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
|
||||
|
||||
// Useful functions for writing tests.
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/micro/all_ops_resolver.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace testing {
|
||||
|
||||
constexpr int kOfflinePlannerHeaderSize = 3;
|
||||
|
||||
struct NodeConnection_ {
|
||||
std::initializer_list<int32_t> input;
|
||||
std::initializer_list<int32_t> output;
|
||||
};
|
||||
typedef struct NodeConnection_ NodeConnection;
|
||||
|
||||
// A simple operator that returns the median of the input with the number of
|
||||
// times the kernel was invoked. The implementation below is deliberately
|
||||
// complicated, just to demonstrate how kernel memory planning works.
|
||||
class SimpleStatefulOp {
|
||||
static constexpr int kBufferNotAllocated = 0;
|
||||
// Inputs:
|
||||
static constexpr int kInputTensor = 0;
|
||||
// Outputs:
|
||||
static constexpr int kMedianTensor = 0;
|
||||
static constexpr int kInvokeCount = 1;
|
||||
struct OpData {
|
||||
int invoke_count = 0;
|
||||
int sorting_buffer = kBufferNotAllocated;
|
||||
};
|
||||
|
||||
public:
|
||||
static const TfLiteRegistration* getRegistration();
|
||||
static TfLiteRegistration* GetMutableRegistration();
|
||||
static void* Init(TfLiteContext* context, const char* buffer, size_t length);
|
||||
static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
|
||||
static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);
|
||||
};
|
||||
|
||||
class MockCustom {
|
||||
public:
|
||||
static const TfLiteRegistration* getRegistration();
|
||||
static TfLiteRegistration* GetMutableRegistration();
|
||||
static void* Init(TfLiteContext* context, const char* buffer, size_t length);
|
||||
static void Free(TfLiteContext* context, void* buffer);
|
||||
static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
|
||||
static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);
|
||||
|
||||
static bool freed_;
|
||||
};
|
||||
|
||||
// Returns an Op Resolver that can be used in the testing code.
|
||||
AllOpsResolver GetOpResolver();
|
||||
|
||||
// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input,
|
||||
// 1 layer of weights, 1 output Tensor, and 1 operator.
|
||||
const Model* GetSimpleMockModel();
|
||||
|
||||
// Returns a flatbuffer TensorFlow Lite model with more inputs, variable
|
||||
// tensors, and operators.
|
||||
const Model* GetComplexMockModel();
|
||||
|
||||
// Returns a simple flatbuffer model with two branches.
|
||||
const Model* GetSimpleModelWithBranch();
|
||||
|
||||
// Returns a simple flatbuffer model with offline planned tensors
|
||||
const Model* GetModelWithOfflinePlanning(int num_tensors,
|
||||
const int32_t* metadata_buffer,
|
||||
NodeConnection* node_conn,
|
||||
int num_conns);
|
||||
|
||||
// Returns a flatbuffer model with `simple_stateful_op`
|
||||
const Model* GetSimpleStatefulModel();
|
||||
|
||||
// Builds a one-dimensional flatbuffer tensor of the given size.
|
||||
const Tensor* Create1dFlatbufferTensor(int size, bool is_variable = false);
|
||||
|
||||
// Builds a one-dimensional flatbuffer tensor of the given size with
|
||||
// quantization metadata.
|
||||
const Tensor* CreateQuantizedFlatbufferTensor(int size);
|
||||
|
||||
// Creates a one-dimensional tensor with no quantization metadata.
|
||||
const Tensor* CreateMissingQuantizationFlatbufferTensor(int size);
|
||||
|
||||
// Creates a vector of flatbuffer buffers.
|
||||
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>*
|
||||
CreateFlatbufferBuffers();
|
||||
|
||||
// Performs a simple string comparison without requiring standard C library.
|
||||
int TestStrcmp(const char* a, const char* b);
|
||||
|
||||
// Wrapper to forward kernel errors to the interpreter's error reporter.
|
||||
void ReportOpError(struct TfLiteContext* context, const char* format, ...);
|
||||
|
||||
void PopulateContext(TfLiteTensor* tensors, int tensors_size,
|
||||
TfLiteContext* context);
|
||||
|
||||
// Create a TfLiteIntArray from an array of ints. The first element in the
|
||||
// supplied array must be the size of the array expressed as an int.
|
||||
TfLiteIntArray* IntArrayFromInts(const int* int_array);
|
||||
|
||||
// Create a TfLiteFloatArray from an array of floats. The first element in the
|
||||
// supplied array must be the size of the array expressed as a float.
|
||||
TfLiteFloatArray* FloatArrayFromFloats(const float* floats);
|
||||
|
||||
TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims,
|
||||
bool is_variable = false);
|
||||
|
||||
void PopulateFloatTensor(TfLiteTensor* tensor, float* begin, float* end);
|
||||
|
||||
TfLiteTensor CreateBoolTensor(const bool* data, TfLiteIntArray* dims,
|
||||
bool is_variable = false);
|
||||
|
||||
TfLiteTensor CreateInt32Tensor(const int32_t*, TfLiteIntArray* dims,
|
||||
bool is_variable = false);
|
||||
|
||||
TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims,
|
||||
float scale, int zero_point,
|
||||
bool is_variable = false);
|
||||
|
||||
TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims,
|
||||
float scale, int zero_point,
|
||||
bool is_variable = false);
|
||||
|
||||
TfLiteTensor CreateQuantizedTensor(const int16_t* data, TfLiteIntArray* dims,
|
||||
float scale, int zero_point,
|
||||
bool is_variable = false);
|
||||
|
||||
template <typename T>
|
||||
TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized,
|
||||
TfLiteIntArray* dims, float scale,
|
||||
int zero_point, bool is_variable = false) {
|
||||
int input_size = ElementCount(*dims);
|
||||
tflite::AsymmetricQuantize(input, quantized, input_size, scale, zero_point);
|
||||
return CreateQuantizedTensor(quantized, dims, scale, zero_point, is_variable);
|
||||
}
|
||||
|
||||
TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
|
||||
TfLiteIntArray* dims, float input_scale,
|
||||
float weights_scale,
|
||||
bool is_variable = false);
|
||||
|
||||
// Quantizes int32_t bias tensor with per-channel weights determined by input
|
||||
// scale multiplied by weight scale for each channel.
|
||||
TfLiteTensor CreatePerChannelQuantizedBiasTensor(
|
||||
const float* input, int32_t* quantized, TfLiteIntArray* dims,
|
||||
float input_scale, float* weight_scales, float* scales, int* zero_points,
|
||||
TfLiteAffineQuantization* affine_quant, int quantized_dimension,
|
||||
bool is_variable = false);
|
||||
|
||||
TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
|
||||
const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
|
||||
int* zero_points, TfLiteAffineQuantization* affine_quant,
|
||||
int quantized_dimension, bool is_variable = false);
|
||||
|
||||
// Returns the number of tensors in the default subgraph for a tflite::Model.
|
||||
size_t GetModelTensorCount(const Model* model);
|
||||
|
||||
} // namespace testing
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
|
@@ -0,0 +1,241 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// An ultra-lightweight testing framework designed for use with microcontroller
|
||||
// applications. Its only dependency is on TensorFlow Lite's ErrorReporter
|
||||
// interface, where log messages are output. This is designed to be usable even
|
||||
// when no standard C or C++ libraries are available, and without any dynamic
|
||||
// memory allocation or reliance on global constructors.
|
||||
//
|
||||
// To build a test, you use syntax similar to gunit, but with some extra
|
||||
// decoration to create a hidden 'main' function containing each of the tests to
|
||||
// be run. Your code should look something like:
|
||||
// ----------------------------------------------------------------------------
|
||||
// #include "path/to/this/header"
|
||||
//
|
||||
// TF_LITE_MICRO_TESTS_BEGIN
|
||||
//
|
||||
// TF_LITE_MICRO_TEST(SomeTest) {
|
||||
// TF_LITE_LOG_EXPECT_EQ(true, true);
|
||||
// }
|
||||
//
|
||||
// TF_LITE_MICRO_TESTS_END
|
||||
// ----------------------------------------------------------------------------
|
||||
// If you compile this for your platform, you'll get a normal binary that you
|
||||
// should be able to run. Executing it will output logging information like this
|
||||
// to stderr (or whatever equivalent is available and written to by
|
||||
// ErrorReporter):
|
||||
// ----------------------------------------------------------------------------
|
||||
// Testing SomeTest
|
||||
// 1/1 tests passed
|
||||
// ~~~ALL TESTS PASSED~~~
|
||||
// ----------------------------------------------------------------------------
|
||||
// This is designed to be human-readable, so you can just run tests manually,
|
||||
// but the string "~~~ALL TESTS PASSED~~~" should only appear if all of the
|
||||
// tests do pass. This makes it possible to integrate with automated test
|
||||
// systems by scanning the output logs and looking for that magic value.
|
||||
//
|
||||
// This framework is intended to be a rudimentary alternative to no testing at
|
||||
// all on systems that struggle to run more conventional approaches, so use with
|
||||
// caution!
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_TESTING_MICRO_TEST_H_
|
||||
#define TENSORFLOW_LITE_MICRO_TESTING_MICRO_TEST_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
|
||||
// Shared mutable state used by the TF_LITE_MICRO_* macros below. The
// definitions are emitted into the test binary by TF_LITE_MICRO_TESTS_BEGIN.
namespace micro_test {
extern int tests_passed;       // Count of tests that completed without failure.
extern int tests_failed;       // Count of tests in which an expectation failed.
extern bool is_test_complete;  // Loop guard used by TF_LITE_MICRO_TEST.
extern bool did_test_fail;     // Set by the EXPECT/FAIL macros on failure.
extern tflite::ErrorReporter* reporter;  // Destination for all test logging.
}  // namespace micro_test
|
||||
|
||||
// Opens the hidden test `main`: defines the micro_test globals, wires up the
// error reporter, and performs platform bring-up before any test bodies run.
// Must be paired with TF_LITE_MICRO_TESTS_END, which closes `main`.
// NOTE(review): HAL_Init/SystemClock_Config/board_init are platform hooks
// assumed to be declared by an earlier include for the target board — confirm
// for your build; their call order is deliberate (HAL, then clocks, then
// board) and should not be rearranged.
#define TF_LITE_MICRO_TESTS_BEGIN              \
  namespace micro_test {                       \
  int tests_passed;                            \
  int tests_failed;                            \
  bool is_test_complete;                       \
  bool did_test_fail;                          \
  tflite::ErrorReporter* reporter;             \
  }                                            \
                                               \
  int main(void) {                             \
    micro_test::tests_passed = 0;              \
    micro_test::tests_failed = 0;              \
    tflite::MicroErrorReporter error_reporter; \
    micro_test::reporter = &error_reporter;    \
    HAL_Init();                                \
    SystemClock_Config();                      \
    board_init();                              \
    printf("Init Successful");
|
||||
|
||||
// Closes the `main` opened by TF_LITE_MICRO_TESTS_BEGIN: reports the pass/fail
// tally and the magic marker that automated harnesses scan for
// ("~~~ALL TESTS PASSED~~~"), then spins forever — returning from main is not
// meaningful on a bare-metal target.
#define TF_LITE_MICRO_TESTS_END                                \
  micro_test::reporter->Report(                                \
      "%d/%d tests passed", micro_test::tests_passed,          \
      (micro_test::tests_failed + micro_test::tests_passed));  \
  if (micro_test::tests_failed == 0) {                         \
    micro_test::reporter->Report("~~~ALL TESTS PASSED~~~\n");  \
  } else {                                                     \
    micro_test::reporter->Report("~~~SOME TESTS FAILED~~~\n"); \
  }                                                            \
  while (1);                                                   \
  }
|
||||
|
||||
// TODO(petewarden): I'm going to hell for what I'm doing to this poor for loop.
// Declares a single named test case. The for-loop header runs the block that
// follows exactly once: the body executes while is_test_complete is false,
// and the increment clause then marks the test complete and tallies
// pass/fail based on did_test_fail. Do not `break` out of a test body — that
// would skip the tallying in the increment clause.
#define TF_LITE_MICRO_TEST(name)                                           \
  micro_test::reporter->Report("Testing " #name);                          \
  for (micro_test::is_test_complete = false,                               \
      micro_test::did_test_fail = false;                                   \
       !micro_test::is_test_complete; micro_test::is_test_complete = true, \
      micro_test::tests_passed += (micro_test::did_test_fail) ? 0 : 1,     \
      micro_test::tests_failed += (micro_test::did_test_fail) ? 1 : 0)
|
||||
|
||||
// Marks the current test failed (via did_test_fail) when `x` evaluates to
// false, logging the stringified expression and the source location.
#define TF_LITE_MICRO_EXPECT(x)                                              \
  do {                                                                       \
    if (!(x)) {                                                              \
      micro_test::reporter->Report(#x " failed at %s:%d", __FILE__, __LINE__); \
      micro_test::did_test_fail = true;                                      \
    }                                                                        \
  } while (false)
|
||||
|
||||
// TODO(b/139142772): this macro is used with types other than ints even though
// the printf specifier is %d.
// Marks the current test failed when `x` != `y`. Each argument is evaluated
// exactly once. Fix: the macro arguments are now parenthesized in the local
// initializers so expressions with lower-precedence operators expand safely.
#define TF_LITE_MICRO_EXPECT_EQ(x, y)                                          \
  do {                                                                         \
    auto vx = (x);                                                             \
    auto vy = (y);                                                             \
    if ((vx) != (vy)) {                                                        \
      micro_test::reporter->Report(#x " == " #y " failed at %s:%d (%d vs %d)", \
                                   __FILE__, __LINE__, static_cast<int>(vx),   \
                                   static_cast<int>(vy));                      \
      micro_test::did_test_fail = true;                                        \
    }                                                                          \
  } while (false)
|
||||
|
||||
// Marks the current test failed when `x` == `y`, logging the stringified
// expressions and the source location.
#define TF_LITE_MICRO_EXPECT_NE(x, y)                                         \
  do {                                                                        \
    if ((x) == (y)) {                                                         \
      micro_test::reporter->Report(#x " != " #y " failed at %s:%d", __FILE__, \
                                   __LINE__);                                 \
      micro_test::did_test_fail = true;                                       \
    }                                                                         \
  } while (false)
|
||||
|
||||
// TODO(wangtz): Making it more generic once needed.
// Marks the current test failed when |arr1[idx1] - arr2[idx2]| > epsilon.
// The absolute difference is computed branchily so the macro also works for
// unsigned element types. Fix: `epsilon` is now parenthesized in the
// comparison so expressions like `a + b` expand with the intended precedence.
#define TF_LITE_MICRO_ARRAY_ELEMENT_EXPECT_NEAR(arr1, idx1, arr2, idx2,    \
                                                epsilon)                   \
  do {                                                                     \
    auto delta = ((arr1)[(idx1)] > (arr2)[(idx2)])                         \
                     ? ((arr1)[(idx1)] - (arr2)[(idx2)])                   \
                     : ((arr2)[(idx2)] - (arr1)[(idx1)]);                  \
    if (delta > (epsilon)) {                                               \
      micro_test::reporter->Report(                                        \
          #arr1 "[%d] (%f) near " #arr2 "[%d] (%f) failed at %s:%d",       \
          static_cast<int>(idx1), static_cast<float>((arr1)[(idx1)]),      \
          static_cast<int>(idx2), static_cast<float>((arr2)[(idx2)]),      \
          __FILE__, __LINE__);                                             \
      micro_test::did_test_fail = true;                                    \
    }                                                                      \
  } while (false)
|
||||
|
||||
// Marks the current test failed when |x - y| > epsilon. `x` and `y` are
// evaluated exactly once; the absolute difference is computed branchily so
// the macro also works for unsigned types. Fix: `epsilon` is now
// parenthesized in the comparison for macro hygiene.
#define TF_LITE_MICRO_EXPECT_NEAR(x, y, epsilon)                  \
  do {                                                            \
    auto vx = (x);                                                \
    auto vy = (y);                                                \
    auto delta = ((vx) > (vy)) ? ((vx) - (vy)) : ((vy) - (vx));   \
    if (delta > (epsilon)) {                                      \
      micro_test::reporter->Report(                               \
          #x " (%f) near " #y " (%f) failed at %s:%d",            \
          static_cast<double>(vx), static_cast<double>(vy),       \
          __FILE__, __LINE__);                                    \
      micro_test::did_test_fail = true;                           \
    }                                                             \
  } while (false)
|
||||
|
||||
// Marks the current test failed unless `x` > `y`.
#define TF_LITE_MICRO_EXPECT_GT(x, y)                                        \
  do {                                                                       \
    if ((x) <= (y)) {                                                        \
      micro_test::reporter->Report(#x " > " #y " failed at %s:%d", __FILE__, \
                                   __LINE__);                                \
      micro_test::did_test_fail = true;                                      \
    }                                                                        \
  } while (false)
|
||||
|
||||
// Marks the current test failed unless `x` < `y`.
#define TF_LITE_MICRO_EXPECT_LT(x, y)                                        \
  do {                                                                       \
    if ((x) >= (y)) {                                                        \
      micro_test::reporter->Report(#x " < " #y " failed at %s:%d", __FILE__, \
                                   __LINE__);                                \
      micro_test::did_test_fail = true;                                      \
    }                                                                        \
  } while (false)
|
||||
|
||||
// Marks the current test failed unless `x` >= `y`.
#define TF_LITE_MICRO_EXPECT_GE(x, y)                                         \
  do {                                                                        \
    if ((x) < (y)) {                                                          \
      micro_test::reporter->Report(#x " >= " #y " failed at %s:%d", __FILE__, \
                                   __LINE__);                                 \
      micro_test::did_test_fail = true;                                       \
    }                                                                         \
  } while (false)
|
||||
|
||||
// Marks the current test failed unless `x` <= `y`.
#define TF_LITE_MICRO_EXPECT_LE(x, y)                                         \
  do {                                                                        \
    if ((x) > (y)) {                                                          \
      micro_test::reporter->Report(#x " <= " #y " failed at %s:%d", __FILE__, \
                                   __LINE__);                                 \
      micro_test::did_test_fail = true;                                       \
    }                                                                         \
  } while (false)
|
||||
|
||||
// Marks the current test failed unless `x` is truthy.
#define TF_LITE_MICRO_EXPECT_TRUE(x)                                   \
  do {                                                                 \
    if (!(x)) {                                                        \
      micro_test::reporter->Report(#x " was not true failed at %s:%d", \
                                   __FILE__, __LINE__);                \
      micro_test::did_test_fail = true;                                \
    }                                                                  \
  } while (false)
|
||||
|
||||
// Marks the current test failed unless `x` is falsy. Fix: the macro argument
// is now parenthesized in the condition (it was the only EXPECT macro that
// expanded its argument bare), so expressions with low-precedence operators
// expand safely.
#define TF_LITE_MICRO_EXPECT_FALSE(x)                                   \
  do {                                                                  \
    if ((x)) {                                                          \
      micro_test::reporter->Report(#x " was not false failed at %s:%d", \
                                   __FILE__, __LINE__);                 \
      micro_test::did_test_fail = true;                                 \
    }                                                                   \
  } while (false)
|
||||
|
||||
// Unconditionally marks the current test failed, logging `msg` and the source
// location. Fix: the original format string was "FAIL: %s" yet the call
// passed three arguments, so __FILE__ and __LINE__ were silently dropped;
// the format now consumes them.
#define TF_LITE_MICRO_FAIL(msg)                                        \
  do {                                                                 \
    micro_test::reporter->Report("FAIL: %s at %s:%d", (msg), __FILE__, \
                                 __LINE__);                            \
    micro_test::did_test_fail = true;                                  \
  } while (false)
|
||||
|
||||
// Marks the current test failed unless the two NUL-terminated C strings are
// equal. Fixes over the original:
//  - The loop ran only while BOTH strings were non-terminated, so strings
//    that differ only in length (one a prefix of the other) compared equal;
//    the condition now continues while EITHER has characters, which makes a
//    length mismatch show up as a differing character ('\0' vs non-'\0').
//  - The failure is reported once and the loop exits (`break`) instead of
//    logging on every subsequent mismatching index.
//  - The format string had two %s conversions but four arguments; it now
//    consumes __FILE__ and __LINE__.
#define TF_LITE_MICRO_EXPECT_STRING_EQ(string1, string2)                     \
  do {                                                                       \
    for (int i = 0; (string1)[i] != '\0' || (string2)[i] != '\0'; i++) {     \
      if ((string1)[i] != (string2)[i]) {                                    \
        micro_test::reporter->Report("FAIL: %s did not match %s at %s:%d",   \
                                     string1, string2, __FILE__, __LINE__);  \
        micro_test::did_test_fail = true;                                    \
        break;                                                               \
      }                                                                      \
    }                                                                        \
  } while (false)
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_TESTING_MICRO_TEST_H_
|
@@ -0,0 +1,23 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_TESTING_TEST_CONV_MODEL_H_
|
||||
#define TENSORFLOW_LITE_MICRO_TESTING_TEST_CONV_MODEL_H_
|
||||
|
||||
// See generate_test_models.py for updating the contents of this model:
|
||||
extern const unsigned char kTestConvModelData[];
|
||||
extern const unsigned int kTestConvModelDataSize;
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_TESTING_TEST_CONV_MODEL_H_
|
@@ -0,0 +1,116 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_TESTING_TEST_UTILS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_TESTING_TEST_UTILS_H_
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/tensor_utils.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
#include "tensorflow/lite/micro/test_helpers.h"
|
||||
#include "tensorflow/lite/micro/testing/micro_test.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace testing {
|
||||
|
||||
// Note: These methods are deprecated, do not use. See b/141332970.
|
||||
|
||||
|
||||
// Derives the real-valued upper bound of the range representable by the
// quantized type T, given the quantization zero point and scale.
// Deprecated test-only helper (see b/141332970).
template <typename T>
inline float MaxFromZeroPointScale(const int zero_point, const float scale) {
  const int highest_quantized = std::numeric_limits<T>::max();
  return static_cast<float>(highest_quantized - zero_point) * scale;
}
||||
|
||||
// Derives the real-valued lower bound of the range representable by the
// quantized type T, given the quantization zero point and scale.
// Deprecated test-only helper (see b/141332970).
template <typename T>
inline float MinFromZeroPointScale(const int zero_point, const float scale) {
  const int lowest_quantized = std::numeric_limits<T>::min();
  return static_cast<float>(lowest_quantized - zero_point) * scale;
}
||||
|
||||
// Derives the quantization scale from a real-valued [min, max] range: the
// span of the range divided by the number of quantized steps of T.
// Deprecated test-only helper (see b/141332970).
template <typename T>
inline float ScaleFromMinMax(const float min, const float max) {
  // Widen to double before subtracting so the full quantized span of 32-bit
  // types is represented exactly, matching the original arithmetic.
  const double quantized_span =
      (std::numeric_limits<T>::max() * 1.0) - std::numeric_limits<T>::min();
  return (max - min) / static_cast<float>(quantized_span);
}
|
||||
|
||||
// Derives the quantization zero point from a min and max range.
|
||||
template <typename T>
|
||||
inline int ZeroPointFromMinMax(const float min, const float max) {
|
||||
return static_cast<int>(std::numeric_limits<T>::min()) +
|
||||
static_cast<int>(-min / ScaleFromMinMax<T>(min, max) + 0.5f);
|
||||
}
|
||||
|
||||
// Converts a float value into an unsigned eight-bit quantized value.
uint8_t F2Q(float value, float min, float max);

// Converts a float value into a signed eight-bit quantized value.
int8_t F2QS(const float value, const float min, const float max);

// Converts a float value into a signed thirty-two-bit quantized value. Note
// that values close to max int and min int may see significant error due to
// a lack of floating point granularity for large values.
int32_t F2Q32(const float value, const float scale);

// Initializes `context` so it serves the given `tensors` array; see the
// corresponding .cc for the exact fields populated.
// TODO(b/141330728): Move this method elsewhere as part clean up.
void PopulateContext(TfLiteTensor* tensors, int tensors_size,
                     ErrorReporter* error_reporter, TfLiteContext* context);

// Wraps already-quantized uint8 `data` in a tensor; quantization parameters
// are presumably derived from the real-valued [min, max] range -- see the
// .cc implementation for details.
TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims,
                                   float min, float max,
                                   bool is_variable = false);

// Same as above for already-quantized int8 data.
TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims,
                                   float min, float max,
                                   bool is_variable = false);

// Quantizes float `data` into the caller-provided uint8 buffer and wraps the
// result in a tensor.
TfLiteTensor CreateQuantizedTensor(float* data, uint8_t* quantized_data,
                                   TfLiteIntArray* dims,
                                   bool is_variable = false);

// Same as above, quantizing into a caller-provided int8 buffer.
TfLiteTensor CreateQuantizedTensor(float* data, int8_t* quantized_data,
                                   TfLiteIntArray* dims,
                                   bool is_variable = false);

// Same as above, quantizing into a caller-provided int16 buffer.
TfLiteTensor CreateQuantizedTensor(float* data, int16_t* quantized_data,
                                   TfLiteIntArray* dims,
                                   bool is_variable = false);

// Wraps already-quantized int32 `data` (typically bias values) in a tensor
// with the given scale.
TfLiteTensor CreateQuantized32Tensor(const int32_t* data, TfLiteIntArray* dims,
                                     float scale, bool is_variable = false);
||||
|
||||
// Builds a TfLiteTensor view over caller-owned `data`. The tensor does not
// take ownership (allocation_type is kTfLiteMemNone), so `data` and `dims`
// must outlive the returned tensor.
// NOTE(review): only a subset of TfLiteTensor fields is initialized here;
// fields such as the quantization params are left as-is -- presumably
// acceptable for these deprecated test helpers, but confirm before reuse.
template <typename input_type = int32_t,
          TfLiteType tensor_input_type = kTfLiteInt32>
inline TfLiteTensor CreateTensor(const input_type* data, TfLiteIntArray* dims,
                                 bool is_variable = false) {
  TfLiteTensor result;
  result.type = tensor_input_type;
  // const_cast is needed because TfLiteTensor's data pointer is non-const;
  // callers must not mutate through the tensor when `data` is truly const.
  result.data.raw = reinterpret_cast<char*>(const_cast<input_type*>(data));
  result.dims = dims;
  result.allocation_type = kTfLiteMemNone;
  result.bytes = ElementCount(*dims) * sizeof(input_type);
  result.is_variable = is_variable;
  return result;
}
|
||||
|
||||
} // namespace testing
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_TESTING_TEST_UTILS_H_
|
@@ -0,0 +1,528 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_common_tables.h
|
||||
* Description: Extern declaration for common tables
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_COMMON_TABLES_H
|
||||
#define _ARM_COMMON_TABLES_H
|
||||
|
||||
#include "cmsis/CMSIS/DSP/Include/arm_math.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
/* Double Precision Float CFFT twiddles */
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
|
||||
extern const uint16_t armBitRevTable[1024];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_16)
|
||||
extern const uint64_t twiddleCoefF64_16[32];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_32)
|
||||
extern const uint64_t twiddleCoefF64_32[64];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_64)
|
||||
extern const uint64_t twiddleCoefF64_64[128];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_128)
|
||||
extern const uint64_t twiddleCoefF64_128[256];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_256)
|
||||
extern const uint64_t twiddleCoefF64_256[512];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_512)
|
||||
extern const uint64_t twiddleCoefF64_512[1024];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_1024)
|
||||
extern const uint64_t twiddleCoefF64_1024[2048];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_2048)
|
||||
extern const uint64_t twiddleCoefF64_2048[4096];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F64_4096)
|
||||
extern const uint64_t twiddleCoefF64_4096[8192];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_16)
|
||||
extern const float32_t twiddleCoef_16[32];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_32)
|
||||
extern const float32_t twiddleCoef_32[64];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_64)
|
||||
extern const float32_t twiddleCoef_64[128];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_128)
|
||||
extern const float32_t twiddleCoef_128[256];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_256)
|
||||
extern const float32_t twiddleCoef_256[512];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_512)
|
||||
extern const float32_t twiddleCoef_512[1024];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_1024)
|
||||
extern const float32_t twiddleCoef_1024[2048];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_2048)
|
||||
extern const float32_t twiddleCoef_2048[4096];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096)
|
||||
extern const float32_t twiddleCoef_4096[8192];
|
||||
#define twiddleCoef twiddleCoef_4096
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
/* Q31 */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_16)
|
||||
extern const q31_t twiddleCoef_16_q31[24];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_32)
|
||||
extern const q31_t twiddleCoef_32_q31[48];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_64)
|
||||
extern const q31_t twiddleCoef_64_q31[96];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_128)
|
||||
extern const q31_t twiddleCoef_128_q31[192];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_256)
|
||||
extern const q31_t twiddleCoef_256_q31[384];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_512)
|
||||
extern const q31_t twiddleCoef_512_q31[768];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_1024)
|
||||
extern const q31_t twiddleCoef_1024_q31[1536];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_2048)
|
||||
extern const q31_t twiddleCoef_2048_q31[3072];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096)
|
||||
extern const q31_t twiddleCoef_4096_q31[6144];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_16)
|
||||
extern const q15_t twiddleCoef_16_q15[24];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_32)
|
||||
extern const q15_t twiddleCoef_32_q15[48];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_64)
|
||||
extern const q15_t twiddleCoef_64_q15[96];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_128)
|
||||
extern const q15_t twiddleCoef_128_q15[192];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_256)
|
||||
extern const q15_t twiddleCoef_256_q15[384];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_512)
|
||||
extern const q15_t twiddleCoef_512_q15[768];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_1024)
|
||||
extern const q15_t twiddleCoef_1024_q15[1536];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_2048)
|
||||
extern const q15_t twiddleCoef_2048_q15[3072];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096)
|
||||
extern const q15_t twiddleCoef_4096_q15[6144];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
/* Double Precision Float RFFT twiddles */
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_32)
|
||||
extern const uint64_t twiddleCoefF64_rfft_32[32];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_64)
|
||||
extern const uint64_t twiddleCoefF64_rfft_64[64];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_128)
|
||||
extern const uint64_t twiddleCoefF64_rfft_128[128];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_256)
|
||||
extern const uint64_t twiddleCoefF64_rfft_256[256];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_512)
|
||||
extern const uint64_t twiddleCoefF64_rfft_512[512];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_1024)
|
||||
extern const uint64_t twiddleCoefF64_rfft_1024[1024];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_2048)
|
||||
extern const uint64_t twiddleCoefF64_rfft_2048[2048];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F64_4096)
|
||||
extern const uint64_t twiddleCoefF64_rfft_4096[4096];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32)
|
||||
extern const float32_t twiddleCoef_rfft_32[32];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64)
|
||||
extern const float32_t twiddleCoef_rfft_64[64];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128)
|
||||
extern const float32_t twiddleCoef_rfft_128[128];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256)
|
||||
extern const float32_t twiddleCoef_rfft_256[256];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512)
|
||||
extern const float32_t twiddleCoef_rfft_512[512];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024)
|
||||
extern const float32_t twiddleCoef_rfft_1024[1024];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048)
|
||||
extern const float32_t twiddleCoef_rfft_2048[2048];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096)
|
||||
extern const float32_t twiddleCoef_rfft_4096[4096];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
|
||||
/* Double precision floating-point bit reversal tables */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_16)
|
||||
#define ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH ((uint16_t)12)
|
||||
extern const uint16_t armBitRevIndexTableF64_16[ARMBITREVINDEXTABLEF64_16_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_32)
|
||||
#define ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH ((uint16_t)24)
|
||||
extern const uint16_t armBitRevIndexTableF64_32[ARMBITREVINDEXTABLEF64_32_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_64)
|
||||
#define ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH ((uint16_t)56)
|
||||
extern const uint16_t armBitRevIndexTableF64_64[ARMBITREVINDEXTABLEF64_64_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_128)
|
||||
#define ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH ((uint16_t)112)
|
||||
extern const uint16_t armBitRevIndexTableF64_128[ARMBITREVINDEXTABLEF64_128_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_256)
|
||||
#define ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH ((uint16_t)240)
|
||||
extern const uint16_t armBitRevIndexTableF64_256[ARMBITREVINDEXTABLEF64_256_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_512)
|
||||
#define ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH ((uint16_t)480)
|
||||
extern const uint16_t armBitRevIndexTableF64_512[ARMBITREVINDEXTABLEF64_512_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_1024)
|
||||
#define ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH ((uint16_t)992)
|
||||
extern const uint16_t armBitRevIndexTableF64_1024[ARMBITREVINDEXTABLEF64_1024_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_2048)
|
||||
#define ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH ((uint16_t)1984)
|
||||
extern const uint16_t armBitRevIndexTableF64_2048[ARMBITREVINDEXTABLEF64_2048_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT64_4096)
|
||||
#define ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH ((uint16_t)4032)
|
||||
extern const uint16_t armBitRevIndexTableF64_4096[ARMBITREVINDEXTABLEF64_4096_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
/* floating-point bit reversal tables */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_16)
|
||||
#define ARMBITREVINDEXTABLE_16_TABLE_LENGTH ((uint16_t)20)
|
||||
extern const uint16_t armBitRevIndexTable16[ARMBITREVINDEXTABLE_16_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_32)
|
||||
#define ARMBITREVINDEXTABLE_32_TABLE_LENGTH ((uint16_t)48)
|
||||
extern const uint16_t armBitRevIndexTable32[ARMBITREVINDEXTABLE_32_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_64)
|
||||
#define ARMBITREVINDEXTABLE_64_TABLE_LENGTH ((uint16_t)56)
|
||||
extern const uint16_t armBitRevIndexTable64[ARMBITREVINDEXTABLE_64_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_128)
|
||||
#define ARMBITREVINDEXTABLE_128_TABLE_LENGTH ((uint16_t)208)
|
||||
extern const uint16_t armBitRevIndexTable128[ARMBITREVINDEXTABLE_128_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_256)
|
||||
#define ARMBITREVINDEXTABLE_256_TABLE_LENGTH ((uint16_t)440)
|
||||
extern const uint16_t armBitRevIndexTable256[ARMBITREVINDEXTABLE_256_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_512)
|
||||
#define ARMBITREVINDEXTABLE_512_TABLE_LENGTH ((uint16_t)448)
|
||||
extern const uint16_t armBitRevIndexTable512[ARMBITREVINDEXTABLE_512_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_1024)
|
||||
#define ARMBITREVINDEXTABLE_1024_TABLE_LENGTH ((uint16_t)1800)
|
||||
extern const uint16_t armBitRevIndexTable1024[ARMBITREVINDEXTABLE_1024_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_2048)
|
||||
#define ARMBITREVINDEXTABLE_2048_TABLE_LENGTH ((uint16_t)3808)
|
||||
extern const uint16_t armBitRevIndexTable2048[ARMBITREVINDEXTABLE_2048_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FLT_4096)
|
||||
#define ARMBITREVINDEXTABLE_4096_TABLE_LENGTH ((uint16_t)4032)
|
||||
extern const uint16_t armBitRevIndexTable4096[ARMBITREVINDEXTABLE_4096_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
|
||||
/* fixed-point bit reversal tables */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16)
|
||||
#define ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH ((uint16_t)12)
|
||||
extern const uint16_t armBitRevIndexTable_fixed_16[ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_32)
|
||||
#define ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH ((uint16_t)24)
|
||||
extern const uint16_t armBitRevIndexTable_fixed_32[ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64)
|
||||
#define ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH ((uint16_t)56)
|
||||
extern const uint16_t armBitRevIndexTable_fixed_64[ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_128)
|
||||
#define ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH ((uint16_t)112)
|
||||
extern const uint16_t armBitRevIndexTable_fixed_128[ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256)
|
||||
#define ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH ((uint16_t)240)
|
||||
extern const uint16_t armBitRevIndexTable_fixed_256[ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_512)
|
||||
#define ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH ((uint16_t)480)
|
||||
extern const uint16_t armBitRevIndexTable_fixed_512[ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024)
|
||||
#define ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH ((uint16_t)992)
|
||||
extern const uint16_t armBitRevIndexTable_fixed_1024[ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_2048)
|
||||
#define ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH ((uint16_t)1984)
|
||||
extern const uint16_t armBitRevIndexTable_fixed_2048[ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
|
||||
#define ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH ((uint16_t)4032)
|
||||
extern const uint16_t armBitRevIndexTable_fixed_4096[ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_F32)
|
||||
extern const float32_t realCoefA[8192];
|
||||
extern const float32_t realCoefB[8192];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_Q31)
|
||||
extern const q31_t realCoefAQ31[8192];
|
||||
extern const q31_t realCoefBQ31[8192];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_Q15)
|
||||
extern const q15_t realCoefAQ15[8192];
|
||||
extern const q15_t realCoefBQ15[8192];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_128)
|
||||
extern const float32_t Weights_128[256];
|
||||
extern const float32_t cos_factors_128[128];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_512)
|
||||
extern const float32_t Weights_512[1024];
|
||||
extern const float32_t cos_factors_512[512];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_2048)
|
||||
extern const float32_t Weights_2048[4096];
|
||||
extern const float32_t cos_factors_2048[2048];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_8192)
|
||||
extern const float32_t Weights_8192[16384];
|
||||
extern const float32_t cos_factors_8192[8192];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_128)
|
||||
extern const q15_t WeightsQ15_128[256];
|
||||
extern const q15_t cos_factorsQ15_128[128];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_512)
|
||||
extern const q15_t WeightsQ15_512[1024];
|
||||
extern const q15_t cos_factorsQ15_512[512];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_2048)
|
||||
extern const q15_t WeightsQ15_2048[4096];
|
||||
extern const q15_t cos_factorsQ15_2048[2048];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_8192)
|
||||
extern const q15_t WeightsQ15_8192[16384];
|
||||
extern const q15_t cos_factorsQ15_8192[8192];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_128)
|
||||
extern const q31_t WeightsQ31_128[256];
|
||||
extern const q31_t cos_factorsQ31_128[128];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_512)
|
||||
extern const q31_t WeightsQ31_512[1024];
|
||||
extern const q31_t cos_factorsQ31_512[512];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_2048)
|
||||
extern const q31_t WeightsQ31_2048[4096];
|
||||
extern const q31_t cos_factorsQ31_2048[2048];
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_8192)
|
||||
extern const q31_t WeightsQ31_8192[16384];
|
||||
extern const q31_t cos_factorsQ31_8192[8192];
|
||||
#endif
|
||||
|
||||
#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_RECIP_Q15)
|
||||
extern const q15_t armRecipTableQ15[64];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_RECIP_Q31)
|
||||
extern const q31_t armRecipTableQ31[64];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
|
||||
|
||||
/* Tables for Fast Math Sine and Cosine */
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_F32)
|
||||
extern const float32_t sinTable_f32[FAST_MATH_TABLE_SIZE + 1];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q31)
|
||||
extern const q31_t sinTable_q31[FAST_MATH_TABLE_SIZE + 1];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q15)
|
||||
extern const q15_t sinTable_q15[FAST_MATH_TABLE_SIZE + 1];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE)
|
||||
extern const q31_t sqrtTable_Q31[256];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
|
||||
#endif
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q15_MVE)
|
||||
extern const q15_t sqrtTable_Q15[256];
|
||||
#endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
|
||||
#endif
|
||||
|
||||
#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_TABLES) */
|
||||
|
||||
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
extern const float32_t exp_tab[8];
|
||||
extern const float32_t __logf_lut_f32[8];
|
||||
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM))
|
||||
extern const unsigned char hwLUT[256];
|
||||
#endif /* (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ARM_COMMON_TABLES_H */
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,283 @@
|
||||
/**************************************************************************//**
|
||||
* @file cmsis_compiler.h
|
||||
* @brief CMSIS compiler generic header file
|
||||
* @version V5.1.0
|
||||
* @date 09. October 2018
|
||||
******************************************************************************/
|
||||
/*
|
||||
* Copyright (c) 2009-2018 Arm Limited. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __CMSIS_COMPILER_H
|
||||
#define __CMSIS_COMPILER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
* Arm Compiler 4/5
|
||||
*/
|
||||
#if defined ( __CC_ARM )
|
||||
#include "cmsis_armcc.h"
|
||||
|
||||
|
||||
/*
|
||||
* Arm Compiler 6.6 LTM (armclang)
|
||||
*/
|
||||
#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) && (__ARMCC_VERSION < 6100100)
|
||||
#include "cmsis_armclang_ltm.h"
|
||||
|
||||
/*
|
||||
* Arm Compiler above 6.10.1 (armclang)
|
||||
*/
|
||||
#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100)
|
||||
#include "cmsis_armclang.h"
|
||||
|
||||
|
||||
/*
|
||||
* GNU Compiler
|
||||
*/
|
||||
#elif defined ( __GNUC__ )
|
||||
#include "cmsis_gcc.h"
|
||||
|
||||
|
||||
/*
|
||||
* IAR Compiler
|
||||
*/
|
||||
#elif defined ( __ICCARM__ )
|
||||
#include <cmsis_iccarm.h>
|
||||
|
||||
|
||||
/*
|
||||
* TI Arm Compiler
|
||||
*/
|
||||
#elif defined ( __TI_ARM__ )
|
||||
#include <cmsis_ccs.h>
|
||||
|
||||
#ifndef __ASM
|
||||
#define __ASM __asm
|
||||
#endif
|
||||
#ifndef __INLINE
|
||||
#define __INLINE inline
|
||||
#endif
|
||||
#ifndef __STATIC_INLINE
|
||||
#define __STATIC_INLINE static inline
|
||||
#endif
|
||||
#ifndef __STATIC_FORCEINLINE
|
||||
#define __STATIC_FORCEINLINE __STATIC_INLINE
|
||||
#endif
|
||||
#ifndef __NO_RETURN
|
||||
#define __NO_RETURN __attribute__((noreturn))
|
||||
#endif
|
||||
#ifndef __USED
|
||||
#define __USED __attribute__((used))
|
||||
#endif
|
||||
#ifndef __WEAK
|
||||
#define __WEAK __attribute__((weak))
|
||||
#endif
|
||||
#ifndef __PACKED
|
||||
#define __PACKED __attribute__((packed))
|
||||
#endif
|
||||
#ifndef __PACKED_STRUCT
|
||||
#define __PACKED_STRUCT struct __attribute__((packed))
|
||||
#endif
|
||||
#ifndef __PACKED_UNION
|
||||
#define __PACKED_UNION union __attribute__((packed))
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT32 /* deprecated */
|
||||
struct __attribute__((packed)) T_UINT32 { uint32_t v; };
|
||||
#define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v)
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT16_WRITE
|
||||
__PACKED_STRUCT T_UINT16_WRITE { uint16_t v; };
|
||||
#define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void*)(addr))->v) = (val))
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT16_READ
|
||||
__PACKED_STRUCT T_UINT16_READ { uint16_t v; };
|
||||
#define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v)
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT32_WRITE
|
||||
__PACKED_STRUCT T_UINT32_WRITE { uint32_t v; };
|
||||
#define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val))
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT32_READ
|
||||
__PACKED_STRUCT T_UINT32_READ { uint32_t v; };
|
||||
#define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v)
|
||||
#endif
|
||||
#ifndef __ALIGNED
|
||||
#define __ALIGNED(x) __attribute__((aligned(x)))
|
||||
#endif
|
||||
#ifndef __RESTRICT
|
||||
#define __RESTRICT __restrict
|
||||
#endif
|
||||
#ifndef __COMPILER_BARRIER
|
||||
#warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored.
|
||||
#define __COMPILER_BARRIER() (void)0
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* TASKING Compiler
|
||||
*/
|
||||
#elif defined ( __TASKING__ )
|
||||
/*
|
||||
* The CMSIS functions have been implemented as intrinsics in the compiler.
|
||||
* Please use "carm -?i" to get an up to date list of all intrinsics,
|
||||
* Including the CMSIS ones.
|
||||
*/
|
||||
|
||||
#ifndef __ASM
|
||||
#define __ASM __asm
|
||||
#endif
|
||||
#ifndef __INLINE
|
||||
#define __INLINE inline
|
||||
#endif
|
||||
#ifndef __STATIC_INLINE
|
||||
#define __STATIC_INLINE static inline
|
||||
#endif
|
||||
#ifndef __STATIC_FORCEINLINE
|
||||
#define __STATIC_FORCEINLINE __STATIC_INLINE
|
||||
#endif
|
||||
#ifndef __NO_RETURN
|
||||
#define __NO_RETURN __attribute__((noreturn))
|
||||
#endif
|
||||
#ifndef __USED
|
||||
#define __USED __attribute__((used))
|
||||
#endif
|
||||
#ifndef __WEAK
|
||||
#define __WEAK __attribute__((weak))
|
||||
#endif
|
||||
#ifndef __PACKED
|
||||
#define __PACKED __packed__
|
||||
#endif
|
||||
#ifndef __PACKED_STRUCT
|
||||
#define __PACKED_STRUCT struct __packed__
|
||||
#endif
|
||||
#ifndef __PACKED_UNION
|
||||
#define __PACKED_UNION union __packed__
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT32 /* deprecated */
|
||||
struct __packed__ T_UINT32 { uint32_t v; };
|
||||
#define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v)
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT16_WRITE
|
||||
__PACKED_STRUCT T_UINT16_WRITE { uint16_t v; };
|
||||
#define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val))
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT16_READ
|
||||
__PACKED_STRUCT T_UINT16_READ { uint16_t v; };
|
||||
#define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v)
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT32_WRITE
|
||||
__PACKED_STRUCT T_UINT32_WRITE { uint32_t v; };
|
||||
#define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val))
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT32_READ
|
||||
__PACKED_STRUCT T_UINT32_READ { uint32_t v; };
|
||||
#define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v)
|
||||
#endif
|
||||
#ifndef __ALIGNED
|
||||
#define __ALIGNED(x) __align(x)
|
||||
#endif
|
||||
#ifndef __RESTRICT
|
||||
#warning No compiler specific solution for __RESTRICT. __RESTRICT is ignored.
|
||||
#define __RESTRICT
|
||||
#endif
|
||||
#ifndef __COMPILER_BARRIER
|
||||
#warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored.
|
||||
#define __COMPILER_BARRIER() (void)0
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* COSMIC Compiler
|
||||
*/
|
||||
#elif defined ( __CSMC__ )
|
||||
#include <cmsis_csm.h>
|
||||
|
||||
#ifndef __ASM
|
||||
#define __ASM _asm
|
||||
#endif
|
||||
#ifndef __INLINE
|
||||
#define __INLINE inline
|
||||
#endif
|
||||
#ifndef __STATIC_INLINE
|
||||
#define __STATIC_INLINE static inline
|
||||
#endif
|
||||
#ifndef __STATIC_FORCEINLINE
|
||||
#define __STATIC_FORCEINLINE __STATIC_INLINE
|
||||
#endif
|
||||
#ifndef __NO_RETURN
|
||||
// NO RETURN is automatically detected hence no warning here
|
||||
#define __NO_RETURN
|
||||
#endif
|
||||
#ifndef __USED
|
||||
#warning No compiler specific solution for __USED. __USED is ignored.
|
||||
#define __USED
|
||||
#endif
|
||||
#ifndef __WEAK
|
||||
#define __WEAK __weak
|
||||
#endif
|
||||
#ifndef __PACKED
|
||||
#define __PACKED @packed
|
||||
#endif
|
||||
#ifndef __PACKED_STRUCT
|
||||
#define __PACKED_STRUCT @packed struct
|
||||
#endif
|
||||
#ifndef __PACKED_UNION
|
||||
#define __PACKED_UNION @packed union
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT32 /* deprecated */
|
||||
@packed struct T_UINT32 { uint32_t v; };
|
||||
#define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v)
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT16_WRITE
|
||||
__PACKED_STRUCT T_UINT16_WRITE { uint16_t v; };
|
||||
#define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val))
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT16_READ
|
||||
__PACKED_STRUCT T_UINT16_READ { uint16_t v; };
|
||||
#define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v)
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT32_WRITE
|
||||
__PACKED_STRUCT T_UINT32_WRITE { uint32_t v; };
|
||||
#define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val))
|
||||
#endif
|
||||
#ifndef __UNALIGNED_UINT32_READ
|
||||
__PACKED_STRUCT T_UINT32_READ { uint32_t v; };
|
||||
#define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v)
|
||||
#endif
|
||||
#ifndef __ALIGNED
|
||||
#warning No compiler specific solution for __ALIGNED. __ALIGNED is ignored.
|
||||
#define __ALIGNED(x)
|
||||
#endif
|
||||
#ifndef __RESTRICT
|
||||
#warning No compiler specific solution for __RESTRICT. __RESTRICT is ignored.
|
||||
#define __RESTRICT
|
||||
#endif
|
||||
#ifndef __COMPILER_BARRIER
|
||||
#warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored.
|
||||
#define __COMPILER_BARRIER() (void)0
|
||||
#endif
|
||||
|
||||
|
||||
#else
|
||||
#error Unknown compiler.
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CMSIS_COMPILER_H */
|
||||
|
@@ -0,0 +1,56 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS NN Library
|
||||
* Title: arm_nn_tables.h
|
||||
* Description: Extern declaration for NN tables
|
||||
*
|
||||
* $Date: 17. January 2018
|
||||
* $Revision: V.1.0.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _ARM_NN_TABLES_H
|
||||
#define _ARM_NN_TABLES_H
|
||||
|
||||
#include "cmsis/CMSIS/DSP/Include/arm_math.h"
|
||||
|
||||
/**
|
||||
* @brief tables for various activation functions
|
||||
*
|
||||
*/
|
||||
|
||||
extern const q15_t sigmoidTable_q15[256];
|
||||
extern const q7_t sigmoidTable_q7[256];
|
||||
|
||||
extern const q7_t tanhTable_q7[256];
|
||||
extern const q15_t tanhTable_q15[256];
|
||||
|
||||
/**
|
||||
* @brief 2-way tables for various activation functions
|
||||
*
|
||||
* 2-way table, H table for value larger than 1/4
|
||||
* L table for value smaller than 1/4, H table for remaining
|
||||
* We have this only for the q15_t version. It does not make
|
||||
* sense to have it for q7_t type
|
||||
*/
|
||||
extern const q15_t sigmoidHTable_q15[192];
|
||||
extern const q15_t sigmoidLTable_q15[128];
|
||||
|
||||
#endif /* ARM_NN_TABLES_H */
|
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS NN Library
|
||||
* Title: arm_nn_types.h
|
||||
* Description: Public header file to contain the CMSIS-NN structs for the
|
||||
* TensorFlowLite micro compliant functions
|
||||
*
|
||||
* $Date: April 23, 2020
|
||||
* $Revision: V.0.5.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
|
||||
#ifndef _ARM_NN_TYPES_H
|
||||
#define _ARM_NN_TYPES_H
|
||||
|
||||
/** CMSIS-NN object to contain the width and height of a tile */
|
||||
typedef struct
|
||||
{
|
||||
int32_t w; /**< Width */
|
||||
int32_t h; /**< Height */
|
||||
} cmsis_nn_tile;
|
||||
|
||||
/** CMSIS-NN object used for the function context. */
|
||||
typedef struct
|
||||
{
|
||||
void *buf; /**< Pointer to a buffer needed for the optimization */
|
||||
int32_t size; /**< Buffer size */
|
||||
} cmsis_nn_context;
|
||||
|
||||
/** CMSIS-NN object to contain the dimensions of the tensors */
|
||||
typedef struct
|
||||
{
|
||||
int32_t n; /**< Generic dimension to contain either the batch size or output channels. Please refer to the function documentation for more information */
|
||||
int32_t h; /**< Height */
|
||||
int32_t w; /**< Width */
|
||||
int32_t c; /**< Input channels */
|
||||
} cmsis_nn_dims;
|
||||
|
||||
/** CMSIS-NN object for the per-channel quantization parameters */
|
||||
typedef struct
|
||||
{
|
||||
int32_t *multiplier; /**< Multiplier values */
|
||||
int32_t *shift; /**< Shift values */
|
||||
} cmsis_nn_per_channel_quant_params;
|
||||
|
||||
/** CMSIS-NN object for the per-tensor quantization parameters */
|
||||
typedef struct
|
||||
{
|
||||
int32_t multiplier; /**< Multiplier value */
|
||||
int32_t shift; /**< Shift value */
|
||||
} cmsis_nn_per_tensor_quant_params;
|
||||
|
||||
/** CMSIS-NN object for the quantized Relu activation */
|
||||
typedef struct
|
||||
{
|
||||
int32_t min; /**< Min value used to clamp the result */
|
||||
int32_t max; /**< Max value used to clamp the result */
|
||||
} cmsis_nn_activation;
|
||||
|
||||
/** CMSIS-NN object for the convolution layer parameters */
|
||||
typedef struct
|
||||
{
|
||||
int32_t input_offset; /**< Zero value for the input tensor */
|
||||
int32_t output_offset; /**< Zero value for the output tensor */
|
||||
cmsis_nn_tile stride;
|
||||
cmsis_nn_tile padding;
|
||||
cmsis_nn_tile dilation;
|
||||
cmsis_nn_activation activation;
|
||||
} cmsis_nn_conv_params;
|
||||
|
||||
/** CMSIS-NN object for Depthwise convolution layer parameters */
|
||||
typedef struct
|
||||
{
|
||||
int32_t input_offset; /**< Zero value for the input tensor */
|
||||
int32_t output_offset; /**< Zero value for the output tensor */
|
||||
int32_t ch_mult; /**< Channel Multiplier. ch_mult * in_ch = out_ch */
|
||||
cmsis_nn_tile stride;
|
||||
cmsis_nn_tile padding;
|
||||
cmsis_nn_tile dilation;
|
||||
cmsis_nn_activation activation;
|
||||
} cmsis_nn_dw_conv_params;
|
||||
/** CMSIS-NN object for pooling layer parameters */
|
||||
typedef struct
|
||||
{
|
||||
cmsis_nn_tile stride;
|
||||
cmsis_nn_tile padding;
|
||||
cmsis_nn_activation activation;
|
||||
} cmsis_nn_pool_params;
|
||||
|
||||
/** CMSIS-NN object for Fully Connected layer parameters */
|
||||
typedef struct
|
||||
{
|
||||
int32_t input_offset; /**< Zero value for the input tensor */
|
||||
int32_t filter_offset; /**< Zero value for the filter tensor */
|
||||
int32_t output_offset; /**< Zero value for the output tensor */
|
||||
cmsis_nn_activation activation;
|
||||
} cmsis_nn_fc_params;
|
||||
|
||||
#endif // _ARM_NN_TYPES_H
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,905 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS NN Library
|
||||
* Title: arm_nnsupportfunctions.h
|
||||
* Description: Public header file of support functions for CMSIS NN Library
|
||||
*
|
||||
* $Date: May 11, 2020
|
||||
* $Revision: V.4.0.4
|
||||
*
|
||||
* Target Processor: Cortex-M CPUs
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#ifndef _ARM_NNSUPPORTFUNCTIONS_H_
|
||||
#define _ARM_NNSUPPORTFUNCTIONS_H_
|
||||
|
||||
#include "cmsis/CMSIS/DSP/Include/arm_math.h"
|
||||
#include "cmsis/CMSIS/DSP/Include/arm_common_tables.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#define LEFT_SHIFT(_shift) (_shift > 0 ? _shift : 0)
|
||||
#define RIGHT_SHIFT(_shift) (_shift > 0 ? 0 : -_shift)
|
||||
#define MASK_IF_ZERO(x) (x) == 0 ? ~0 : 0
|
||||
#define MASK_IF_NON_ZERO(x) (x) != 0 ? ~0 : 0
|
||||
#define SELECT_USING_MASK(mask, a, b) ((mask) & (a)) ^ (~(mask) & (b))
|
||||
|
||||
#define MAX(A,B) ((A) > (B) ? (A) : (B))
|
||||
#define MIN(A,B) ((A) < (B) ? (A) : (B))
|
||||
#define CLAMP(x, h, l) MAX(MIN((x), (h)), (l))
|
||||
|
||||
/**
|
||||
* @brief Union for SIMD access of q31/q15/q7 types
|
||||
*/
|
||||
union arm_nnword
|
||||
{
|
||||
q31_t word;
|
||||
/**< q31 type */
|
||||
q15_t half_words[2];
|
||||
/**< q15 type */
|
||||
q7_t bytes[4];
|
||||
/**< q7 type */
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Struct for specifying activation function types
|
||||
*
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
ARM_SIGMOID = 0,
|
||||
/**< Sigmoid activation function */
|
||||
ARM_TANH = 1,
|
||||
/**< Tanh activation function */
|
||||
} arm_nn_activation_type;
|
||||
|
||||
/**
|
||||
* @defgroup nndata_convert Neural Network Data Conversion Functions
|
||||
*
|
||||
* Perform data type conversion in-between neural network operations
|
||||
*
|
||||
*/
|
||||
|
||||
/**
 * @brief Converts the elements of the q7 vector to q15 vector without left-shift
 * @param[in]  *pSrc      points to the q7 input vector
 * @param[out] *pDst      points to the q15 output vector
 * @param[in]  blockSize  length of the input vector
 *
 */
void arm_q7_to_q15_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize);

/**
 * @brief Non-saturating addition of elements of a q7 vector
 * @param[in]  *input      Pointer to the q7 input vector
 * @param[out] *output     Pointer to the q31 output variable.
 * @param[in]  block_size  length of the input vector
 * \par Description:
 *
 * 2^24 samples can be added without saturating the result.
 *
 * The equation used for the conversion process is:
 *
 * <pre>
 *  sum = input[0] + input[1] + .. + input[block_size -1]
 * </pre>
 *
 * */
void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size);

/**
 * @brief Converts the elements of the q7 vector to reordered q15 vector without left-shift
 * @param[in]  *pSrc      points to the q7 input vector
 * @param[out] *pDst      points to the q15 output vector
 * @param[in]  blockSize  length of the input vector
 * @return none.
 *
 */
void arm_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize);

/**
 * @brief Converts the elements from a q7 vector to a q15 vector with an added offset
 * @param[in]  src         pointer to the q7 input vector
 * @param[out] dst         pointer to the q15 output vector
 * @param[in]  block_size  length of the input vector
 * @param[in]  offset      q7 offset to be added to each input vector element.
 *
 * \par Description:
 *
 * The equation used for the conversion process is:
 *
 * <pre>
 *  dst[n] = (q15_t) src[n] + offset;   0 <= n < block_size.
 * </pre>
 *
 */
void arm_q7_to_q15_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset);

/**
 * @brief Converts the elements of the q7 vector to reordered q15 vector with an added offset
 * @param[in]  src         pointer to the q7 input vector
 * @param[out] dst         pointer to the q15 output vector
 * @param[in]  block_size  length of the input vector
 * @param[in]  offset      offset to be added to each input vector element.
 * @return none.
 *
 * @details  This function does the q7 to q15 expansion with re-ordering of bytes. Re-ordering is a consequence of
 *           the sign extension intrinsic (DSP extension). The tail (i.e., last (N % 4) elements) retains its
 *           original order.
 *
 */
void arm_q7_to_q15_reordered_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset);

/**
 * @brief Converts the elements from a q7 vector and accumulate to a q15 vector
 * @param[in,out] *dst        points to the q15 output vector (accumulated into)
 * @param[in]     *src        points to the q7 input vector
 * @param[in]     block_size  length of the input vector
 *
 * \par Description:
 *
 * The equation used for the conversion process is:
 *
 * <pre>
 *  dst[n] += (q15_t) src[n] ;   0 <= n < block_size.
 * </pre>
 *
 */
void arm_nn_accumulate_q7_to_q15(q15_t *dst, const q7_t *src, uint32_t block_size);
|
||||
|
||||
/**
 * @brief Depthwise conv on an im2col buffer where the input channel equals output channel.
 * @param[in]  row             pointer to row
 * @param[in]  col             pointer to im2col buffer, always consists of 2 columns.
 * @param[in]  num_ch          number of channels
 * @param[in]  out_shift       pointer to per output channel requantization shift parameter.
 * @param[in]  out_mult        pointer to per output channel requantization multiplier parameter.
 * @param[in]  out_offset      output tensor offset.
 * @param[in]  activation_min  minimum value to clamp the output to. Range : int8
 * @param[in]  activation_max  maximum value to clamp the output to. Range : int8
 * @param[in]  kernel_size     number of elements in one column.
 * @param[in]  output_bias     per output channel bias. Range : int32
 * @param[out] out             pointer to output
 * @return     The function returns one of the two
 *              1. The incremented output pointer for a successful operation or
 *              2. NULL if implementation is not available.
 *
 * @details Supported framework: TensorFlow Lite micro.
 */
q7_t *arm_nn_depthwise_conv_s8_core(const q7_t *row,
                                    const q15_t *col,
                                    const uint16_t num_ch,
                                    const int32_t *out_shift,
                                    const int32_t *out_mult,
                                    const int32_t out_offset,
                                    const int32_t activation_min,
                                    const int32_t activation_max,
                                    const uint16_t kernel_size,
                                    const int32_t *const output_bias,
                                    q7_t *out);

/**
 * @brief General Matrix-multiplication function with per-channel requantization.
 * @param[in]  input_row           pointer to row operand
 * @param[in]  input_col           pointer to col operand
 * @param[in]  output_ch           number of rows of input_row
 * @param[in]  col_batches         number of column batches. Range: 1 to 4
 * @param[in]  output_shift        pointer to per output channel requantization shift parameter.
 * @param[in]  output_mult         pointer to per output channel requantization multiplier parameter.
 * @param[in]  out_offset          output tensor offset.
 * @param[in]  col_offset          input tensor(col) offset.
 * @param[in]  row_offset          kernel offset(row). Not used.
 * @param[in]  out_activation_min  minimum value to clamp the output to. Range : int8
 * @param[in]  out_activation_max  maximum value to clamp the output to. Range : int8
 * @param[in]  row_len             number of elements in each row
 * @param[in]  bias                per output channel bias. Range : int32
 * @param[in,out] out              pointer to output
 * @return     The function returns one of the two
 *              1. The incremented output pointer for a successful operation or
 *              2. NULL if implementation is not available.
 *
 * @details Supported framework: TensorFlow Lite
 */
q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
                         const q7_t *input_col,
                         const uint16_t output_ch,
                         const uint16_t col_batches,
                         const int32_t *output_shift,
                         const int32_t *output_mult,
                         const int32_t out_offset,
                         const int32_t col_offset,
                         const int32_t row_offset,
                         const int16_t out_activation_min,
                         const int16_t out_activation_max,
                         const uint16_t row_len,
                         const int32_t *const bias,
                         q7_t *out);
|
||||
|
||||
/**
 * @brief General Matrix-multiplication without requantization for one row & one column
 * @param[in]  row_elements  number of row elements
 * @param[in]  row_base      pointer to row operand
 * @param[in]  col_base      pointer to col operand
 * @param[out] sum_col       pointer to store sum of column elements
 * @param[out] output        pointer to store result of multiply-accumulate
 * @return     The function returns the multiply-accumulated result of the row by column.
 *
 * @details Pseudo-code
 *      *output = 0
 *      sum_col = 0
 *      for (i = 0; i < row_elements; i++)
 *          *output += row_base[i] * col_base[i]
 *          sum_col += col_base[i]
 *
 */
arm_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements,
                                     const int8_t *row_base,
                                     const int8_t *col_base,
                                     int32_t *const sum_col,
                                     int32_t *const output);

/**
 * @brief General Matrix-multiplication without requantization for four rows and one column
 * @param[in]  row_elements  number of row elements
 * @param[in]  offset        offset between rows. Can be the same as row_elements.
 *                           For e.g, in a 1x1 conv scenario with stride as 1.
 * @param[in]  row_base      pointer to row operand
 * @param[in]  col_base      pointer to col operand
 * @param[out] sum_col       pointer to store sum of column elements
 * @param[out] output        pointer to store result(4 int32's) of multiply-accumulate
 * @return     The function returns the multiply-accumulated result of the row by column
 *
 * @details Pseudo-code
 *      output[0] = 0
 *      ..
 *      output[3] = 0
 *      sum_col = 0
 *      for (i = 0; i < row_elements; i++)
 *          output[0] += row_base[i] * col_base[i]
 *          ..
 *          output[3] += row_base[i + (row_elements * 3)] * col_base[i]
 *          sum_col += col_base[i]
 */
arm_status arm_nn_mat_mul_core_4x_s8(const int32_t row_elements,
                                     const int32_t offset,
                                     const int8_t *row_base,
                                     const int8_t *col_base,
                                     int32_t *const sum_col,
                                     int32_t *const output);
|
||||
|
||||
/**
 * @brief General Matrix-multiplication function with per-channel requantization.
 *        This function assumes:
 *        - LHS input matrix NOT transposed (nt)
 *        - RHS input matrix transposed (t)
 *
 *  @note This operation also performs the broadcast bias addition before the requantization
 *
 * @param[in]  lhs             Pointer to the LHS input matrix
 * @param[in]  rhs             Pointer to the RHS input matrix
 * @param[in]  bias            Pointer to the bias vector. The length of this vector is equal to the number of
 *                             output columns (or RHS input rows)
 * @param[out] dst             Pointer to the output matrix with "m" rows and "n" columns
 * @param[in]  dst_multipliers Pointer to the multipliers vector needed for the per-channel requantization.
 *                             The length of this vector is equal to the number of output columns (or RHS input rows)
 * @param[in]  dst_shifts      Pointer to the shifts vector needed for the per-channel requantization.
 *                             The length of this vector is equal to the number of output columns (or RHS input rows)
 * @param[in]  lhs_rows        Number of LHS input rows
 * @param[in]  rhs_rows        Number of RHS input rows
 * @param[in]  rhs_cols        Number of LHS/RHS input columns
 * @param[in]  lhs_offset      Offset to be applied to the LHS input value
 * @param[in]  dst_offset      Offset to be applied the output result
 * @param[in]  activation_min  Minimum value to clamp down the output. Range : int8
 * @param[in]  activation_max  Maximum value to clamp up the output. Range : int8
 *
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 */
arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
                                   const q7_t *rhs,
                                   const q31_t *bias,
                                   q7_t *dst,
                                   const int32_t *dst_multipliers,
                                   const int32_t *dst_shifts,
                                   const int32_t lhs_rows,
                                   const int32_t rhs_rows,
                                   const int32_t rhs_cols,
                                   const int32_t lhs_offset,
                                   const int32_t dst_offset,
                                   const int32_t activation_min,
                                   const int32_t activation_max);

/**
 * @brief s8 Vector by Matrix (transposed) multiplication
 *
 * @param[in]  lhs             Input left-hand side vector
 * @param[in]  rhs             Input right-hand side matrix (transposed)
 * @param[in]  bias            Input bias
 * @param[out] dst             Output vector
 * @param[in]  lhs_offset      Offset to be added to the input values of the left-hand side vector. Range: -127 to 128
 * @param[in]  rhs_offset      Offset to be added to the input values of the right-hand side matrix. Range: -127 to 128
 * @param[in]  dst_offset      Offset to be added to the output values. Range: -127 to 128
 * @param[in]  dst_multiplier  Output multiplier
 * @param[in]  dst_shift       Output shift
 * @param[in]  rhs_cols        Number of columns in the right-hand side input matrix
 * @param[in]  rhs_rows        Number of rows in the right-hand side input matrix
 * @param[in]  activation_min  Minimum value to clamp the output to. Range: int8
 * @param[in]  activation_max  Maximum value to clamp the output to. Range: int8
 *
 * @return     The function returns <code>ARM_MATH_SUCCESS</code>
 *
 */
arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
                                    const q7_t *rhs,
                                    const q31_t *bias,
                                    q7_t *dst,
                                    const int32_t lhs_offset,
                                    const int32_t rhs_offset,
                                    const int32_t dst_offset,
                                    const int32_t dst_multiplier,
                                    const int32_t dst_shift,
                                    const int32_t rhs_cols,
                                    const int32_t rhs_rows,
                                    const int32_t activation_min,
                                    const int32_t activation_max);
|
||||
|
||||
/**
 * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where
 *        the padding is -lhs_offset(Range: int8). Dimensions are the same for lhs and rhs.
 *
 * @param[in]  lhs             Input left-hand side matrix
 * @param[in]  rhs             Input right-hand side matrix (transposed)
 * @param[in]  lhs_offset      LHS matrix offset(input offset). Range: -127 to 128
 * @param[in]  num_ch          Number of channels in LHS/RHS
 * @param[in]  out_shift       Per channel output shift. Length of vector is equal to number of channels
 * @param[in]  out_mult        Per channel output multiplier. Length of vector is equal to number of channels
 * @param[in]  out_offset      Offset to be added to the output values. Range: -127 to 128
 * @param[in]  activation_min  Minimum value to clamp the output to. Range: int8
 * @param[in]  activation_max  Maximum value to clamp the output to. Range: int8
 * @param[in]  row_x_col       (row_dimension * col_dimension) of LHS/RHS matrix
 * @param[in]  output_bias     Per channel output bias. Length of vector is equal to number of channels
 * @param[in]  out             Output pointer
 *
 * @return     The function returns one of the two
 *              - Updated output pointer if an implementation is available
 *              - NULL if no implementation is available.
 *
 * @note       If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read
 *             out for the following.
 *              - Output shift
 *              - Output multiplier
 *              - Output bias
 *              - rhs
 */
q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
                                           const q7_t *rhs,
                                           const int32_t lhs_offset,
                                           const uint16_t num_ch,
                                           const int32_t *out_shift,
                                           const int32_t *out_mult,
                                           const int32_t out_offset,
                                           const int32_t activation_min,
                                           const int32_t activation_max,
                                           const uint16_t row_x_col,
                                           const int32_t *const output_bias,
                                           q7_t *out);

/**
 * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases.
 *        Dimensions are the same for lhs and rhs.
 *
 * @param[in]  lhs             Input left-hand side matrix
 * @param[in]  rhs             Input right-hand side matrix (transposed)
 * @param[in]  lhs_offset      LHS matrix offset(input offset). Range: -127 to 128
 * @param[in]  num_ch          Number of channels in LHS/RHS
 * @param[in]  out_shift       Per channel output shift. Length of vector is equal to number of channels.
 * @param[in]  out_mult        Per channel output multiplier. Length of vector is equal to number of channels.
 * @param[in]  out_offset      Offset to be added to the output values. Range: -127 to 128
 * @param[in]  activation_min  Minimum value to clamp the output to. Range: int8
 * @param[in]  activation_max  Maximum value to clamp the output to. Range: int8
 * @param[in]  row_x_col       (row_dimension * col_dimension) of LHS/RHS matrix
 * @param[in]  output_bias     Per channel output bias. Length of vector is equal to number of channels.
 * @param[in]  out             Output pointer
 *
 * @return     The function returns one of the two
 *              - Updated output pointer if an implementation is available
 *              - NULL if no implementation is available.
 *
 * @note       If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read
 *             out for the following.
 *              - Output shift
 *              - Output multiplier
 *              - Output bias
 *              - rhs
 */
q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
                                    const q7_t *rhs,
                                    const int32_t lhs_offset,
                                    const uint16_t num_ch,
                                    const int32_t *out_shift,
                                    const int32_t *out_mult,
                                    const int32_t out_offset,
                                    const int32_t activation_min,
                                    const int32_t activation_max,
                                    const uint16_t row_x_col,
                                    const int32_t *const output_bias,
                                    q7_t *out);
|
||||
|
||||
/**
  @brief         Read two q15 elements as a packed 32-bit word and advance the
                 source pointer past them.
  @param[in,out] in_q15  Pointer to the source pointer; advanced by 2 elements.
  @return        The two q15 values packed into a q31 word.
 */
__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2_ia(const q15_t **in_q15)
{
    q31_t packed;

    /* memcpy keeps the access alignment-safe on all targets. */
    memcpy(&packed, *in_q15, 4);
    *in_q15 += 2;
    return packed;
}
|
||||
|
||||
/**
  @brief         Read four q7 elements as a packed 32-bit word and advance the
                 source pointer past them.
  @param[in,out] in_q7  Pointer to the source pointer; advanced by 4 elements.
  @return        The four q7 values packed into a q31 word.
 */
__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4_ia(const q7_t **in_q7)
{
    q31_t packed;

    /* memcpy keeps the access alignment-safe on all targets. */
    memcpy(&packed, *in_q7, 4);
    *in_q7 += 4;
    return packed;
}
|
||||
|
||||
/**
  @brief      Read two q15 elements as a packed 32-bit word (pointer unchanged).
  @param[in]  in_q15  Pointer to the input.
  @return     The two q15 values packed into a q31 word.
 */
__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2(const q15_t *in_q15)
{
    q31_t packed;

    /* memcpy keeps the access alignment-safe on all targets. */
    memcpy(&packed, in_q15, 4);
    return packed;
}
|
||||
|
||||
/**
  @brief      Read four q7 elements as a packed 32-bit word (pointer unchanged).
  @param[in]  in_q7  Pointer to the input.
  @return     The four q7 values packed into a q31 word.
 */
__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4(const q7_t *in_q7)
{
    q31_t packed;

    /* memcpy keeps the access alignment-safe on all targets. */
    memcpy(&packed, in_q7, 4);
    return packed;
}
|
||||
|
||||
/**
 * @brief memset optimized for MVE
 *        Uses a tail-predicated MVE loop when ARM_MATH_MVEI is defined;
 *        otherwise falls back to the C library memset.
 * @param[in, out] dst         Destination pointer
 * @param[in]      val         Value to set
 * @param[in]      block_size  Number of bytes to copy.
 *
 */
__STATIC_FORCEINLINE void arm_memset_q7(q7_t *dst,
                                        const q7_t val,
                                        uint32_t block_size)
{
#if defined(ARM_MATH_MVEI)
    /* Splat 'val' into q0, then store 16 bytes per iteration; the
       wlstp/letp tail-predicated loop handles any remainder bytes. */
    __asm volatile (
        " vdup.8 q0, %[set_val] \n"
        " wlstp.8 lr, %[cnt], 1f \n"
        "2: \n"
        " vstrb.8 q0, [%[in]], 16 \n"
        " letp lr, 2b \n"
        "1: \n"
        :[in] "+r"(dst)
        :[cnt] "r"(block_size), [set_val] "r"(val)
        :"q0", "memory", "r14");
#else
    memset(dst, val, block_size);
#endif
}
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/**
 * @brief read and expand one q7 word into two q15 words
 *        Output preserves the source element order: out1 holds elements
 *        0 and 1, out2 holds elements 2 and 3 (the PKH re-pack undoes the
 *        even/odd split that __SXTB16 produces).
 */

__STATIC_FORCEINLINE const q7_t *read_and_pad(const q7_t *source, q31_t * out1, q31_t * out2)
{
    q31_t inA = arm_nn_read_q7x4_ia(&source);
    /* __SXTB16 sign-extends bytes 0 and 2; rotating by 8 first exposes
       bytes 1 and 3 to the same instruction. */
    q31_t inAbuf1 = __SXTB16(__ROR((uint32_t)inA, 8));
    q31_t inAbuf2 = __SXTB16(inA);

#ifndef ARM_MATH_BIG_ENDIAN
    *out2 = (int32_t) (__PKHTB (inAbuf1, inAbuf2, 16));
    *out1 = (int32_t) (__PKHBT (inAbuf2, inAbuf1, 16));
#else
    *out1 = (int32_t) (__PKHTB(inAbuf1, inAbuf2, 16));
    *out2 = (int32_t) (__PKHBT(inAbuf2, inAbuf1, 16));
#endif

    /* Pointer was advanced by 4 elements by the read above. */
    return source;
}
|
||||
|
||||
/**
 * @brief read and expand one q7 word into two q15 words with reordering
 *        Cheaper than read_and_pad: the even/odd interleave produced by
 *        __SXTB16 is kept, so consumers must use matching reordered data.
 */

__STATIC_FORCEINLINE const q7_t *read_and_pad_reordered(const q7_t *source, q31_t * out1, q31_t * out2)
{
    q31_t inA = arm_nn_read_q7x4_ia(&source);
#ifndef ARM_MATH_BIG_ENDIAN
    /* out2 <- sign-extended odd bytes, out1 <- sign-extended even bytes. */
    *out2 = __SXTB16(__ROR((uint32_t)inA, 8));
    *out1 = __SXTB16(inA);
#else
    *out1 = __SXTB16(__ROR((uint32_t)inA, 8));
    *out2 = __SXTB16(inA);
#endif

    /* Pointer was advanced by 4 elements by the read above. */
    return source;
}
|
||||
|
||||
/**
 * @brief read and expand one q7 word into two q15 words with reordering and add an offset
 *        Same interleaved expansion as read_and_pad_reordered, then a
 *        saturating add of 'offset' to both q15 lanes of each output word
 *        (offset is expected packed as two identical q15 half-words —
 *        TODO confirm against callers).
 */
__STATIC_FORCEINLINE const q7_t *read_and_pad_reordered_with_offset(const q7_t *source, q31_t * out1, q31_t * out2, q31_t offset)
{
    q31_t inA = arm_nn_read_q7x4_ia(&source);

#ifndef ARM_MATH_BIG_ENDIAN
    *out2 = __SXTB16(__ROR((uint32_t)inA, 8));
    *out1 = __SXTB16(inA);
#else
    *out1 = __SXTB16(__ROR((uint32_t)inA, 8));
    *out2 = __SXTB16(inA);
#endif
    /* Dual 16-bit saturating add of the offset to both lanes. */
    *out1 = __QADD16(*out1,offset);
    *out2 = __QADD16(*out2,offset);

    return source;
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @defgroup NNBasicMath Basic Math Functions for Neural Network Computation
|
||||
*
|
||||
* Basic Math Functions for Neural Network Computation
|
||||
*
|
||||
*/
|
||||
|
||||
/**
 * @brief q15 vector multiplication with variable output shifts
 * @param[in]  *pSrcA     pointer to the first input vector
 * @param[in]  *pSrcB     pointer to the second input vector
 * @param[out] *pDst      pointer to the output vector
 * @param[in]  out_shift  amount of right-shift for output
 * @param[in]  blockSize  number of samples in each vector
 * @return none.
 *
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The function uses saturating arithmetic.
 * Results outside of the allowable q15 range [0x8000 0x7FFF] will be saturated.
 */

void arm_nn_mult_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  q15_t * pDst,
  const uint16_t out_shift,
  uint32_t blockSize);

/**
 * @brief q7 vector multiplication with variable output shifts
 * @param[in]  *pSrcA     pointer to the first input vector
 * @param[in]  *pSrcB     pointer to the second input vector
 * @param[out] *pDst      pointer to the output vector
 * @param[in]  out_shift  amount of right-shift for output
 * @param[in]  blockSize  number of samples in each vector
 * @return none.
 *
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The function uses saturating arithmetic.
 * Results outside of the allowable q7 range [0x80 0x7F] will be saturated.
 */

void arm_nn_mult_q7(
  q7_t * pSrcA,
  q7_t * pSrcB,
  q7_t * pDst,
  const uint16_t out_shift,
  uint32_t blockSize);
|
||||
|
||||
/**
 * @brief Macro for adding a rounding offset before a right shift of
 *        'out_shift' bits (half of the shifted-out range). Expands to 0
 *        when ARM_NN_TRUNCATE is defined, i.e. truncation instead of
 *        round-to-nearest. The argument is parenthesized so expressions
 *        like NN_ROUND(a + b) expand correctly.
 */
#ifndef ARM_NN_TRUNCATE
#define NN_ROUND(out_shift) ((0x1u << (out_shift)) >> 1)
#else
#define NN_ROUND(out_shift) 0
#endif
|
||||
|
||||
// Macros for shortening quantization functions' names and avoid long lines
// (scalar forms and their MVE vector counterparts).
#define MUL_SAT(a, b) arm_nn_sat_doubling_high_mult((a), (b))
#define MUL_SAT_MVE(a, b) arm_sat_doubling_high_mult_mve_32x4((a), (b))
#define MUL_POW2(a, b) arm_nn_mult_by_power_of_two((a), (b))

#define DIV_POW2(a, b) arm_nn_divide_by_power_of_two((a), (b))
#define DIV_POW2_MVE(a, b) arm_divide_by_power_of_two_mve((a), (b))

// Softmax helpers (see the softmax-only section further below).
#define EXP_ON_NEG(x) arm_nn_exp_on_negative_values((x))
#define ONE_OVER1(x) arm_nn_one_over_one_plus_x_for_x_in_0_1((x))
|
||||
|
||||
/**
 * @brief Saturating doubling high multiply. Result matches
 *        NEON instruction VQRDMULH.
 * @param[in]  m1        Multiplicand
 * @param[in]  m2        Multiplier
 * @return     Result of multiplication.
 *
 */
__STATIC_FORCEINLINE q31_t arm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
{
    q31_t result = 0;
    // Rounding offset to add for a right shift of 31
    q63_t mult = 1 << 30;

    // Flip the rounding offset's sign when the product is negative so the
    // midpoint rounds away from zero.
    if ((m1 < 0) ^ (m2 < 0))
    {
        mult = 1 - mult;
    }
    // Gets resolved as a SMLAL instruction
    mult = mult + (q63_t)m1 * m2;

    // Utilize all of the upper 32 bits. This is the doubling step
    // as well.
    result = (int32_t) (mult / (1ll << 31));

    // The only overflow case: Q31_MIN * Q31_MIN doubles past Q31_MAX;
    // saturate it explicitly, matching VQRDMULH.
    if ((m1 == m2) && (m1 == (int32_t)Q31_MIN))
    {
        result = Q31_MAX;
    }
    return result;
}
|
||||
|
||||
/**
 * @brief Rounding divide by power of two.
 * @param[in]  dividend - Dividend
 * @param[in]  exponent - Divisor = power(2, exponent)
 *                        Range: [0, 31]
 * @return     Rounded result of division. Midpoint is rounded away from zero.
 *
 */
__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
{
    const q31_t remainder_mask = (1 << exponent) - 1;
    const int32_t remainder = dividend & remainder_mask;

    /* Truncating division toward negative infinity (arithmetic shift). */
    q31_t quotient = dividend >> exponent;

    /* Round the midpoint away from zero: for a negative quotient the
       threshold is raised by one so that exactly-half remainders round
       toward zero in magnitude terms (i.e. away from zero in value). */
    q31_t round_threshold = remainder_mask >> 1;
    if (quotient < 0)
    {
        round_threshold++;
    }
    if (remainder > round_threshold)
    {
        quotient++;
    }

    return quotient;
}
|
||||
|
||||
/**
 * @brief Requantize a given value.
 * @param[in]  val         Value to be requantized
 * @param[in]  multiplier  multiplier (presumably a Q31 fixed-point value, as
 *                         used by the TFLite requantization scheme — verify
 *                         against callers)
 * @param[in]  shift       left or right shift for 'val * multiplier';
 *                         positive means left shift
 *
 * @return     Returns (val * multiplier)/(2 ^ shift)
 *
 */
__STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
{
    // Any left-shift component is applied before the saturating doubling
    // high multiply; the right-shift component is applied afterwards with
    // round-to-nearest (midpoint away from zero).
    return arm_nn_divide_by_power_of_two(arm_nn_sat_doubling_high_mult(val * (1 << LEFT_SHIFT(shift)), multiplier),
                                         RIGHT_SHIFT(shift));
}
|
||||
|
||||
/**
 * @brief memcpy optimized for MVE
 *        Uses a tail-predicated MVE copy loop when ARM_MATH_MVEI is defined;
 *        otherwise falls back to the C library memcpy.
 * @param[in, out] dst         Destination pointer
 * @param[in]      src         Source pointer.
 * @param[in]      block_size  Number of bytes to copy.
 *
 */
__STATIC_FORCEINLINE void arm_memcpy_q7(q7_t *__RESTRICT dst,
                                        const q7_t *__RESTRICT src,
                                        uint32_t block_size)
{
#if defined(ARM_MATH_MVEI)
    /* Load/store 16 bytes per iteration; the wlstp/letp tail-predicated
       loop handles any remainder bytes. */
    __asm volatile (
        " wlstp.8 lr, %[cnt], 1f \n"
        "2: \n"
        " vldrb.8 q0, [%[in]], 16 \n"
        " vstrb.8 q0, [%[out]], 16 \n"
        " letp lr, 2b \n"
        "1: \n"
        :[in] "+r"(src)
        ,[out] "+r"(dst)
        :[cnt] "r"(block_size)
        :"q0", "memory", "r14");
#else
    memcpy(dst, src, block_size);
#endif
}
|
||||
|
||||
#if defined(ARM_MATH_MVEI)
|
||||
/**
|
||||
* @brief Vector saturating doubling high multiply returning high half.
|
||||
* @param[in] m1 Multiplicand
|
||||
* @param[in] m2 Multiplier
|
||||
* @return Result of multiplication.
|
||||
*
|
||||
*/
|
||||
__STATIC_FORCEINLINE int32x4_t arm_sat_doubling_high_mult_mve(const int32x4_t m1, const q31_t m2)
|
||||
{
|
||||
return vqrdmulhq_n_s32(m1, m2);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Vector rounding divide by power of two.
|
||||
* @param[in] dividend - Dividend vector
|
||||
* @param[in] exponent - Divisor = power(2, exponent)
|
||||
* Range: [0, 31]
|
||||
* @return Rounded result of division. Midpoint is rounded away from zero.
|
||||
*
|
||||
*/
|
||||
__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve(const int32x4_t dividend, const q31_t exponent)
|
||||
{
|
||||
const int32x4_t shift = vdupq_n_s32(-exponent);
|
||||
const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31);
|
||||
const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup);
|
||||
return vrshlq_s32(fixed_up_dividend, shift);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Requantize a given vector.
|
||||
* @param[in] val Vector to be requantized
|
||||
* @param[in] multiplier multiplier
|
||||
* @param[in] shift shift
|
||||
*
|
||||
* @return Returns (val * multiplier)/(2 ^ shift)
|
||||
*
|
||||
*/
|
||||
__STATIC_FORCEINLINE int32x4_t arm_requantize_mve(const int32x4_t val, const q31_t multiplier, const q31_t shift)
|
||||
{
|
||||
return arm_divide_by_power_of_two_mve(
|
||||
arm_sat_doubling_high_mult_mve(vshlq_s32(val, vdupq_n_s32(LEFT_SHIFT(shift))), multiplier),
|
||||
RIGHT_SHIFT(shift));
|
||||
}
|
||||
|
||||
__STATIC_FORCEINLINE int32x4_t arm_sat_doubling_high_mult_mve_32x4(const int32x4_t m1, const int32x4_t m2)
|
||||
{
|
||||
return vqrdmulhq_s32(m1, m2);
|
||||
}
|
||||
|
||||
__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve_32x4(const int32x4_t dividend, const int32x4_t exponent)
|
||||
{
|
||||
const int32x4_t shift = -exponent;
|
||||
const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31);
|
||||
const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup);
|
||||
return vrshlq_s32(fixed_up_dividend, shift);
|
||||
}
|
||||
|
||||
__STATIC_FORCEINLINE int32x4_t arm_requantize_mve_32x4(const int32x4_t val, const int32x4_t multiplier, const int32x4_t shift)
|
||||
{
|
||||
const int32x4_t zz = vdupq_n_s32(0);
|
||||
const mve_pred16_t p = vcmpgtq_n_s32(shift, 0);
|
||||
|
||||
const int32x4_t left_shift = vpselq_s32(shift, zz, p);
|
||||
const int32x4_t right_shift = -vpselq_s32(zz, shift, p);
|
||||
|
||||
return arm_divide_by_power_of_two_mve_32x4(arm_sat_doubling_high_mult_mve_32x4(vshlq_s32(val, left_shift), multiplier), right_shift);
|
||||
}
|
||||
#endif
|
||||
|
||||
// @note The following functions are used only for softmax layer, scaled bits = 5 assumed

/**
 * @brief Fixed-point approximation of exp(x) for x <= 0.
 *        The input is reduced modulo -1/4 (in the assumed 5-scaled-bit
 *        fixed-point format); a polynomial approximates exp on the reduced
 *        range and the branchless SELECT_IF_NON_ZERO steps multiply in the
 *        precomputed exp() factors for each remainder bit. The polynomial
 *        and table constants mirror the gemmlowp/TFLite reference
 *        implementation — verify against gemmlowp before changing them.
 *        Returns Q31_MAX (i.e. ~1.0) when val == 0.
 */
__STATIC_FORCEINLINE int32_t arm_nn_exp_on_negative_values(int32_t val)
{
    int32_t mask = 0;
    int32_t shift = 24;

    // Reduce val into (-1/4, 0]: val_mod_minus_quarter is in [-(1<<24), 0).
    const int32_t val_mod_minus_quarter = (val & ((1 << shift) - 1)) - (1 << shift);
    // The part of val accounted for by the per-bit factor table below.
    const int32_t remainder = val_mod_minus_quarter - val;
    const int32_t x = (val_mod_minus_quarter << 5) + (1 << 28);
    const int32_t x2 = MUL_SAT(x, x);

    // Polynomial approximation of exp on the reduced interval.
    int32_t result = 1895147668 + MUL_SAT(1895147668, x +
                                          DIV_POW2(MUL_SAT(DIV_POW2(MUL_SAT(x2, x2), 2) + MUL_SAT(x2, x), 715827883) + x2, 1));

// For each set bit of 'remainder' (starting at bit 24), multiply in the
// corresponding precomputed exp() factor — branchlessly via mask select.
#define SELECT_IF_NON_ZERO(x)                                     \
{                                                                 \
    mask = MASK_IF_NON_ZERO(remainder & (1 << shift++));          \
    result = SELECT_USING_MASK(mask, MUL_SAT(result, x), result); \
}

    SELECT_IF_NON_ZERO(1672461947)
    SELECT_IF_NON_ZERO(1302514674)
    SELECT_IF_NON_ZERO(790015084)
    SELECT_IF_NON_ZERO(290630308)
    SELECT_IF_NON_ZERO(39332535)
    SELECT_IF_NON_ZERO(720401)
    SELECT_IF_NON_ZERO(242)

#undef SELECT_IF_NON_ZERO

    // exp(0) saturates to the maximum representable value.
    mask = MASK_IF_ZERO(val);
    return SELECT_USING_MASK(mask, Q31_MAX, result);
}
|
||||
|
||||
/**
 * @brief Multiply 'val' by 2^exp, saturating the result to the q31 range.
 *        Overflow in either direction is patched up branchlessly with
 *        mask selects.
 */
__STATIC_FORCEINLINE q31_t arm_nn_mult_by_power_of_two(const int32_t val, const int32_t exp)
{
    // Largest magnitude that survives a left shift by 'exp' without overflow.
    const int32_t thresh = ((1 << (31 - exp)) - 1);
    // NOTE(review): 'val << exp' is UB in C when the shifted value overflows
    // or val is negative; this relies on the usual two's-complement behavior,
    // with the selects below correcting the overflow cases.
    int32_t result = val << exp;
    result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val > thresh), Q31_MAX, result);
    result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val < -thresh), Q31_MIN, result);
    return result;
}
|
||||
|
||||
/**
 * @brief Fixed-point approximation of 1/(1+x) for x in [0, 1] (softmax
 *        normalization helper). Starts from a linear estimate of the
 *        reciprocal of half_denominator and refines it with three
 *        Newton-Raphson-style iterations. Constants mirror the
 *        gemmlowp/TFLite reference implementation — verify against gemmlowp
 *        before changing them.
 */
__STATIC_FORCEINLINE int32_t arm_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val)
{
    // (1 + x)/2 computed in 64 bits to avoid overflow, rounded away from zero.
    const int64_t sum = (int64_t)val + (int64_t)Q31_MAX;
    const int32_t half_denominator = (int32_t)((sum + (sum >= 0 ? 1 : -1)) / 2L);
    // Initial linear estimate of the reciprocal.
    int32_t x = 1515870810 + MUL_SAT(half_denominator, -1010580540);

    // Each step: x += 4 * x * (1/4 - half_denominator * x), where 'shift'
    // represents 1/4 in this fixed-point format.
    const int32_t shift = (1 << 29);
    x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2);
    x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2);
    x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2);

    // Final doubling to undo the halved denominator.
    return MUL_POW2(x, 1);
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user