Merge pull request #288 from yangqings/master

fix TFlite_Micro_Component & update TFlite_Micro_Component_User_Guide.md
2021-02-08 12:33:08 +08:00
parent 4bb2038a56 b8a205d3be
commit ccf5400642
204 changed files with 509 additions and 67727 deletions
--- a/board/NUCLEO_STM32L496ZG/BSP/Inc/mcu_init.h
+++ b/board/NUCLEO_STM32L496ZG/BSP/Inc/mcu_init.h
@@ -13,8 +13,6 @@
 #include "i2c.h"
 #include "spi.h"
 #include "tim.h"
-#include "ov2640.h"
-#include "lcd_2inch4.h"
 #include "tos_k.h"
 void board_init(void);
 void SystemClock_Config(void);
--- a/board/NUCLEO_STM32L496ZG/BSP/Src/main.c
+++ b/board/NUCLEO_STM32L496ZG/BSP/Src/main.c
@@ -18,7 +18,6 @@ int main(void)
 {
    board_init();
    printf("Welcome to TencentOS tiny\r\n");
-		person_detect_init();
    osKernelInitialize(); // TOS Tiny kernel initialize
    osThreadCreate(osThread(application_entry), NULL); // Create TOS Tiny task
    osKernelStart(); // Start TOS Tiny
--- a/board/NUCLEO_STM32L496ZG/BSP/Src/mcu_init.c
+++ b/board/NUCLEO_STM32L496ZG/BSP/Src/mcu_init.c
@@ -1,12 +1,9 @@
 #include "mcu_init.h"

-uint16_t camera_buffer[OV2640_PIXEL_WIDTH*OV2640_PIXEL_HEIGHT];
 uint8_t frame_flag = 0;
-uint8_t tensor_flag = 0;

 extern DCMI_HandleTypeDef hdcmi;

-
 int fputc(int ch, FILE *f)
 {
  if (ch == '\n') {
@@ -42,18 +39,6 @@ void board_init(void)
  MX_I2C1_Init();
  MX_SPI1_Init();
  MX_TIM4_Init();
-	
-	LCD_2IN4_Init();
-	OV2640_Init();
-	OV2640_RGB565_Mode();
-	OV2640_OutSize_Set(OV2640_PIXEL_WIDTH,OV2640_PIXEL_HEIGHT);
-	
-	__HAL_DCMI_DISABLE_IT(&hdcmi, DCMI_IT_LINE | DCMI_IT_VSYNC);
-	if (HAL_DCMI_Start_DMA(&hdcmi, DCMI_MODE_CONTINUOUS,  (uint32_t)camera_buffer , (OV2640_PIXEL_WIDTH*OV2640_PIXEL_HEIGHT)/2))
-	{
-		Error_Handler();
-	}
-	//setup(); //tensorflow init
 }

 /**
--- a/board/NUCLEO_STM32L496ZG/KEIL/tflitemicro_person_detection/TFlite_Micro_Demo移植参考指南（Keil版）.md
+++ b/board/NUCLEO_STM32L496ZG/KEIL/tflitemicro_person_detection/TFlite_Micro_Demo移植参考指南（Keil版）.md
@@ -2,11 +2,11 @@

 **作者：**

-Github: [Derekduke](https://github.com/Derekduke)   E-mail:  dkeji627@gmail.com 
+Github ID: [Derekduke](https://github.com/Derekduke)   E-mail:  dkeji627@gmail.com 

-Github: [QingChuanWS](https://github.com/QingChuanWS)  E-mail: bingshan45@163.com
+Github ID: [QingChuanWS](https://github.com/QingChuanWS)  E-mail: bingshan45@163.com

-Github: [yangqings](https://github.com/yangqings)  E-mail: yangqingsheng12@outlook.com
+Github ID: [yangqings](https://github.com/yangqings)  E-mail: yangqingsheng12@outlook.com

 ## 概述

@@ -50,7 +50,7 @@ Github: [yangqings](https://github.com/yangqings)  E-mail: yangqingsheng12@outlo
 有三种方式获取tflite_micro：

 1. 从TencentOS tiny 代码仓库 `components\ai\tflite_micro`目录获取；
-2. 以lib文件的形式使用tflite_micro组件，lib文件`TencentOS-tiny\components\ai\tflite_micro`的ARM_CortexM4_lib、ARM_CortexM7_lib和ARM_CortexM55_lib文件夹；
+2. 以lib文件的形式使用tflite_micro组件，lib文件位于`TencentOS-tiny\components\ai\tflite_micro`的ARM_CortexM4_lib、ARM_CortexM7_lib和ARM_CortexM55_lib文件夹；
 3. 从Tensorflow代码仓库获取，TFlite_Micro的源码已经开源，github仓库地址为：https://github.com/tensorflow/tensorflow ，可根据google TFLite Micro官方教程获得Tensorflow Lite Micro的全部源码。

 如果没有tflite_micro开发经验，建议以**第一种**或者**第二种**方式获取tflite_micro；如果希望自行获取最新源码或者编译lib文件，请参考`TencentOS-tiny\components\ai\tflite_micro`目录的TFlite_Micro_Component_User_Guide.md文档。本指南将直接使用TencentOS tiny 代码仓库内的tflite_micro组件。
@@ -61,17 +61,16 @@ Github: [yangqings](https://github.com/yangqings)  E-mail: yangqingsheng12@outlo

 以下是整个例程的目录规划：

-| 一级目录  |           二级目录           |       三级目录        |                             说明                             |
-| :-------: | :--------------------------: | :-------------------: | :----------------------------------------------------------: |
-|   arch    |             arm              |                       | TencentOS tiny适配的IP核架构（含M核中断、调度、tick相关代码） |
-|   board   |      NUCLEO_STM32L496ZG      |                       |                    移植目标芯片的工程文件                    |
-|           |                              |          BSP          |            板级支持包，外设驱动代码在Hardware目录            |
-| component |              ai              |     tflite_micro      |                 tflite_micro源码及有关库文件                 |
-| examples  | tflitemicro_person_detection |                       |                       行人检测demo示例                       |
-|           |                              | tflu_person_detection |                       行人检测实例代码                       |
-|  kernel   |             core             |                       |                    TencentOS tiny内核源码                    |
-|           |              pm              |                       |                 TencentOS tiny低功耗模块源码                 |
-|   osal    |           cmsis_os           |                       |              TencentOS tiny提供的cmsis os 适配               |
+| 一级目录  |           二级目录           |   三级目录   |                             说明                             |
+| :-------: | :--------------------------: | :----------: | :----------------------------------------------------------: |
+|   arch    |             arm              |              | TencentOS tiny适配的IP核架构（含M核中断、调度、tick相关代码） |
+|   board   |      NUCLEO_STM32L496ZG      |              |                    移植目标芯片的工程文件                    |
+|           |                              |     BSP      |            板级支持包，外设驱动代码在Hardware目录            |
+| component |              ai              | tflite_micro |                       tflite_micro源码                       |
+| examples  | tflitemicro_person_detection |              |                       行人检测demo示例                       |
+|  kernel   |             core             |              |                    TencentOS tiny内核源码                    |
+|           |              pm              |              |                 TencentOS tiny低功耗模块源码                 |
+|   osal    |           cmsis_os           |              |              TencentOS tiny提供的cmsis os 适配               |

 完成TencentOS tiny基础keil工程准备工作后，在这个keil工程的基础上继续添加外设驱动代码。

@@ -210,9 +209,9 @@ void task1(void *arg)

 其中，retarget.c的路径为：`TencentOS-tiny\components\ai\tflite_micro\KEIL\retarget.c`

-tensorflow_lite_micro.lib的路径为：`TencentOS-stiny\components\ai\tflite_micro\ARM_CortexM4_lib\tensorflow_lite_micro.lib`
+tensorflow_lite_micro.lib的路径为：`TencentOS-tiny\components\ai\tflite_micro\ARM_CortexM4_lib\tensorflow_lite_micro.lib`

-其余.cc文件和.h均在`examples\tflu_person_detection\tflu_person_detection`文件夹中。
+其余.cc文件均在当前目录下的`tflu_person_detection`文件夹中。

 #### 1.3 关闭Keil的MicroLib库

@@ -244,7 +243,7 @@ TencentOS-tiny\components\ai\tflite_micro\ARM_CortexM4_lib\tensorflow\lite\micro

 本例程的任务函数在

-`TencentOS-tiny\examples\tflitemicro_person_detection\tflitemicro_person_detection.c`
+`TencentOS-tiny\examples\tflitemicro_person_detection\tflitemicro_person_detection.c`文件中

 #### 2.1 图像预处理

@@ -312,13 +311,13 @@ void task2(void *arg)

 #### 2.3 运行效果

-通过串行输出实时打印信息，移动摄像头，没有对准行人时，输出如下：
+通过串行输出实时打印信息，移动摄像头，镜头没有对准行人时，输出如下：

 <div align=center>
 <img src="./image/reasult_no_person.png" width=70% />
 </div>

-当摄像头对准行人时，输出如下：
+当镜头对准行人时，输出如下：

 <div align=center>
 <img src="./image/reasult_person.png" width=70% />
--- a/board/NUCLEO_STM32L496ZG/KEIL/tflitemicro_person_detection/TencentOS_tiny.uvoptx
+++ b/board/NUCLEO_STM32L496ZG/KEIL/tflitemicro_person_detection/TencentOS_tiny.uvoptx
@@ -12,7 +12,7 @@
    <lExt>*.lib</lExt>
    <tExt>*.txt; *.h; *.inc</tExt>
    <pExt>*.plm</pExt>
-    <CppX>*.cpp</CppX>
+    <CppX>*.cpp;*.cc</CppX>
    <nMigrate>0</nMigrate>
  </Extensions>

--- a/board/NUCLEO_STM32L496ZG/KEIL/tflitemicro_person_detection/TencentOS_tiny.uvprojx
+++ b/board/NUCLEO_STM32L496ZG/KEIL/tflitemicro_person_detection/TencentOS_tiny.uvprojx
@@ -16,8 +16,8 @@
        <TargetCommonOption>
          <Device>STM32L496ZGTx</Device>
          <Vendor>STMicroelectronics</Vendor>
-          <PackID>Keil.STM32L4xx_DFP.2.4.0</PackID>
-          <PackURL>http://www.keil.com/pack/</PackURL>
+          <PackID>Keil.STM32L4xx_DFP.2.5.0</PackID>
+          <PackURL>https://www.keil.com/pack/</PackURL>
          <Cpu>IRAM(0x20000000-0x2004FFFF) IROM(0x8000000-0x80FFFFF) CLOCK(8000000) FPU2 CPUTYPE("Cortex-M4")</Cpu>
          <FlashUtilSpec></FlashUtilSpec>
          <StartupFile></StartupFile>
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/LICENSE
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/LICENSE
@@ -1,203 +0,0 @@
-Copyright 2019 The TensorFlow Authors.  All rights reserved.
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/core/public/version.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/core/public/version.h
@@ -1,139 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_PUBLIC_VERSION_H_
-#define TENSORFLOW_CORE_PUBLIC_VERSION_H_
-
-// TensorFlow uses semantic versioning, see http://semver.org/.
-
-// Also update tensorflow/tensorflow.bzl and
-// tensorflow/tools/pip_package/setup.py
-#define TF_MAJOR_VERSION 2
-#define TF_MINOR_VERSION 4
-#define TF_PATCH_VERSION 0
-
-// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
-// "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX ""
-
-#define TF_STR_HELPER(x) #x
-#define TF_STR(x) TF_STR_HELPER(x)
-
-// e.g. "0.5.0" or "0.6.0-alpha".
-#define TF_VERSION_STRING                                            \
-  (TF_STR(TF_MAJOR_VERSION) "." TF_STR(TF_MINOR_VERSION) "." TF_STR( \
-      TF_PATCH_VERSION) TF_VERSION_SUFFIX)
-
-// GraphDef compatibility versions (the versions field in graph.proto).
-//
-// Each graph has producer and min_consumer versions, and each
-// consumer has its own version and a min_producer.  In addition, graphs can
-// mark specific consumer versions as bad (to prevent bugs from executing).
-// A consumer will execute a graph if the consumer's version is at least the
-// graph's min_consumer, the graph's producer version is at least the consumer's
-// min_producer, and the consumer version isn't specifically disallowed by the
-// graph.
-//
-// By default, newly created graphs have producer version TF_GRAPH_DEF_VERSION
-// min_consumer TF_GRAPH_DEF_MIN_CONSUMER, and no other bad consumer versions.
-//
-// Version history:
-//
-// 0. Graphs created before GraphDef versioning
-// 1. First real version (2dec2015)
-// 2. adjust_contrast only takes float, doesn't perform clamping (11dec2015)
-// 3. Remove TileGrad, since it was equivalent to reduce_sum (30dec2015)
-// 4. When support for this version is removed, we can safely make AttrValue
-//    parsing more strict with respect to empty list values (see
-//    111635679, 7jan2016).
-// 5. Graphs are wholly-validated during Session::Create() (7jan2016).
-// 6. TensorFlow is scalar strict within Google (27jan2016).
-// 7. Remove TopK in favor of TopKV2 (5feb2016).
-// 8. Replace RandomCrop from C++ with pure Python (5feb2016).
-// 9. Deprecate batch_norm_with_global_normalization (16feb2016).
-// 10. Deprecate conv3d_backprop_{filter,input} (10jun2016).
-// 11. Deprecate {batch}_self_adjoint_eig (3aug2016).
-// 12. Graph consumers understand the node_def field of FunctionDef (22aug2016).
-// 13. Deprecate multiple batch linear algebra ops (9sep2016).
-// 14. Deprecate batch_matrix_* ops. (10sep2016).
-// 15. Deprecate batch_fft_* ops. (14sep2016).
-// 16. Deprecate tensor_array (v1) ops in favor of v2 (10nov2016).
-// 17. Deprecate inv (11nov2016).
-// 17. Expose reverse_v2 (10nov2016)
-// 18. Add VariableV2 (30nov2016)
-// 19. Deprecated ops created by models moved out of core SkipGram, NegTrain.
-//     (08dec2016)
-// 20. Catch all version 1.0 changes to Python API generation. SplitV is now
-//     used for tf.split, ReverseV2 is now used by tf.reverse, ConcatV2 is
-//     now used by tf.concat. Graphs use flooring
-//     division and mod semantics. TensorArrayV3. (12dec2016)
-//     Also considered the version for when it is required for reduction
-//     ops' indices to be scalar or vector, and not higher rank.
-//     Some earlier graph def versions allowed this.
-// 21. Dropped FunctionDef.Node support, switched to node_def introduced
-//     in version 12. (11jan2017)
-// 22. Placeholder now can specify and enforce scalar and partial
-//     shapes, particularly when restoring a graph from GraphDef
-//     produced at version 22 or later.  (04/10/2016)
-// 23. Remove NonMaxSuppression in favor of NonMaxSuppressionV2.
-// 24. Deprecate lookup ops (v1) ops in favor of v2 (30may2017)
-// 25. Deprecate stack (v1) ops in favor of v2 (2017/6/15).
-// 25. Deprecate RandomPoisson (v1) ops in favor of v2 (2017/10/25).
-// 26. Add a bool 'stripped_default_attrs' to MetaInfoDef indicating
-//     whether default-valued attrs have been stripped from the nodes in the
-//     GraphDef. (7dec2017)
-// 27. Deprecate TensorArray ops v2 in favor of v3 and deprecated io_ops
-//     deprecated in favor of V2 ops. (2018/01/23)
-// 28. Deprecate MatrixExponential op in favor of Python implementation.
-//     (2018/08/21).
-// (2019/02/15). Added `control_ret` field to FunctionDef proto, and
-//     `control_output` field to OpDef proto.
-// 29. Deprecate StatefulStandardNormal op in favor of StatefulStandardNormalV2.
-//     (2019/03/25).
-// (2019/04/17). Added `arg_attr` field to FunctionDefProto.
-// 30. (2019/05/09) First date based GraphDef version. GraphDef
-//     versions advance by 1 each day after this point.
-
-#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
-#define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 485  // Updated: 2020/8/6
-
-// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
-//
-// The checkpoint versions have the same semantics as GraphDef versions, but the
-// numbering scheme is separate.  We have no plans to ever deprecate checkpoint
-// versions, but it's good to have this in place in case we ever need to.
-//
-// Version history:
-//
-// 0. Checkpoints saved before checkpoint versioning.
-// 1. First real version (10feb2015).
-#define TF_CHECKPOINT_VERSION_MIN_PRODUCER 0
-#define TF_CHECKPOINT_VERSION_MIN_CONSUMER 0
-#define TF_CHECKPOINT_VERSION 1
-
-/// Version query functions (defined in generated version_info.cc)
-
-// Host compiler version (declared elsewhere to be __VERSION__)
-extern const char* tf_compiler_version();
-// The git commit designator when tensorflow was built
-// If no git repository, this will be "internal".
-extern const char* tf_git_version();
-// Value of the _GLIBCXX_USE_CXX11_ABI flag, or 0 if it's not set.
-extern int tf_cxx11_abi_flag();
-// Returns 1 if build is monolithic, or 0 otherwise.
-extern int tf_monolithic_build();
-
-#endif  // TENSORFLOW_CORE_PUBLIC_VERSION_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/c/builtin_op_data.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/c/builtin_op_data.h
@@ -1,472 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
-#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
-
-#include <stdint.h>
-
-#include "tensorflow/lite/c/common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
-// number of dimensions.
-#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8
-
-// TODO(aselle): Consider using "if this then that" for testing.
-
-// Useful placeholder to put in otherwise empty structs to avoid size warnings.
-typedef struct {
-  char dummy;
-} EmptyStructPlaceholder;
-
-// IMPORTANT: All new members of structs must be added at the end to ensure
-// backwards compatibility.
-
-// Possible padding types (for convolutions)
-typedef enum {
-  kTfLitePaddingUnknown = 0,
-  kTfLitePaddingSame,
-  kTfLitePaddingValid,
-} TfLitePadding;
-
-typedef enum {
-  kTfLiteMirrorPaddingUnknown = 0,
-  kTfLiteMirrorPaddingReflect,
-  kTfLiteMirrorPaddingSymmetric,
-} TfLiteMirrorPaddingMode;
-
-// TODO(b/130259536): We should move this out of builtin_op_data.
-typedef struct {
-  int width;
-  int height;
-  int width_offset;
-  int height_offset;
-} TfLitePaddingValues;
-
-typedef struct {
-  TfLiteMirrorPaddingMode mode;
-} TfLiteMirrorPaddingParams;
-
-// Possible fused activation functions.
-// TODO(aselle): rename to TfLiteActivation
-typedef enum {
-  kTfLiteActNone = 0,
-  kTfLiteActRelu,
-  kTfLiteActReluN1To1,                    // min(max(-1, x), 1)
-  kTfLiteActRelu1 = kTfLiteActReluN1To1,  // kTfLiteActRelu1 will be deprecated.
-  kTfLiteActRelu6,                        // min(max(0, x), 6)
-  kTfLiteActTanh,
-  kTfLiteActSignBit,
-  kTfLiteActSigmoid,
-} TfLiteFusedActivation;
-
-typedef struct {
-  // Parameters for CONV_2D version 1.
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  TfLiteFusedActivation activation;
-
-  // Parameters for CONV_2D version 2.
-  // Note: Version 2 supports dilation values not equal to 1.
-  int dilation_width_factor;
-  int dilation_height_factor;
-} TfLiteConvParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  int filter_width;
-  int filter_height;
-  TfLiteFusedActivation activation;
-  struct {
-    TfLitePaddingValues padding;
-  } computed;
-} TfLitePoolParams;
-
-typedef struct {
-  // Parameters for DepthwiseConv version 1 or above.
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  // `depth_multiplier` is redundant. It's used by CPU kernels in
-  // TensorFlow 2.0 or below, but ignored in versions above.
-  //
-  // The information can be deduced from the shape of input and the shape of
-  // weights. Since the TFLiteConverter toolchain doesn't support partially
-  // specified shapes, relying on `depth_multiplier` stops us from supporting
-  // graphs with dynamic shape tensors.
-  //
-  // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
-  // field.
-  int depth_multiplier;
-  TfLiteFusedActivation activation;
-  // Parameters for DepthwiseConv version 2 or above.
-  int dilation_width_factor;
-  int dilation_height_factor;
-} TfLiteDepthwiseConvParams;
-
-typedef struct {
-  int rank;
-  TfLiteFusedActivation activation;
-
-  // Parameter for SVDF version 4.
-  bool asymmetric_quantize_inputs;
-} TfLiteSVDFParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-
-  // Parameter for RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteRNNParams;
-
-typedef struct {
-  bool time_major;
-  TfLiteFusedActivation activation;
-
-  // Parameter for Sequence RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteSequenceRNNParams;
-
-typedef struct {
-  bool time_major;
-  TfLiteFusedActivation activation;
-  bool merge_outputs;
-
-  // Parameter for Bidirectional RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteBidirectionalSequenceRNNParams;
-
-typedef enum {
-  kTfLiteFullyConnectedWeightsFormatDefault = 0,
-  kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
-} TfLiteFullyConnectedWeightsFormat;
-
-typedef struct {
-  // Parameters for FullyConnected version 1 or above.
-  TfLiteFusedActivation activation;
-
-  // Parameters for FullyConnected version 2 or above.
-  TfLiteFullyConnectedWeightsFormat weights_format;
-
-  // Parameters for FullyConnected version 5 or above.
-  // If set to true, then the number of dimensions in the input and the output
-  // tensors are the same. Furthermore, all but the last dimension of the input
-  // and output shapes will be equal.
-  bool keep_num_dims;
-
-  // Parameters for FullyConnected version 7 or above.
-  // If set to true and the weights are quantized, then non constant inputs
-  // are quantized at evaluation time with asymmetric quantization.
-  bool asymmetric_quantize_inputs;
-} TfLiteFullyConnectedParams;
-
-typedef enum {
-  kTfLiteLshProjectionUnknown = 0,
-  kTfLiteLshProjectionSparse = 1,
-  kTfLiteLshProjectionDense = 2,
-} TfLiteLSHProjectionType;
-
-typedef struct {
-  TfLiteLSHProjectionType type;
-} TfLiteLSHProjectionParams;
-
-typedef struct {
-  float beta;
-} TfLiteSoftmaxParams;
-
-typedef struct {
-  int axis;
-  TfLiteFusedActivation activation;
-} TfLiteConcatenationParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-  // Parameter added for the version 4.
-  bool pot_scale_int16;
-} TfLiteAddParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteSpaceToBatchNDParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteBatchToSpaceNDParams;
-
-typedef struct {
-  bool adj_x;
-  bool adj_y;
-} TfLiteBatchMatMulParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteMulParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-  // Parameter added for the version 5.
-  bool pot_scale_int16;
-} TfLiteSubParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteDivParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteL2NormParams;
-
-typedef struct {
-  int radius;
-  float bias;
-  float alpha;
-  float beta;
-} TfLiteLocalResponseNormParams;
-
-typedef enum {
-  kTfLiteLSTMFullKernel = 0,
-  kTfLiteLSTMBasicKernel
-} TfLiteLSTMKernelType;
-
-typedef struct {
-  // Parameters for LSTM version 1.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // Parameters for LSTM version 2.
-  // kTfLiteLSTMBasicKernel is only supported in version 2 or above.
-  TfLiteLSTMKernelType kernel_type;
-
-  // Parameters for LSTM version 4.
-  bool asymmetric_quantize_inputs;
-} TfLiteLSTMParams;
-
-typedef struct {
-  // Parameters needed for the underlying LSTM.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // If set to true then the first dimension is time, otherwise batch.
-  bool time_major;
-
-  // Parameter for unidirectional sequence RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteUnidirectionalSequenceLSTMParams;
-
-typedef struct {
-  // Parameters supported by version 1:
-  // Parameters inherited for the LSTM kernel.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // If true, store the outputs of both directions in the first output.
-  bool merge_outputs;
-
-  // Parameters supported by version 2:
-  // If set to true then the first dimension is time, otherwise batch.
-  bool time_major;
-
-  // Parameters supported by version 4:
-  // If set to true, then hybrid ops use asymmetric quantization for inputs.
-  bool asymmetric_quantize_inputs;
-} TfLiteBidirectionalSequenceLSTMParams;
-
-typedef struct {
-  bool align_corners;
-  // half_pixel_centers assumes pixels are of half the actual dimensions, and
-  // yields more accurate resizes. Corresponds to the same argument for the
-  // original TensorFlow op in TF2.0.
-  bool half_pixel_centers;
-} TfLiteResizeBilinearParams;
-
-typedef struct {
-  bool align_corners;
-  bool half_pixel_centers;
-} TfLiteResizeNearestNeighborParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLitePadParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLitePadV2Params;
-
-typedef struct {
-  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
-  // For now we will fix the maximum possible number of dimensions.
-  int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
-  int num_dimensions;
-} TfLiteReshapeParams;
-
-typedef struct {
-  int ngram_size;
-  int max_skip_size;
-  bool include_all_ngrams;
-} TfLiteSkipGramParams;
-
-typedef struct {
-  int block_size;
-} TfLiteSpaceToDepthParams;
-
-typedef struct {
-  int block_size;
-} TfLiteDepthToSpaceParams;
-
-typedef struct {
-  TfLiteType in_data_type;
-  TfLiteType out_data_type;
-} TfLiteCastParams;
-
-typedef enum {
-  kTfLiteCombinerTypeSum = 0,
-  kTfLiteCombinerTypeMean = 1,
-  kTfLiteCombinerTypeSqrtn = 2,
-} TfLiteCombinerType;
-
-typedef struct {
-  TfLiteCombinerType combiner;
-} TfLiteEmbeddingLookupSparseParams;
-
-typedef struct {
-  int axis;
-} TfLiteGatherParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteTransposeParams;
-
-typedef struct {
-  bool keep_dims;
-} TfLiteReducerParams;
-
-typedef struct {
-  int num_splits;
-} TfLiteSplitParams;
-
-typedef struct {
-  int num_splits;
-} TfLiteSplitVParams;
-
-typedef struct {
-  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
-  // For now we will fix the maximum possible number of dimensions.
-  int squeeze_dims[8];
-  int num_squeeze_dims;
-} TfLiteSqueezeParams;
-
-typedef struct {
-  int begin_mask;
-  int end_mask;
-  int ellipsis_mask;
-  int new_axis_mask;
-  int shrink_axis_mask;
-} TfLiteStridedSliceParams;
-
-typedef struct {
-  TfLiteType output_type;
-} TfLiteArgMaxParams;
-
-typedef struct {
-  TfLiteType output_type;
-} TfLiteArgMinParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-} TfLiteTransposeConvParams;
-
-typedef struct {
-  bool validate_indices;
-} TfLiteSparseToDenseParams;
-
-typedef struct {
-  TfLiteType out_type;
-} TfLiteShapeParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteRankParams;
-
-typedef struct {
-  // Parameters supported by version 1:
-  float min;
-  float max;
-  int num_bits;
-
-  // Parameters supported by version 2:
-  bool narrow_range;
-} TfLiteFakeQuantParams;
-
-typedef struct {
-  int values_count;
-  int axis;
-} TfLitePackParams;
-
-typedef struct {
-  int axis;
-} TfLiteOneHotParams;
-
-typedef struct {
-  int num;
-  int axis;
-} TfLiteUnpackParams;
-
-typedef struct {
-  float alpha;
-} TfLiteLeakyReluParams;
-
-typedef struct {
-  TfLiteType index_out_type;
-} TfLiteUniqueParams;
-
-typedef struct {
-  int seq_dim;
-  int batch_dim;
-} TfLiteReverseSequenceParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteMatrixDiagParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteMatrixSetDiagParams;
-
-typedef struct {
-  int then_subgraph_index;
-  int else_subgraph_index;
-} TfLiteIfParams;
-
-typedef struct {
-  int cond_subgraph_index;
-  int body_subgraph_index;
-} TfLiteWhileParams;
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-
-#endif  // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/c/common.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/c/common.h
@@ -1,936 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// This file defines common C types and APIs for implementing operations,
-// delegates and other constructs in TensorFlow Lite. The actual operations and
-// delegates can be defined using C++, but the interface between the interpreter
-// and the operations are C.
-//
-// Summary of abstractions
-// TF_LITE_ENSURE - Self-sufficient error checking
-// TfLiteStatus - Status reporting
-// TfLiteIntArray - stores tensor shapes (dims),
-// TfLiteContext - allows an op to access the tensors
-// TfLiteTensor - tensor (a multidimensional array)
-// TfLiteNode - a single node or operation
-// TfLiteRegistration - the implementation of a conceptual operation.
-// TfLiteDelegate - allows delegation of nodes to alternative backends.
-//
-// Some abstractions in this file are created and managed by Interpreter.
-//
-// NOTE: The order of values in these structs are "semi-ABI stable". New values
-// should be added only to the end of structs and never reordered.
-
-#ifndef TENSORFLOW_LITE_C_COMMON_H_
-#define TENSORFLOW_LITE_C_COMMON_H_
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-typedef enum TfLiteStatus {
-  kTfLiteOk = 0,
-  kTfLiteError = 1,
-  kTfLiteDelegateError = 2
-} TfLiteStatus;
-
-// The list of external context types known to TF Lite. This list exists solely
-// to avoid conflicts and to ensure ops can share the external contexts they
-// need. Access to the external contexts is controlled by one of the
-// corresponding support files.
-typedef enum TfLiteExternalContextType {
-  kTfLiteEigenContext = 0,       // include eigen_support.h to use.
-  kTfLiteGemmLowpContext = 1,    // include gemm_support.h to use.
-  kTfLiteEdgeTpuContext = 2,     // Placeholder for Edge TPU support.
-  kTfLiteCpuBackendContext = 3,  // include cpu_backend_context.h to use.
-  kTfLiteMaxExternalContexts = 4
-} TfLiteExternalContextType;
-
-// Forward declare so dependent structs and methods can reference these types
-// prior to the struct definitions.
-struct TfLiteContext;
-struct TfLiteDelegate;
-struct TfLiteRegistration;
-
-// An external context is a collection of information unrelated to the TF Lite
-// framework, but useful to a subset of the ops. TF Lite knows very little
-// about about the actual contexts, but it keeps a list of them, and is able to
-// refresh them if configurations like the number of recommended threads
-// change.
-typedef struct TfLiteExternalContext {
-  TfLiteExternalContextType type;
-  TfLiteStatus (*Refresh)(struct TfLiteContext* context);
-} TfLiteExternalContext;
-
-#define kTfLiteOptionalTensor (-1)
-
-// Fixed size list of integers. Used for dimensions and inputs/outputs tensor
-// indices
-typedef struct TfLiteIntArray {
-  int size;
-// gcc 6.1+ have a bug where flexible members aren't properly handled
-// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
-#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
-     __GNUC_MINOR__ >= 1) ||                                      \
-    defined(HEXAGON)
-  int data[0];
-#else
-  int data[];
-#endif
-} TfLiteIntArray;
-
-// Given the size (number of elements) in a TfLiteIntArray, calculate its size
-// in bytes.
-int TfLiteIntArrayGetSizeInBytes(int size);
-
-#ifndef TF_LITE_STATIC_MEMORY
-// Create a array of a given `size` (uninitialized entries).
-// This returns a pointer, that you must free using TfLiteIntArrayFree().
-TfLiteIntArray* TfLiteIntArrayCreate(int size);
-#endif
-
-// Check if two intarrays are equal. Returns 1 if they are equal, 0 otherwise.
-int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b);
-
-// Check if an intarray equals an array. Returns 1 if equals, 0 otherwise.
-int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
-                              const int b_data[]);
-
-#ifndef TF_LITE_STATIC_MEMORY
-// Create a copy of an array passed as `src`.
-// You are expected to free memory with TfLiteIntArrayFree
-TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src);
-
-// Free memory of array `a`.
-void TfLiteIntArrayFree(TfLiteIntArray* a);
-#endif  // TF_LITE_STATIC_MEMORY
-
-// Fixed size list of floats. Used for per-channel quantization.
-typedef struct TfLiteFloatArray {
-  int size;
-// gcc 6.1+ have a bug where flexible members aren't properly handled
-// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
-// This also applies to the toolchain used for Qualcomm Hexagon DSPs.
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
-    __GNUC_MINOR__ >= 1
-  float data[0];
-#else
-  float data[];
-#endif
-} TfLiteFloatArray;
-
-// Given the size (number of elements) in a TfLiteFloatArray, calculate its size
-// in bytes.
-int TfLiteFloatArrayGetSizeInBytes(int size);
-
-#ifndef TF_LITE_STATIC_MEMORY
-// Create a array of a given `size` (uninitialized entries).
-// This returns a pointer, that you must free using TfLiteFloatArrayFree().
-TfLiteFloatArray* TfLiteFloatArrayCreate(int size);
-
-// Free memory of array `a`.
-void TfLiteFloatArrayFree(TfLiteFloatArray* a);
-#endif  // TF_LITE_STATIC_MEMORY
-
-// Since we must not depend on any libraries, define a minimal subset of
-// error macros while avoiding names that have pre-conceived meanings like
-// assert and check.
-
-// Try to make all reporting calls through TF_LITE_KERNEL_LOG rather than
-// calling the context->ReportError function directly, so that message strings
-// can be stripped out if the binary size needs to be severely optimized.
-#ifndef TF_LITE_STRIP_ERROR_STRINGS
-#define TF_LITE_KERNEL_LOG(context, ...)            \
-  do {                                              \
-    (context)->ReportError((context), __VA_ARGS__); \
-  } while (false)
-
-#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)        \
-  do {                                                \
-    if ((context) != nullptr) {                       \
-      (context)->ReportError((context), __VA_ARGS__); \
-    }                                                 \
-  } while (false)
-#else  // TF_LITE_STRIP_ERROR_STRINGS
-#define TF_LITE_KERNEL_LOG(context, ...)
-#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)
-#endif  // TF_LITE_STRIP_ERROR_STRINGS
-
-// Check whether value is true, and if not return kTfLiteError from
-// the current function (and report the error string msg).
-#define TF_LITE_ENSURE_MSG(context, value, msg)        \
-  do {                                                 \
-    if (!(value)) {                                    \
-      TF_LITE_KERNEL_LOG((context), __FILE__ " " msg); \
-      return kTfLiteError;                             \
-    }                                                  \
-  } while (0)
-
-// Check whether the value `a` is true, and if not return kTfLiteError from
-// the current function, while also reporting the location of the error.
-#define TF_LITE_ENSURE(context, a)                                      \
-  do {                                                                  \
-    if (!(a)) {                                                         \
-      TF_LITE_KERNEL_LOG((context), "%s:%d %s was not true.", __FILE__, \
-                         __LINE__, #a);                                 \
-      return kTfLiteError;                                              \
-    }                                                                   \
-  } while (0)
-
-#define TF_LITE_ENSURE_STATUS(a) \
-  do {                           \
-    const TfLiteStatus s = (a);  \
-    if (s != kTfLiteOk) {        \
-      return s;                  \
-    }                            \
-  } while (0)
-
-// Check whether the value `a == b` is true, and if not return kTfLiteError from
-// the current function, while also reporting the location of the error.
-// `a` and `b` may be evaluated more than once, so no side effects or
-// extremely expensive computations should be done.
-// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes.
-#define TF_LITE_ENSURE_EQ(context, a, b)                                   \
-  do {                                                                     \
-    if ((a) != (b)) {                                                      \
-      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%d != %d)", __FILE__, \
-                         __LINE__, #a, #b, (a), (b));                      \
-      return kTfLiteError;                                                 \
-    }                                                                      \
-  } while (0)
-
-#define TF_LITE_ENSURE_TYPES_EQ(context, a, b)                             \
-  do {                                                                     \
-    if ((a) != (b)) {                                                      \
-      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%s != %s)", __FILE__, \
-                         __LINE__, #a, #b, TfLiteTypeGetName(a),           \
-                         TfLiteTypeGetName(b));                            \
-      return kTfLiteError;                                                 \
-    }                                                                      \
-  } while (0)
-
-#define TF_LITE_ENSURE_OK(context, status) \
-  do {                                     \
-    const TfLiteStatus s = (status);       \
-    if ((s) != kTfLiteOk) {                \
-      return s;                            \
-    }                                      \
-  } while (0)
-
-// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
-// library.
-#ifdef SWIG
-#define TFL_CAPI_EXPORT
-#else
-#if defined(_WIN32)
-#ifdef TFL_COMPILE_LIBRARY
-#define TFL_CAPI_EXPORT __declspec(dllexport)
-#else
-#define TFL_CAPI_EXPORT __declspec(dllimport)
-#endif  // TFL_COMPILE_LIBRARY
-#else
-#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
-#endif  // _WIN32
-#endif  // SWIG
-
-// Single-precision complex data type compatible with the C99 definition.
-typedef struct TfLiteComplex64 {
-  float re, im;  // real and imaginary parts, respectively.
-} TfLiteComplex64;
-
-// Double-precision complex data type compatible with the C99 definition.
-typedef struct TfLiteComplex128 {
-  double re, im;  // real and imaginary parts, respectively.
-} TfLiteComplex128;
-
-// Half precision data type compatible with the C99 definition.
-typedef struct TfLiteFloat16 {
-  uint16_t data;
-} TfLiteFloat16;
-
-// Types supported by tensor
-typedef enum {
-  kTfLiteNoType = 0,
-  kTfLiteFloat32 = 1,
-  kTfLiteInt32 = 2,
-  kTfLiteUInt8 = 3,
-  kTfLiteInt64 = 4,
-  kTfLiteString = 5,
-  kTfLiteBool = 6,
-  kTfLiteInt16 = 7,
-  kTfLiteComplex64 = 8,
-  kTfLiteInt8 = 9,
-  kTfLiteFloat16 = 10,
-  kTfLiteFloat64 = 11,
-  kTfLiteComplex128 = 12,
-} TfLiteType;
-
-// Return the name of a given type, for error reporting purposes.
-const char* TfLiteTypeGetName(TfLiteType type);
-
-// SupportedQuantizationTypes.
-typedef enum TfLiteQuantizationType {
-  // No quantization.
-  kTfLiteNoQuantization = 0,
-  // Affine quantization (with support for per-channel quantization).
-  // Corresponds to TfLiteAffineQuantization.
-  kTfLiteAffineQuantization = 1,
-} TfLiteQuantizationType;
-
-// Structure specifying the quantization used by the tensor, if-any.
-typedef struct TfLiteQuantization {
-  // The type of quantization held by params.
-  TfLiteQuantizationType type;
-  // Holds a reference to one of the quantization param structures specified
-  // below.
-  void* params;
-} TfLiteQuantization;
-
-// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
-// If per-layer quantization is specified this field will still be populated in
-// addition to TfLiteAffineQuantization.
-// Parameters for asymmetric quantization. Quantized values can be converted
-// back to float using:
-//     real_value = scale * (quantized_value - zero_point)
-typedef struct TfLiteQuantizationParams {
-  float scale;
-  int32_t zero_point;
-} TfLiteQuantizationParams;
-
-// Parameters for asymmetric quantization across a dimension (i.e per output
-// channel quantization).
-// quantized_dimension specifies which dimension the scales and zero_points
-// correspond to.
-// For a particular value in quantized_dimension, quantized values can be
-// converted back to float using:
-//     real_value = scale * (quantized_value - zero_point)
-typedef struct TfLiteAffineQuantization {
-  TfLiteFloatArray* scale;
-  TfLiteIntArray* zero_point;
-  int32_t quantized_dimension;
-} TfLiteAffineQuantization;
-
-/* A union of pointers that points to memory for a given tensor. */
-typedef union TfLitePtrUnion {
-  /* Do not access these members directly, if possible, use
-   * GetTensorData<TYPE>(tensor) instead, otherwise only access .data, as other
-   * members are deprecated. */
-  int32_t* i32;
-  int64_t* i64;
-  float* f;
-  TfLiteFloat16* f16;
-  double* f64;
-  char* raw;
-  const char* raw_const;
-  uint8_t* uint8;
-  bool* b;
-  int16_t* i16;
-  TfLiteComplex64* c64;
-  TfLiteComplex128* c128;
-  int8_t* int8;
-  /* Only use this member. */
-  void* data;
-} TfLitePtrUnion;
-
-// Memory allocation strategies.
-//  * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated.
-//  * kTfLiteArenaRw: Arena allocated with no guarantees about persistence,
-//        and available during eval.
-//  * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and
-//        only available during eval.
-//  * kTfLiteDynamic: Allocated during eval, or for string tensors.
-//  * kTfLitePersistentRo: Allocated and populated during prepare. This is
-//        useful for tensors that can be computed during prepare and treated
-//        as constant inputs for downstream ops (also in prepare).
-typedef enum TfLiteAllocationType {
-  kTfLiteMemNone = 0,
-  kTfLiteMmapRo,
-  kTfLiteArenaRw,
-  kTfLiteArenaRwPersistent,
-  kTfLiteDynamic,
-  kTfLitePersistentRo,
-} TfLiteAllocationType;
-
-// The delegates should use zero or positive integers to represent handles.
-// -1 is reserved from unallocated status.
-typedef int TfLiteBufferHandle;
-enum {
-  kTfLiteNullBufferHandle = -1,
-};
-
-// Storage format of each dimension in a sparse tensor.
-typedef enum TfLiteDimensionType {
-  kTfLiteDimDense = 0,
-  kTfLiteDimSparseCSR,
-} TfLiteDimensionType;
-
-// Metadata to encode each dimension in a sparse tensor.
-typedef struct TfLiteDimensionMetadata {
-  TfLiteDimensionType format;
-  int dense_size;
-  TfLiteIntArray* array_segments;
-  TfLiteIntArray* array_indices;
-} TfLiteDimensionMetadata;
-
-// Parameters used to encode a sparse tensor. For detailed explanation of each
-// field please refer to lite/schema/schema.fbs.
-typedef struct TfLiteSparsity {
-  TfLiteIntArray* traversal_order;
-  TfLiteIntArray* block_map;
-  TfLiteDimensionMetadata* dim_metadata;
-  int dim_metadata_size;
-} TfLiteSparsity;
-
-// An tensor in the interpreter system which is a wrapper around a buffer of
-// data including a dimensionality (or NULL if not currently defined).
-#ifndef TF_LITE_STATIC_MEMORY
-typedef struct TfLiteTensor {
-  // The data type specification for data stored in `data`. This affects
-  // what member of `data` union should be used.
-  TfLiteType type;
-  // A union of data pointers. The appropriate type should be used for a typed
-  // tensor based on `type`.
-  TfLitePtrUnion data;
-  // A pointer to a structure representing the dimensionality interpretation
-  // that the buffer should have. NOTE: the product of elements of `dims`
-  // and the element datatype size should be equal to `bytes` below.
-  TfLiteIntArray* dims;
-  // Quantization information.
-  TfLiteQuantizationParams params;
-  // How memory is mapped
-  //  kTfLiteMmapRo: Memory mapped read only.
-  //  i.e. weights
-  //  kTfLiteArenaRw: Arena allocated read write memory
-  //  (i.e. temporaries, outputs).
-  TfLiteAllocationType allocation_type;
-  // The number of bytes required to store the data of this Tensor. I.e.
-  // (bytes of each element) * dims[0] * ... * dims[n-1].  For example, if
-  // type is kTfLiteFloat32 and dims = {3, 2} then
-  // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
-  size_t bytes;
-
-  // An opaque pointer to a tflite::MMapAllocation
-  const void* allocation;
-
-  // Null-terminated name of this tensor.
-  const char* name;
-
-  // The delegate which knows how to handle `buffer_handle`.
-  // WARNING: This is an experimental interface that is subject to change.
-  struct TfLiteDelegate* delegate;
-
-  // An integer buffer handle that can be handled by `delegate`.
-  // The value is valid only when delegate is not null.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteBufferHandle buffer_handle;
-
-  // If the delegate uses its own buffer (e.g. GPU memory), the delegate is
-  // responsible to set data_is_stale to true.
-  // `delegate->CopyFromBufferHandle` can be called to copy the data from
-  // delegate buffer.
-  // WARNING: This is an // experimental interface that is subject to change.
-  bool data_is_stale;
-
-  // True if the tensor is a variable.
-  bool is_variable;
-
-  // Quantization information. Replaces params field above.
-  TfLiteQuantization quantization;
-
-  // Parameters used to encode a sparse tensor.
-  // This is optional. The field is NULL if a tensor is dense.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteSparsity* sparsity;
-
-  // Optional. Encodes shapes with unknown dimensions with -1. This field is
-  // only populated when unknown dimensions exist in a read-write tensor (i.e.
-  // an input or output tensor). (e.g.  `dims` contains [1, 1, 1, 3] and
-  // `dims_signature` contains [1, -1, -1, 3]).
-  const TfLiteIntArray* dims_signature;
-} TfLiteTensor;
-
-// A structure representing an instance of a node.
-// This structure only exhibits the inputs, outputs and user defined data, not
-// other features like the type.
-typedef struct TfLiteNode {
-  // Inputs to this node expressed as indices into the simulator's tensors.
-  TfLiteIntArray* inputs;
-
-  // Outputs to this node expressed as indices into the simulator's tensors.
-  TfLiteIntArray* outputs;
-
-  // intermediate tensors to this node expressed as indices into the simulator's
-  // tensors.
-  TfLiteIntArray* intermediates;
-
-  // Temporary tensors uses during the computations. This usually contains no
-  // tensors, but ops are allowed to change that if they need scratch space of
-  // any sort.
-  TfLiteIntArray* temporaries;
-
-  // Opaque data provided by the node implementer through `Registration.init`.
-  void* user_data;
-
-  // Opaque data provided to the node if the node is a builtin. This is usually
-  // a structure defined in builtin_op_data.h
-  void* builtin_data;
-
-  // Custom initial data. This is the opaque data provided in the flatbuffer.
-  // WARNING: This is an experimental interface that is subject to change.
-  const void* custom_initial_data;
-  int custom_initial_data_size;
-
-  // The pointer to the delegate. This is non-null only when the node is
-  // created by calling `interpreter.ModifyGraphWithDelegate`.
-  // WARNING: This is an experimental interface that is subject to change.
-  struct TfLiteDelegate* delegate;
-} TfLiteNode;
-#else  // defined(TF_LITE_STATIC_MEMORY)?
-// NOTE: This flag is opt-in only at compile time.
-//
-// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
-// contains only the minimum fields required to initialize and prepare a micro
-// inference graph. The fields in this struct have been ordered from
-// largest-to-smallest for optimal struct sizeof.
-//
-// This struct does not use:
-// - allocation
-// - buffer_handle
-// - data_is_stale
-// - delegate
-// - dims_signature
-// - name
-// - sparsity
-typedef struct TfLiteTensor {
-  // TODO(b/155784997): Consider consolidating these quantization fields:
-  // Quantization information. Replaces params field above.
-  TfLiteQuantization quantization;
-
-  // Quantization information.
-  TfLiteQuantizationParams params;
-
-  // A union of data pointers. The appropriate type should be used for a typed
-  // tensor based on `type`.
-  TfLitePtrUnion data;
-
-  // A pointer to a structure representing the dimensionality interpretation
-  // that the buffer should have. NOTE: the product of elements of `dims`
-  // and the element datatype size should be equal to `bytes` below.
-  TfLiteIntArray* dims;
-
-  // The number of bytes required to store the data of this Tensor. I.e.
-  // (bytes of each element) * dims[0] * ... * dims[n-1].  For example, if
-  // type is kTfLiteFloat32 and dims = {3, 2} then
-  // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
-  size_t bytes;
-
-  // The data type specification for data stored in `data`. This affects
-  // what member of `data` union should be used.
-  TfLiteType type;
-
-  // How memory is mapped
-  //  kTfLiteMmapRo: Memory mapped read only.
-  //  i.e. weights
-  //  kTfLiteArenaRw: Arena allocated read write memory
-  //  (i.e. temporaries, outputs).
-  TfLiteAllocationType allocation_type;
-
-  // True if the tensor is a variable.
-  bool is_variable;
-} TfLiteTensor;
-
-// Specific reduced TfLiteNode struct for TF Micro runtime. This struct contains
-// only the minimum fields required to represent a node.
-//
-// This struct does not use:
-// - delegate
-// - intermediates
-// - temporaries
-typedef struct TfLiteNode {
-  // Inputs to this node expressed as indices into the simulator's tensors.
-  TfLiteIntArray* inputs;
-
-  // Outputs to this node expressed as indices into the simulator's tensors.
-  TfLiteIntArray* outputs;
-
-  // Opaque data provided by the node implementer through `Registration.init`.
-  void* user_data;
-
-  // Opaque data provided to the node if the node is a builtin. This is usually
-  // a structure defined in builtin_op_data.h
-  void* builtin_data;
-
-  // Custom initial data. This is the opaque data provided in the flatbuffer.
-  // WARNING: This is an experimental interface that is subject to change.
-  const void* custom_initial_data;
-  int custom_initial_data_size;
-} TfLiteNode;
-#endif  // TF_LITE_STATIC_MEMORY
-
-// Light-weight tensor struct for TF Micro runtime. Provides the minimal amount
-// of information required for a kernel to run during TfLiteRegistration::Eval.
-// TODO(b/160955687): Move this field into TF_LITE_STATIC_MEMORY when TFLM
-// builds with this flag by default internally.
-typedef struct TfLiteEvalTensor {
-  // A union of data pointers. The appropriate type should be used for a typed
-  // tensor based on `type`.
-  TfLitePtrUnion data;
-
-  // A pointer to a structure representing the dimensionality interpretation
-  // that the buffer should have.
-  TfLiteIntArray* dims;
-
-  // The data type specification for data stored in `data`. This affects
-  // what member of `data` union should be used.
-  TfLiteType type;
-} TfLiteEvalTensor;
-
-#ifndef TF_LITE_STATIC_MEMORY
-// Free data memory of tensor `t`.
-void TfLiteTensorDataFree(TfLiteTensor* t);
-
-// Free quantization data.
-void TfLiteQuantizationFree(TfLiteQuantization* quantization);
-
-// Free sparsity parameters.
-void TfLiteSparsityFree(TfLiteSparsity* sparsity);
-
-// Free memory of tensor `t`.
-void TfLiteTensorFree(TfLiteTensor* t);
-
-// Set all of a tensor's fields (and free any previously allocated data).
-void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
-                       TfLiteQuantizationParams quantization, char* buffer,
-                       size_t size, TfLiteAllocationType allocation_type,
-                       const void* allocation, bool is_variable,
-                       TfLiteTensor* tensor);
-
-// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
-// types other than kTfLiteDynamic will be ignored.
-void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
-#endif  // TF_LITE_STATIC_MEMORY
-
-// WARNING: This is an experimental interface that is subject to change.
-//
-// Currently, TfLiteDelegateParams has to be allocated in a way that it's
-// trivially destructible. It will be stored as `builtin_data` field in
-// `TfLiteNode` of the delegate node.
-//
-// See also the `CreateDelegateParams` function in `interpreter.cc` for details.
-typedef struct TfLiteDelegateParams {
-  struct TfLiteDelegate* delegate;
-  TfLiteIntArray* nodes_to_replace;
-  TfLiteIntArray* input_tensors;
-  TfLiteIntArray* output_tensors;
-} TfLiteDelegateParams;
-
-typedef struct TfLiteContext {
-  // Number of tensors in the context.
-  size_t tensors_size;
-
-  // The execution plan contains a list of the node indices in execution
-  // order. execution_plan->size is the current number of nodes. And,
-  // execution_plan->data[0] is the first node that needs to be run.
-  // TfLiteDelegates can traverse the current execution plan by iterating
-  // through each member of this array and using GetNodeAndRegistration() to
-  // access details about a node. e.g.
-  // TfLiteIntArray* execution_plan;
-  // TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
-  // for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
-  //    int node_index = execution_plan->data[exec_index];
-  //    TfLiteNode* node;
-  //    TfLiteRegistration* reg;
-  //    context->GetNodeAndRegistration(context, node_index, &node, &reg);
-  // }
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext* context,
-                                   TfLiteIntArray** execution_plan);
-
-  // An array of tensors in the interpreter context (of length `tensors_size`)
-  TfLiteTensor* tensors;
-
-  // opaque full context ptr (an opaque c++ data structure)
-  void* impl_;
-
-  // Request memory pointer be resized. Updates dimensions on the tensor.
-  // NOTE: ResizeTensor takes ownership of new_size.
-  TfLiteStatus (*ResizeTensor)(struct TfLiteContext*, TfLiteTensor* tensor,
-                               TfLiteIntArray* new_size);
-  // Request that an error be reported with format string msg.
-  void (*ReportError)(struct TfLiteContext*, const char* msg, ...);
-
-  // Add `tensors_to_add` tensors, preserving pre-existing Tensor entries.  If
-  // non-null, the value pointed to by `first_new_tensor_index` will be set to
-  // the index of the first new tensor.
-  TfLiteStatus (*AddTensors)(struct TfLiteContext*, int tensors_to_add,
-                             int* first_new_tensor_index);
-
-  // Get a node and its registration by node_index.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*GetNodeAndRegistration)(
-      struct TfLiteContext*, int node_index, TfLiteNode** node,
-      struct TfLiteRegistration** registration);
-
-  // Replace ops with one or more stub delegate operations. This function
-  // does not take ownership of `nodes_to_replace`.
-  TfLiteStatus (*ReplaceNodeSubsetsWithDelegateKernels)(
-      struct TfLiteContext*, struct TfLiteRegistration registration,
-      const TfLiteIntArray* nodes_to_replace, struct TfLiteDelegate* delegate);
-
-  // Number of threads that are recommended to subsystems like gemmlowp and
-  // eigen.
-  int recommended_num_threads;
-
-  // Access external contexts by type.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteExternalContext* (*GetExternalContext)(struct TfLiteContext*,
-                                               TfLiteExternalContextType);
-  // Set the value of an external context. Does not take ownership of the
-  // pointer.
-  // WARNING: This is an experimental interface that is subject to change.
-  void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType,
-                             TfLiteExternalContext*);
-
-  // Flag for allowing float16 precision for FP32 calculation.
-  // default: false.
-  // WARNING: This is an experimental API and subject to change.
-  bool allow_fp32_relax_to_fp16;
-
-  // Pointer to the op-level profiler, if set; nullptr otherwise.
-  void* profiler;
-
-  // Allocate persistent buffer which has the same life time as the interpreter.
-  // Returns nullptr on failure.
-  // The memory is allocated from heap for TFL, and from tail in TFLM.
-  // This method is only available in Init or Prepare stage.
-  // WARNING: This is an experimental interface that is subject to change.
-  void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes);
-
-  // Allocate a buffer which will be deallocated right after invoke phase.
-  // The memory is allocated from heap in TFL, and from volatile arena in TFLM.
-  // This method is only available in invoke stage.
-  // NOTE: If possible use RequestScratchBufferInArena method to avoid memory
-  // allocation during inference time.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*AllocateBufferForEval)(struct TfLiteContext* ctx, size_t bytes,
-                                        void** ptr);
-
-  // Request a scratch buffer in the arena through static memory planning.
-  // This method is only available in Prepare stage and the buffer is allocated
-  // by the interpreter between Prepare and Eval stage. In Eval stage,
-  // GetScratchBuffer API can be used to fetch the address.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*RequestScratchBufferInArena)(struct TfLiteContext* ctx,
-                                              size_t bytes, int* buffer_idx);
-
-  // Get the scratch buffer pointer.
-  // This method is only available in Eval stage.
-  // WARNING: This is an experimental interface that is subject to change.
-  void* (*GetScratchBuffer)(struct TfLiteContext* ctx, int buffer_idx);
-
-  // Resize the memory pointer of the `tensor`. This method behaves the same as
-  // `ResizeTensor`, except that it makes a copy of the shape array internally
-  // so the shape array could be deallocated right afterwards.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*ResizeTensorExplicit)(struct TfLiteContext* ctx,
-                                       TfLiteTensor* tensor, int dims,
-                                       const int* shape);
-
-  // This method provides a preview of post-delegation partitioning. Each
-  // TfLiteDelegateParams in the referenced array corresponds to one instance of
-  // the delegate kernel.
-  // Example usage:
-  //
-  // TfLiteIntArray* nodes_to_replace = ...;
-  // TfLiteDelegateParams* params_array;
-  // int num_partitions = 0;
-  // TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
-  //    context, nodes_to_replace, &params_array, &num_partitions));
-  // for (int idx = 0; idx < num_partitions; idx++) {
-  //    const auto& partition_params = params_array[idx];
-  //    ...
-  // }
-  //
-  // NOTE: The context owns the memory referenced by partition_params_array. It
-  // will be cleared with another call to PreviewDelegatePartitioning, or after
-  // TfLiteDelegate::Prepare returns.
-  //
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*PreviewDelegatePartitioning)(
-      struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
-      TfLiteDelegateParams** partition_params_array, int* num_partitions);
-
-  // Returns a TfLiteTensor struct for a given index.
-  // WARNING: This is an experimental interface that is subject to change.
-  // WARNING: This method may not be available on all platforms.
-  TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context,
-                             int tensor_idx);
-
-  // Returns a TfLiteEvalTensor struct for a given index.
-  // WARNING: This is an experimental interface that is subject to change.
-  // WARNING: This method may not be available on all platforms.
-  TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
-                                     int tensor_idx);
-} TfLiteContext;
-
-typedef struct TfLiteRegistration {
-  // Initializes the op from serialized data.
-  // If a built-in op:
-  //   `buffer` is the op's params data (TfLiteLSTMParams*).
-  //   `length` is zero.
-  // If custom op:
-  //   `buffer` is the op's `custom_options`.
-  //   `length` is the size of the buffer.
-  //
-  // Returns a type-punned (i.e. void*) opaque data (e.g. a primitive pointer
-  // or an instance of a struct).
-  //
-  // The returned pointer will be stored with the node in the `user_data` field,
-  // accessible within prepare and invoke functions below.
-  // NOTE: if the data is already in the desired format, simply implement this
-  // function to return `nullptr` and implement the free function to be a no-op.
-  void* (*init)(TfLiteContext* context, const char* buffer, size_t length);
-
-  // The pointer `buffer` is the data previously returned by an init invocation.
-  void (*free)(TfLiteContext* context, void* buffer);
-
-  // prepare is called when the inputs this node depends on have been resized.
-  // context->ResizeTensor() can be called to request output tensors to be
-  // resized.
-  //
-  // Returns kTfLiteOk on success.
-  TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node);
-
-  // Execute the node (should read node->inputs and output to node->outputs).
-  // Returns kTfLiteOk on success.
-  TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node);
-
-  // profiling_string is called during summarization of profiling information
-  // in order to group executions together. Providing a value here will cause a
-  // given op to appear multiple times in the profiling report. This is
-  // particularly useful for custom ops that can perform significantly
-  // different calculations depending on their `user_data`.
-  const char* (*profiling_string)(const TfLiteContext* context,
-                                  const TfLiteNode* node);
-
-  // Builtin codes. If this kernel refers to a builtin this is the code
-  // of the builtin. This is so we can do marshaling to other frameworks like
-  // NN API.
-  // Note: It is the responsibility of the registration binder to set this
-  // properly.
-  int32_t builtin_code;
-
-  // Custom op name. If the op is a builtin, this will be null.
-  // Note: It is the responsibility of the registration binder to set this
-  // properly.
-  // WARNING: This is an experimental interface that is subject to change.
-  const char* custom_name;
-
-  // The version of the op.
-  // Note: It is the responsibility of the registration binder to set this
-  // properly.
-  int version;
-} TfLiteRegistration;
-
-// The flags used in `TfLiteDelegate`. Note that this is a bitmask, so the
-// values should be 1, 2, 4, 8, ...etc.
-typedef enum TfLiteDelegateFlags {
-  kTfLiteDelegateFlagsNone = 0,
-  // The flag is set if the delegate can handle dynamic sized tensors.
-  // For example, the output shape of a `Resize` op with non-constant shape
-  // can only be inferred when the op is invoked.
-  // In this case, the Delegate is responsible for calling
-  // `SetTensorToDynamic` to mark the tensor as a dynamic tensor, and calling
-  // `ResizeTensor` when invoking the op.
-  //
-  // If the delegate isn't capable of handling dynamic tensors, this flag
-  // needs to be set to false.
-  kTfLiteDelegateFlagsAllowDynamicTensors = 1,
-
-  // This flag can be used by delegates (that allow dynamic tensors) to ensure
-  // applicable tensor shapes are automatically propagated in the case of tensor
-  // resizing.
-  // This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors
-  // of a delegate kernel will have correct shapes before its Prepare() method
-  // is called. The runtime leverages TFLite builtin ops in the original
-  // execution plan to propagate shapes.
-  //
-  // A few points to note:
-  // 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is
-  // false, this one is redundant since the delegate kernels are re-initialized
-  // every time tensors are resized.
-  // 2. Enabling this flag adds some overhead to AllocateTensors(), since extra
-  // work is required to prepare the original execution plan.
-  // 3. This flag requires that the original execution plan only have ops with
-  // valid registrations (and not 'dummy' custom ops like with Flex).
-  // WARNING: This feature is experimental and subject to change.
-  kTfLiteDelegateFlagsRequirePropagatedShapes = 2
-} TfLiteDelegateFlags;
-
-// WARNING: This is an experimental interface that is subject to change.
-typedef struct TfLiteDelegate {
-  // Data that the delegate needs to identify itself. This data is owned by
-  // the delegate. The delegate is owned by the user code, so the delegate is
-  // responsible for releasing this data when it is destroyed.
-  void* data_;
-
-  // Invoked by ModifyGraphWithDelegate. This prepare is called, giving the
-  // delegate a view of the current graph through TfLiteContext*. It typically
-  // will look at the nodes and call ReplaceNodeSubsetsWithDelegateKernels()
-  // to ask the TensorFlow lite runtime to create macro-nodes to represent
-  // delegated subgraphs of the original graph.
-  TfLiteStatus (*Prepare)(TfLiteContext* context,
-                          struct TfLiteDelegate* delegate);
-
-  // Copy the data from delegate buffer handle into raw memory of the given
-  // 'tensor'. Note that the delegate is allowed to allocate the raw bytes as
-  // long as it follows the rules for kTfLiteDynamic tensors, in which case this
-  // cannot be null.
-  TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
-                                       struct TfLiteDelegate* delegate,
-                                       TfLiteBufferHandle buffer_handle,
-                                       TfLiteTensor* tensor);
-
-  // Copy the data from raw memory of the given 'tensor' to delegate buffer
-  // handle. This can be null if the delegate doesn't use its own buffer.
-  TfLiteStatus (*CopyToBufferHandle)(TfLiteContext* context,
-                                     struct TfLiteDelegate* delegate,
-                                     TfLiteBufferHandle buffer_handle,
-                                     TfLiteTensor* tensor);
-
-  // Free the Delegate Buffer Handle. Note: This only frees the handle, but
-  // this doesn't release the underlying resource (e.g. textures). The
-  // resources are either owned by application layer or the delegate.
-  // This can be null if the delegate doesn't use its own buffer.
-  void (*FreeBufferHandle)(TfLiteContext* context,
-                           struct TfLiteDelegate* delegate,
-                           TfLiteBufferHandle* handle);
-
-  // Bitmask flags. See the comments in `TfLiteDelegateFlags`.
-  int64_t flags;
-} TfLiteDelegate;
-
-// Build a 'null' delegate, with all the fields properly set to their default
-// values.
-TfLiteDelegate TfLiteDelegateCreate();
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-#endif  // TENSORFLOW_LITE_C_COMMON_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/core/api/error_reporter.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/core/api/error_reporter.h
@@ -1,59 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
-#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
-
-#include <cstdarg>
-
-namespace tflite {
-
-/// A functor that reports errors to the supporting system. Invoked similarly
-/// to printf.
-///
-/// Usage:
-///  ErrorReporter foo;
-///  foo.Report("test %d", 5);
-/// or
-///  va_list args;
-///  foo.Report("test %d", args); // where args is va_list
-///
-/// Subclass ErrorReporter to provide another reporting destination.
-/// For example, if you have a GUI program, you might redirect to a buffer
-/// that drives a GUI error log box.
-class ErrorReporter {
- public:
-  virtual ~ErrorReporter() {}
-  virtual int Report(const char* format, va_list args) = 0;
-  int Report(const char* format, ...);
-  int ReportError(void*, const char* format, ...);
-};
-
-}  // namespace tflite
-
-// You should not make bare calls to the error reporter, instead use the
-// TF_LITE_REPORT_ERROR macro, since this allows message strings to be
-// stripped when the binary size has to be optimized. If you are looking to
-// reduce binary size, define TF_LITE_STRIP_ERROR_STRINGS when compiling and
-// every call will be stubbed out, taking no memory.
-#ifndef TF_LITE_STRIP_ERROR_STRINGS
-#define TF_LITE_REPORT_ERROR(reporter, ...)                             \
-  do {                                                                  \
-    static_cast<tflite::ErrorReporter*>(reporter)->Report(__VA_ARGS__); \
-  } while (false)
-#else  // TF_LITE_STRIP_ERROR_STRINGS
-#define TF_LITE_REPORT_ERROR(reporter, ...)
-#endif  // TF_LITE_STRIP_ERROR_STRINGS
-
-#endif  // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/core/api/flatbuffer_conversions.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/core/api/flatbuffer_conversions.h
@@ -1,253 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
-#define TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
-
-// These functions transform codes and data structures that are defined in the
-// flatbuffer serialization format into in-memory values that are used by the
-// runtime API and interpreter.
-
-#include <cstddef>
-#include <new>
-#include <type_traits>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-
-namespace tflite {
-
-// Interface class for builtin data allocations.
-class BuiltinDataAllocator {
- public:
-  virtual void* Allocate(size_t size, size_t alignment_hint) = 0;
-  virtual void Deallocate(void* data) = 0;
-
-  // Allocate a structure, but make sure it is a POD structure that doesn't
-  // require constructors to run. The reason we do this, is that Interpreter's C
-  // extension part will take ownership so destructors will not be run during
-  // deallocation.
-  template <typename T>
-  T* AllocatePOD() {
-    // TODO(b/154346074): Change this to is_trivially_destructible when all
-    // platform targets support that properly.
-    static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
-    void* allocated_memory = this->Allocate(sizeof(T), alignof(T));
-    return new (allocated_memory) T;
-  }
-
-  virtual ~BuiltinDataAllocator() {}
-};
-
-// Parse the appropriate data out of the op.
-//
-// This handles builtin data explicitly as there are flatbuffer schemas.
-// If it returns kTfLiteOk, it passes the data out with `builtin_data`. The
-// calling function has to pass in an allocator object, and this allocator
-// will be called to reserve space for the output data. If the calling
-// function's allocator reserves memory on the heap, then it's the calling
-// function's responsibility to free it.
-// If it returns kTfLiteError, `builtin_data` will be `nullptr`.
-TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
-                         ErrorReporter* error_reporter,
-                         BuiltinDataAllocator* allocator, void** builtin_data);
-
-// Converts the tensor data type used in the flat buffer to the representation
-// used by the runtime.
-TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
-                               ErrorReporter* error_reporter);
-
-TfLiteStatus ParseAbs(const Operator* op, ErrorReporter* error_reporter,
-                      BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
-                      BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
-                         BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
-                         BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter,
-                       BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseConcatenation(const Operator* op,
-                                ErrorReporter* error_reporter,
-                                BuiltinDataAllocator* allocator,
-                                void** builtin_data);
-
-TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
-                         BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseCos(const Operator* op, ErrorReporter* error_reporter,
-                      BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseDepthwiseConv2D(const Operator* op,
-                                  ErrorReporter* error_reporter,
-                                  BuiltinDataAllocator* allocator,
-                                  void** builtin_data);
-
-TfLiteStatus ParseDequantize(const Operator* op, ErrorReporter* error_reporter,
-                             BuiltinDataAllocator* allocator,
-                             void** builtin_data);
-
-TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter,
-                        BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseFloor(const Operator* op, ErrorReporter* error_reporter,
-                        BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseFullyConnected(const Operator* op,
-                                 ErrorReporter* error_reporter,
-                                 BuiltinDataAllocator* allocator,
-                                 void** builtin_data);
-
-TfLiteStatus ParseGreater(const Operator* op, ErrorReporter* error_reporter,
-                          BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseGreaterEqual(const Operator* op,
-                               ErrorReporter* error_reporter,
-                               BuiltinDataAllocator* allocator,
-                               void** builtin_data);
-
-TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
-                            BuiltinDataAllocator* allocator,
-                            void** builtin_data);
-
-TfLiteStatus ParseL2Normalization(const Operator* op,
-                                  ErrorReporter* error_reporter,
-                                  BuiltinDataAllocator* allocator,
-                                  void** builtin_data);
-
-TfLiteStatus ParseLess(const Operator* op, ErrorReporter* error_reporter,
-                       BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseLessEqual(const Operator* op, ErrorReporter* error_reporter,
-                            BuiltinDataAllocator* allocator,
-                            void** builtin_data);
-
-TfLiteStatus ParseLog(const Operator* op, ErrorReporter* error_reporter,
-                      BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseLogicalAnd(const Operator* op, ErrorReporter* error_reporter,
-                             BuiltinDataAllocator* allocator,
-                             void** builtin_data);
-
-TfLiteStatus ParseLogicalNot(const Operator* op, ErrorReporter* error_reporter,
-                             BuiltinDataAllocator* allocator,
-                             void** builtin_data);
-
-TfLiteStatus ParseLogicalOr(const Operator* op, ErrorReporter* error_reporter,
-                            BuiltinDataAllocator* allocator,
-                            void** builtin_data);
-
-TfLiteStatus ParseLogistic(const Operator* op, ErrorReporter* error_reporter,
-                           BuiltinDataAllocator* allocator,
-                           void** builtin_data);
-
-TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
-                          BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseMinimum(const Operator* op, ErrorReporter* error_reporter,
-                          BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
-                      BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseNeg(const Operator* op, ErrorReporter* error_reporter,
-                      BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseNotEqual(const Operator* op, ErrorReporter* error_reporter,
-                           BuiltinDataAllocator* allocator,
-                           void** builtin_data);
-
-TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter,
-                       BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParsePad(const Operator* op, ErrorReporter* error_reporter,
-                      BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParsePadV2(const Operator* op, ErrorReporter* error_reporter,
-                        BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter,
-                       BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParsePrelu(const Operator* op, ErrorReporter* error_reporter,
-                        BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter,
-                           BuiltinDataAllocator* allocator,
-                           void** builtin_data);
-
-TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
-                          BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseRelu(const Operator* op, ErrorReporter* error_reporter,
-                       BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseRelu6(const Operator* op, ErrorReporter* error_reporter,
-                        BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter,
-                          BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseResizeNearestNeighbor(const Operator* op,
-                                        ErrorReporter* error_reporter,
-                                        BuiltinDataAllocator* allocator,
-                                        void** builtin_data);
-
-TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter,
-                        BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
-                        BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseSin(const Operator* op, ErrorReporter* error_reporter,
-                      BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
-                          BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
-                        BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter,
-                       BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter,
-                         BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseStridedSlice(const Operator* op,
-                               ErrorReporter* error_reporter,
-                               BuiltinDataAllocator* allocator,
-                               void** builtin_data);
-
-TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
-                      BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter,
-                       BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseTanh(const Operator* op, ErrorReporter* error_reporter,
-                       BuiltinDataAllocator* allocator, void** builtin_data);
-
-TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
-                         BuiltinDataAllocator* allocator, void** builtin_data);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/core/api/op_resolver.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/core/api/op_resolver.h
@@ -1,48 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
-#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-
-namespace tflite {
-
-/// Abstract interface that returns TfLiteRegistrations given op codes or custom
-/// op names. This is the mechanism that ops being referenced in the flatbuffer
-/// model are mapped to executable function pointers (TfLiteRegistrations).
-class OpResolver {
- public:
-  /// Finds the op registration for a builtin operator by enum code.
-  virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
-                                           int version) const = 0;
-  /// Finds the op registration of a custom operator by op name.
-  virtual const TfLiteRegistration* FindOp(const char* op,
-                                           int version) const = 0;
-  virtual ~OpResolver() {}
-};
-
-// Handles the logic for converting between an OperatorCode structure extracted
-// from a flatbuffer and information about a registered operator
-// implementation.
-TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode,
-                                       const OpResolver& op_resolver,
-                                       ErrorReporter* error_reporter,
-                                       const TfLiteRegistration** registration);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/core/api/profiler.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/core/api/profiler.h
@@ -1,194 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_CORE_API_PROFILER_H_
-#define TENSORFLOW_LITE_CORE_API_PROFILER_H_
-
-#include <cstdint>
-
-namespace tflite {
-
-// A simple utility for enabling profiled event tracing in TensorFlow Lite.
-// Implementations must provide the four-argument BeginEvent and the
-// one-argument EndEvent; the other virtual hooks default to doing nothing.
-class Profiler {
- public:
-  // Since a given Profiler instance might only be interested in certain event
-  // types, each event type value is a distinct bit so that a Profiler can use
-  // bitmasking operations to determine whether an event should be recorded or
-  // not.
-  enum class EventType {
-    // Default event type, the metadata field has no special significance.
-    DEFAULT = 1,
-
-    // The event is an operator invocation and the event_metadata field is the
-    // index of operator node.
-    OPERATOR_INVOKE_EVENT = 2,
-
-    // The event is an invocation for an internal operator of a TFLite delegate.
-    // The event_metadata field is the index of operator node that's specific to
-    // the delegate.
-    DELEGATE_OPERATOR_INVOKE_EVENT = 4,
-
-    // The event is a recording of runtime instrumentation such as the overall
-    // TFLite runtime status, the TFLite delegate status (if a delegate
-    // is applied), and the overall model inference latency etc.
-    // Note, the delegate status and overall status are stored as separate
-    // event_metadata fields. In particular, the delegate status is encoded
-    // as DelegateStatus::full_status().
-    GENERAL_RUNTIME_INSTRUMENTATION_EVENT = 8,
-  };
-
-  virtual ~Profiler() {}
-
-  // Signals the beginning of an event and returns a handle to the profile
-  // event. The `event_metadata1` and `event_metadata2` have different
-  // interpretations based on the actual Profiler instance and the `event_type`.
-  // For example, as for the 'SubgraphAwareProfiler' defined in
-  // lite/core/subgraph.h, when the event_type is OPERATOR_INVOKE_EVENT,
-  // `event_metadata1` represents the index of a TFLite node, and
-  // `event_metadata2` represents the index of the subgraph that this event
-  // comes from.
-  virtual uint32_t BeginEvent(const char* tag, EventType event_type,
-                              int64_t event_metadata1,
-                              int64_t event_metadata2) = 0;
-  // Similar to the above, but `event_metadata2` defaults to 0.
-  uint32_t BeginEvent(const char* tag, EventType event_type,
-                      int64_t event_metadata) {
-    return BeginEvent(tag, event_type, event_metadata, /*event_metadata2*/ 0);
-  }
-
-  // Signals an end to the specified profile event with 'event_metadata's. This
-  // is useful when 'event_metadata's are not available when the event begins
-  // or when one wants to overwrite the 'event_metadata's set at the beginning.
-  // The default implementation does nothing.
-  virtual void EndEvent(uint32_t event_handle, int64_t event_metadata1,
-                        int64_t event_metadata2) {}
-  // Signals an end to the specified profile event.
-  virtual void EndEvent(uint32_t event_handle) = 0;
-
-  // Appends an event of type 'event_type' with 'tag' and 'event_metadata'
-  // which started at 'start' and ended at 'end'. This overload forwards to the
-  // two-metadata AddEvent with `event_metadata2` set to 0.
-  // Note:
-  // In cases where ProfileSummarizer and tensorflow::StatsCalculator are used
-  // they assume the value is in "usec"; if a subclass does not report usec,
-  // the values are not meaningful.
-  // TODO karimnosseir: Revisit and make the function more clear.
-  void AddEvent(const char* tag, EventType event_type, uint64_t start,
-                uint64_t end, int64_t event_metadata) {
-    AddEvent(tag, event_type, start, end, event_metadata,
-             /*event_metadata2*/ 0);
-  }
-
-  // Two-metadata form of AddEvent. The default implementation does nothing.
-  virtual void AddEvent(const char* tag, EventType event_type, uint64_t start,
-                        uint64_t end, int64_t event_metadata1,
-                        int64_t event_metadata2) {}
-
- protected:
-  friend class ScopedProfile;
-};
-
-// Records one profile event on `profiler`: the event begins when this object
-// is constructed and ends when it is destroyed.
-// `tag` should stay valid at least as long as `profiler` does.
-// A null `profiler` is allowed; in that case nothing is profiled.
-class ScopedProfile {
- public:
-  ScopedProfile(Profiler* profiler, const char* tag,
-                Profiler::EventType event_type = Profiler::EventType::DEFAULT,
-                int64_t event_metadata = 0)
-      : profiler_(profiler),
-        event_handle_(profiler == nullptr
-                          ? 0
-                          : profiler->BeginEvent(tag, event_type,
-                                                 event_metadata)) {}
-
-  ~ScopedProfile() {
-    if (profiler_ == nullptr) return;
-    profiler_->EndEvent(event_handle_);
-  }
-
- protected:
-  // Profiler receiving the event; never deleted by this class.
-  Profiler* profiler_;
-  // Handle returned by BeginEvent, or 0 when there is no profiler.
-  uint32_t event_handle_;
-};
-
-// ScopedProfile that records an OPERATOR_INVOKE_EVENT whose metadata is
-// `node_index` converted through uint32_t.
-class ScopedOperatorProfile : public ScopedProfile {
- public:
-  ScopedOperatorProfile(Profiler* profiler, const char* tag, int node_index)
-      : ScopedProfile(
-            profiler, tag, Profiler::EventType::OPERATOR_INVOKE_EVENT,
-            /*event_metadata=*/static_cast<uint32_t>(node_index)) {}
-};
-
-// ScopedProfile that records a DELEGATE_OPERATOR_INVOKE_EVENT whose metadata
-// is `node_index` converted through uint32_t.
-class ScopedDelegateOperatorProfile : public ScopedProfile {
- public:
-  ScopedDelegateOperatorProfile(Profiler* profiler, const char* tag,
-                                int node_index)
-      : ScopedProfile(
-            profiler, tag, Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT,
-            /*event_metadata=*/static_cast<uint32_t>(node_index)) {}
-};
-
-// ScopedProfile for GENERAL_RUNTIME_INSTRUMENTATION_EVENT. On destruction it
-// reports the delegate and interpreter status recorded via
-// set_runtime_status() as the two event metadata values (both 0 if
-// set_runtime_status() was never called).
-// NOTE(review): the ScopedProfile base destructor runs afterwards and calls
-// EndEvent(event_handle_) again for the same handle -- confirm that Profiler
-// implementations tolerate that.
-class ScopedRuntimeInstrumentationProfile : public ScopedProfile {
- public:
-  ScopedRuntimeInstrumentationProfile(Profiler* profiler, const char* tag)
-      : ScopedProfile(
-            profiler, tag,
-            Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, -1) {}
-
-  // Stores the status pair to report at destruction. Ignored when there is no
-  // profiler.
-  void set_runtime_status(int64_t delegate_status, int64_t interpreter_status) {
-    if (profiler_) {
-      delegate_status_ = delegate_status;
-      interpreter_status_ = interpreter_status;
-    }
-  }
-
-  ~ScopedRuntimeInstrumentationProfile() {
-    if (profiler_) {
-      profiler_->EndEvent(event_handle_, delegate_status_, interpreter_status_);
-    }
-  }
-
- private:
-  // Zero-initialized so the destructor never reads indeterminate values when
-  // set_runtime_status() was not called.
-  int64_t delegate_status_ = 0;
-  int64_t interpreter_status_ = 0;
-};
-
-}  // namespace tflite
-
-// Two-level concatenation so that `ctr` (e.g. __COUNTER__) is macro-expanded
-// before being pasted onto `name`.
-#define TFLITE_VARNAME_UNIQ_IMPL(name, ctr) name##ctr
-#define TFLITE_VARNAME_UNIQ(name, ctr) TFLITE_VARNAME_UNIQ_IMPL(name, ctr)
-
-// Declares a uniquely named tflite::ScopedProfile in the enclosing scope,
-// constructed from `profiler` and `tag` with the default event type.
-#define TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler, tag)          \
-  tflite::ScopedProfile TFLITE_VARNAME_UNIQ(_profile_, __COUNTER__)( \
-      (profiler), (tag))
-
-// Declares a uniquely named tflite::ScopedOperatorProfile in the enclosing
-// scope for the operator node `node_index`.
-#define TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler, tag, node_index)     \
-  tflite::ScopedOperatorProfile TFLITE_VARNAME_UNIQ(_profile_, __COUNTER__)( \
-      (profiler), (tag), (node_index))
-
-// Declares a uniquely named tflite::ScopedDelegateOperatorProfile in the
-// enclosing scope for the delegate operator node `node_index`.
-#define TFLITE_SCOPED_DELEGATE_OPERATOR_PROFILE(profiler, tag, node_index) \
-  tflite::ScopedDelegateOperatorProfile TFLITE_VARNAME_UNIQ(               \
-      _profile_, __COUNTER__)((profiler), (tag), (node_index))
-
-// Records an instantaneous GENERAL_RUNTIME_INSTRUMENTATION_EVENT on `profiler`
-// (BeginEvent immediately followed by EndEvent), passing `delegate_status` and
-// `interpreter_status` as the two event metadata values. Does nothing when
-// `profiler` is null.
-// The event type is fully qualified so the macro also works outside namespace
-// tflite. The expansion keeps its historical trailing semicolon, so existing
-// call sites written with or without one keep compiling.
-#define TFLITE_ADD_RUNTIME_INSTRUMENTATION_EVENT(                       \
-    profiler, tag, delegate_status, interpreter_status)                 \
-  do {                                                                  \
-    if (profiler) {                                                     \
-      const auto handle = (profiler)->BeginEvent(                       \
-          (tag),                                                        \
-          tflite::Profiler::EventType::                                 \
-              GENERAL_RUNTIME_INSTRUMENTATION_EVENT,                    \
-          (delegate_status), (interpreter_status));                     \
-      (profiler)->EndEvent(handle);                                     \
-    }                                                                   \
-  } while (false);
-
-#endif  // TENSORFLOW_LITE_CORE_API_PROFILER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/core/api/tensor_utils.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/core/api/tensor_utils.h
@@ -1,28 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
-#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
-
-#include "tensorflow/lite/c/common.h"
-
-namespace tflite {
-
-// Resets a variable tensor to the default value.
-// Returns a TfLiteStatus; the definition is not part of this header, so the
-// exact default value and the failure cases must be checked there.
-TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/common.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/common.h
@@ -1,956 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
-
-#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
-#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#endif
-#endif
-
-#include <functional>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-// NOTE(review): not referenced in the visible part of this header; presumably
-// a sign factor applied to shift amounts -- confirm against its users.
-constexpr int kReverseShift = -1;
-
-// Writes the float clamp range implied by the fused activation `ac` into
-// `*output_activation_min` and `*output_activation_max`:
-//   kNone  -> [lowest float, max float]
-//   kRelu  -> [0, max float]
-//   kRelu1 -> [-1, 1]
-//   kRelu6 -> [0, 6]
-// Any other value of `ac` leaves both outputs untouched.
-inline void GetActivationMinMax(FusedActivationFunctionType ac,
-                                float* output_activation_min,
-                                float* output_activation_max) {
-  if (ac == FusedActivationFunctionType::kNone) {
-    *output_activation_min = std::numeric_limits<float>::lowest();
-    *output_activation_max = std::numeric_limits<float>::max();
-  } else if (ac == FusedActivationFunctionType::kRelu) {
-    *output_activation_min = 0.f;
-    *output_activation_max = std::numeric_limits<float>::max();
-  } else if (ac == FusedActivationFunctionType::kRelu1) {
-    *output_activation_min = -1.f;
-    *output_activation_max = 1.f;
-  } else if (ac == FusedActivationFunctionType::kRelu6) {
-    *output_activation_min = 0.f;
-    *output_activation_max = 6.f;
-  }
-}
-
-// Clamps `x` to [output_activation_min, output_activation_max]: the lower
-// bound is applied first, then the upper bound.
-template <typename T>
-inline T ActivationFunctionWithMinMax(T x, T output_activation_min,
-                                      T output_activation_max) {
-  using std::max;
-  using std::min;
-  const T raised_to_floor = max(x, output_activation_min);
-  return min(raised_to_floor, output_activation_max);
-}
-
-// Legacy function, left for compatibility only: clamps `x` to the range that
-// GetActivationMinMax reports for the compile-time activation `Ac`.
-template <FusedActivationFunctionType Ac>
-float ActivationFunction(float x) {
-  float lower_bound;
-  float upper_bound;
-  GetActivationMinMax(Ac, &lower_bound, &upper_bound);
-  return ActivationFunctionWithMinMax(x, lower_bound, upper_bound);
-}
-
-// Adds `bias_data` (bias_size floats) to every consecutive bias_size-long
-// slice of `array_data` in place, then clamps each element to
-// [clamp_min, clamp_max]. `array_size` must be a multiple of `bias_size`
-// (checked with TFLITE_DCHECK_EQ).
-inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
-                         const float* bias_data, int array_size,
-                         float* array_data) {
-  // Note: see b/132215220: in May 2019 we thought it would be OK to replace
-  // this with the Eigen one-liner:
-  //   return (array.colwise() + bias).cwiseMax(clamp_min).cwiseMin(clamp_max).
-  // This turned out to severely regress performance: +4ms (i.e. 8%) on
-  // MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now.
-  TFLITE_DCHECK_EQ((array_size % bias_size), 0);
-#ifdef USE_NEON
-  float* array_ptr = array_data;
-  float* array_end_ptr = array_ptr + array_size;
-  const auto clamp_min_vec = vdupq_n_f32(clamp_min);
-  const auto clamp_max_vec = vdupq_n_f32(clamp_max);
-  for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
-    int i = 0;
-    // Main loop: 16 floats (four 4-lane vectors) per iteration.
-    for (; i <= bias_size - 16; i += 16) {
-      auto b0 = vld1q_f32(bias_data + i);
-      auto b1 = vld1q_f32(bias_data + i + 4);
-      auto b2 = vld1q_f32(bias_data + i + 8);
-      auto b3 = vld1q_f32(bias_data + i + 12);
-      auto a0 = vld1q_f32(array_ptr + i);
-      auto a1 = vld1q_f32(array_ptr + i + 4);
-      auto a2 = vld1q_f32(array_ptr + i + 8);
-      auto a3 = vld1q_f32(array_ptr + i + 12);
-      auto x0 = vaddq_f32(a0, b0);
-      auto x1 = vaddq_f32(a1, b1);
-      auto x2 = vaddq_f32(a2, b2);
-      auto x3 = vaddq_f32(a3, b3);
-      x0 = vmaxq_f32(clamp_min_vec, x0);
-      x1 = vmaxq_f32(clamp_min_vec, x1);
-      x2 = vmaxq_f32(clamp_min_vec, x2);
-      x3 = vmaxq_f32(clamp_min_vec, x3);
-      x0 = vminq_f32(clamp_max_vec, x0);
-      x1 = vminq_f32(clamp_max_vec, x1);
-      x2 = vminq_f32(clamp_max_vec, x2);
-      x3 = vminq_f32(clamp_max_vec, x3);
-      vst1q_f32(array_ptr + i, x0);
-      vst1q_f32(array_ptr + i + 4, x1);
-      vst1q_f32(array_ptr + i + 8, x2);
-      vst1q_f32(array_ptr + i + 12, x3);
-    }
-    // Remaining full vectors: 4 floats per iteration.
-    for (; i <= bias_size - 4; i += 4) {
-      auto b = vld1q_f32(bias_data + i);
-      auto a = vld1q_f32(array_ptr + i);
-      auto x = vaddq_f32(a, b);
-      x = vmaxq_f32(clamp_min_vec, x);
-      x = vminq_f32(clamp_max_vec, x);
-      vst1q_f32(array_ptr + i, x);
-    }
-    // Scalar tail for the last bias_size % 4 elements.
-    for (; i < bias_size; i++) {
-      array_ptr[i] = ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i],
-                                                  clamp_min, clamp_max);
-    }
-  }
-#else  // not NEON
-  for (int array_offset = 0; array_offset < array_size;
-       array_offset += bias_size) {
-    for (int i = 0; i < bias_size; i++) {
-      array_data[array_offset + i] = ActivationFunctionWithMinMax(
-          array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
-    }
-  }
-#endif
-}
-
-// Applies gemmlowp's saturating rounding doubling high multiply to `x` and
-// `quantized_multiplier`, then a rounding divide by 2^(-left_shift).
-inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
-    int32_t x, int32_t quantized_multiplier, int left_shift) {
-  const int32_t scaled =
-      gemmlowp::SaturatingRoundingDoublingHighMul(x, quantized_multiplier);
-  return gemmlowp::RoundingDivideByPOT(scaled, -left_shift);
-}
-
-// Multiplies `x` by 2^left_shift, then applies gemmlowp's saturating rounding
-// doubling high multiply with `quantized_multiplier`.
-inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
-    int32_t x, int32_t quantized_multiplier, int left_shift) {
-  const int32_t pre_shifted = x * (1 << left_shift);
-  return gemmlowp::SaturatingRoundingDoublingHighMul(pre_shifted,
-                                                     quantized_multiplier);
-}
-
-// Scales `x` by `quantized_multiplier` with a power-of-two `shift`: a positive
-// shift multiplies `x` by 2^shift before the doubling high multiply, a
-// negative one applies a rounding divide by 2^(-shift) afterwards.
-inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  int left_shift = 0;
-  int right_shift = 0;
-  if (shift > 0) {
-    left_shift = shift;
-  } else {
-    right_shift = -shift;
-  }
-  const int32_t high_mul = gemmlowp::SaturatingRoundingDoublingHighMul(
-      x * (1 << left_shift), quantized_multiplier);
-  return gemmlowp::RoundingDivideByPOT(high_mul, right_shift);
-}
-
-// Scales a 64-bit value `x` by a quantized multiplier and returns the result
-// as int32_t.
-//
-// Inputs:
-// - quantized_multiplier has fixed point at bit 31
-// - shift is -31 to +7 (negative for right shift)
-//
-// Assumptions: The following input ranges are assumed
-// - quantized_multiplier >= 0 (the usual range is (1<<30) to (1<<31)-1)
-// - scaling is chosen so final scaled result fits in int32_t
-// - input x is in the range -(1<<47) <= x < (1<<47)
-inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  assert(quantized_multiplier >= 0);
-  assert(shift >= -31 && shift < 8);
-
-  // Round the multiplier to 16 bits. For multipliers close to INT32_MAX the
-  // rounding addition would overflow a signed int, so those saturate to the
-  // largest 16-bit value instead.
-  const int32_t reduced_multiplier =
-      (quantized_multiplier < 0x7FFF0000)
-          ? ((quantized_multiplier + (1 << 15)) >> 16)
-          : 0x7FFF;
-  const int total_shift = 15 - shift;
-  // Add half of the final divisor so the right shift below rounds.
-  x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1));
-  int32_t result = x >> total_shift;
-  return result;
-}
-
-// Returns the number of leading zero bits of `integer_input`, counted within
-// the width of T. An input of 0 yields the full bit width of T.
-// Only unsigned integer types are accepted.
-template <typename T>
-int CountLeadingZeros(T integer_input) {
-  static_assert(std::is_unsigned<T>::value,
-                "Only unsigned integer types handled.");
-  if (integer_input == 0) {
-    return std::numeric_limits<T>::digits;
-  }
-#if defined(__GNUC__)
-  // __builtin_clz operates on `unsigned int` only: a wider input would be
-  // truncated and a narrower one counted against the promoted width, so each
-  // builtin is used only for the type whose width it matches.
-  if (sizeof(T) == sizeof(unsigned int)) {
-    return __builtin_clz(integer_input);
-  }
-  if (sizeof(T) == sizeof(unsigned long long)) {
-    return __builtin_clzll(integer_input);
-  }
-#endif
-  // Portable fallback: shift left until the top bit of T is set.
-  const T one_in_leading_positive = static_cast<T>(1)
-                                    << (std::numeric_limits<T>::digits - 1);
-  int leading_zeros = 0;
-  while (integer_input < one_in_leading_positive) {
-    integer_input <<= 1;
-    ++leading_zeros;
-  }
-  return leading_zeros;
-}
-
-// Returns the number of redundant sign bits of `integer_input`, i.e. how many
-// bits directly below the sign bit are equal to it, counted within the width
-// of T. An input of 0 yields std::numeric_limits<T>::digits.
-// Only signed integer types are accepted.
-template <typename T>
-inline int CountLeadingSignBits(T integer_input) {
-  static_assert(std::is_signed<T>::value, "Only signed integer types handled.");
-#if defined(__GNUC__) && !defined(__clang__)
-  // __builtin_clrsb operates on `int` only, so wider inputs must use the
-  // `long long` builtin to avoid truncation. In both cases the count is made
-  // at the builtin's width; subtracting the width difference converts it to a
-  // count within T (a no-op when T already has the builtin's width).
-  if (sizeof(T) > sizeof(int)) {
-    return __builtin_clrsbll(integer_input) -
-           (std::numeric_limits<long long>::digits -
-            std::numeric_limits<T>::digits);
-  }
-  return __builtin_clrsb(integer_input) -
-         (std::numeric_limits<int>::digits - std::numeric_limits<T>::digits);
-#else
-  using U = typename std::make_unsigned<T>::type;
-  return integer_input >= 0
-             ? CountLeadingZeros(static_cast<U>(integer_input)) - 1
-         : integer_input != std::numeric_limits<T>::min()
-             ? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
-             : 0;
-#endif
-}
-
-// Fast Floor(log_2(n)) built on the leading-sign-bit helper.
-// `Integer` must be a signed integer type of 4 or 8 bytes, and `n` must be
-// greater than 0 (enforced with TFLITE_CHECK_GT).
-template <typename Integer>
-inline Integer FloorLog2(Integer n) {
-  static_assert(std::is_integral<Integer>::value, "");
-  static_assert(std::is_signed<Integer>::value, "");
-  static_assert(sizeof(Integer) == 4 || sizeof(Integer) == 8, "");
-  TFLITE_CHECK_GT(n, 0);
-  // Index of the highest bit below the sign bit for this width.
-  const int top_value_bit = (sizeof(Integer) == 4) ? 30 : 62;
-  return top_value_bit - CountLeadingSignBits(n);
-}
-
-// Generates an INT16 look-up table for `func`, e.g. the exp(x) and 1/(1+x)
-// tables used in softmax. `func` is sampled at `num` evenly spaced points
-// covering [min, max]; samples are scaled by 32768 and clamped to
-// [-32768, 32767].
-inline void gen_lut(const std::function<double(double)>& func, double min,
-                    double max, int16_t* table, const int num) {
-  // `table` must have room for `num` entries: indices 0 .. num - 2 are filled
-  // by the loop and table[num - 1] receives func(max) afterwards.
-  // NOTE(review): the last entry presumably exists only so that lookups can
-  // compute a slope for the final interval -- confirm against the callers.
-  double step = (max - min) / (num - 1);
-  double half_step = step / 2.0;
-  for (int i = 0; i < num - 1; i++) {
-    double sample_val = TfLiteRound(func(min + i * step) * 32768.0);
-    // Value that linear interpolation between this sample and the next one
-    // would produce at the midpoint of the interval.
-    double midpoint_interp_val =
-        TfLiteRound((func(min + (i + 1) * step) * 32768.0 +
-                     TfLiteRound(func(min + i * step) * 32768.0)) /
-                    2.0);
-    // Value of `func` itself at the midpoint.
-    double midpoint_val =
-        TfLiteRound(func(min + i * step + half_step) * 32768.0);
-    double midpoint_err = midpoint_interp_val - midpoint_val;
-    // Shift the stored sample by half of the midpoint interpolation error.
-    double bias = TfLiteRound(midpoint_err / 2.0);
-    table[i] = std::min(std::max(sample_val - bias, -32768.0), 32767.0);
-  }
-  table[num - 1] =
-      std::min(std::max(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
-}
-
-// Looks up `value` in an int16_t function table, e.g. the exp() and 1/(1+x)
-// tables used in softmax, interpolating linearly between adjacent entries.
-// The top 9 bits of `value` select one of 512 base entries and the low 7 bits
-// are the interpolation offset; lut[index + 1] is read for the slope, so the
-// highest entry touched is lut[512].
-inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
-  const int bucket = 256 + (value >> 7);
-  uint16_t index = static_cast<uint16_t>(bucket);
-  assert(index < 512 && "LUT index out of range.");
-  const int16_t fraction = value & 0x7f;
-
-  // Both entries are Q0.15; the slope is kept in int16_t.
-  const int16_t lower = lut[index];
-  const int16_t rise = lut[index + 1] - lut[index];
-
-  // Q0.15 * Q0.7 = Q0.22, then round and convert from Q0.22 back to Q0.15.
-  const int32_t product = static_cast<int32_t>(rise) * fraction;
-  const int32_t delta = (product + 64) >> 7;
-
-  // Q0.15 + Q0.15
-  return lower + delta;
-}
-
-// Table of sigmoid(i/24) in 0.16 fixed-point format (value * 65536), 256
-// elements for i = 0 .. 255.
-
-// A single table serves both sigmoid and tanh, since
-// tanh(x) = 2*sigmoid(2*x) - 1.
-// Both functions are symmetric, so the table is only needed for the absolute
-// value of the input.
-static const uint16_t sigmoid_table_uint16[256] = {
-    32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498,
-    40149, 40794, 41432, 42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255,
-    46817, 47369, 47911, 48443, 48964, 49475, 49975, 50464, 50942, 51409, 51865,
-    52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485, 55834, 56174,
-    56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288,
-    59519, 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441,
-    61599, 61750, 61896, 62036, 62172, 62302, 62428, 62549, 62666, 62778, 62886,
-    62990, 63090, 63186, 63279, 63368, 63454, 63536, 63615, 63691, 63765, 63835,
-    63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405, 64450,
-    64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845,
-    64873, 64900, 64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097,
-    65115, 65132, 65149, 65164, 65179, 65194, 65208, 65221, 65234, 65246, 65258,
-    65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337, 65345, 65352, 65360,
-    65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425,
-    65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465,
-    65468, 65471, 65474, 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491,
-    65493, 65495, 65497, 65498, 65500, 65501, 65503, 65504, 65505, 65507, 65508,
-    65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65517, 65518,
-    65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525,
-    65525, 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529,
-    65529, 65529, 65530, 65530, 65530, 65530, 65531, 65531, 65531, 65531, 65531,
-    65532, 65532, 65532, 65532, 65532, 65532, 65533, 65533, 65533, 65533, 65533,
-    65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
-    65534, 65534, 65535};
-
-// TODO(b/77858996): Add these to gemmlowp.
-// Saturating addition. The primary template rejects every type at compile
-// time; only the explicit specializations are usable.
-template <typename IntegerType>
-IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) {
-  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
-  return a;
-}
-
-// int32 specialization: adds in 64 bits, then clamps the sum to the int32
-// range.
-template <>
-inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) {
-  const std::int64_t kLowest = std::numeric_limits<std::int32_t>::min();
-  const std::int64_t kHighest = std::numeric_limits<std::int32_t>::max();
-  const std::int64_t wide_sum =
-      static_cast<std::int64_t>(a) + static_cast<std::int64_t>(b);
-  if (wide_sum > kHighest) {
-    return static_cast<std::int32_t>(kHighest);
-  }
-  if (wide_sum < kLowest) {
-    return static_cast<std::int32_t>(kLowest);
-  }
-  return static_cast<std::int32_t>(wide_sum);
-}
-
-// FixedPoint overload: saturating-adds the raw values and wraps the result in
-// a FixedPoint of the same raw type and integer bits.
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
-  using FixedPointType = gemmlowp::FixedPoint<tRawType, tIntegerBits>;
-  return FixedPointType::FromRaw(SaturatingAddNonGemmlowp(a.raw(), b.raw()));
-}
-
-// Saturating subtraction. The primary template rejects every type at compile
-// time; only the explicit specializations are usable.
-template <typename IntegerType>
-IntegerType SaturatingSub(IntegerType a, IntegerType b) {
-  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
-  return a;
-}
-
-// int16 specialization: subtracts in 32 bits, then clamps the difference to
-// [-32768, 32767].
-template <>
-inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) {
-  const std::int32_t wide_diff =
-      static_cast<std::int32_t>(a) - static_cast<std::int32_t>(b);
-  if (wide_diff > 32767) {
-    return static_cast<std::int16_t>(32767);
-  }
-  if (wide_diff < -32768) {
-    return static_cast<std::int16_t>(-32768);
-  }
-  return static_cast<std::int16_t>(wide_diff);
-}
-
-// int32 specialization: subtracts in 64 bits, then clamps the difference to
-// the int32 range.
-template <>
-inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) {
-  const std::int64_t kLowest = std::numeric_limits<std::int32_t>::min();
-  const std::int64_t kHighest = std::numeric_limits<std::int32_t>::max();
-  const std::int64_t wide_diff =
-      static_cast<std::int64_t>(a) - static_cast<std::int64_t>(b);
-  if (wide_diff > kHighest) {
-    return static_cast<std::int32_t>(kHighest);
-  }
-  if (wide_diff < kLowest) {
-    return static_cast<std::int32_t>(kLowest);
-  }
-  return static_cast<std::int32_t>(wide_diff);
-}
-
-// FixedPoint overload: saturating-subtracts the raw values and wraps the
-// result in a FixedPoint of the same raw type and integer bits.
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
-  using FixedPointType = gemmlowp::FixedPoint<tRawType, tIntegerBits>;
-  return FixedPointType::FromRaw(SaturatingSub(a.raw(), b.raw()));
-}
-// End section to be moved to gemmlowp.
-
-// Multiplies `x` by 2^exponent using a left shift, saturating to the scalar
-// type's maximum or minimum when `x` lies beyond +/- the largest magnitude
-// that can be shifted safely. An exponent of 0 returns `x` unchanged.
-// NOTE(review): only left shifts are performed here, so `exponent` is
-// presumably expected to be non-negative -- confirm against the callers.
-template <typename IntegerType>
-IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) {
-  if (exponent == 0) {
-    return x;
-  }
-  using Scalar =
-      typename gemmlowp::FixedPointRawTypeTraits<IntegerType>::ScalarRawType;
-  const int kScalarBits = 8 * sizeof(Scalar);
-
-  // Largest magnitude that still fits after shifting left by `exponent`.
-  const std::int32_t threshold = ((1 << (kScalarBits - 1 - exponent)) - 1);
-  const IntegerType overflow_mask =
-      gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup<IntegerType>(threshold));
-  const IntegerType underflow_mask =
-      gemmlowp::MaskIfLessThan(x, gemmlowp::Dup<IntegerType>(-threshold));
-
-  const IntegerType saturated_min =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<Scalar>::min());
-  const IntegerType saturated_max =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<Scalar>::max());
-
-  const IntegerType shifted = gemmlowp::ShiftLeft(x, exponent);
-  const IntegerType clamped_high =
-      gemmlowp::SelectUsingMask(overflow_mask, saturated_max, shifted);
-  return gemmlowp::SelectUsingMask(underflow_mask, saturated_min,
-                                   clamped_high);
-}
-
-// FixedPoint overload. Because IntegerBits stays fixed, multiplying by a power
-// of two cannot be exact any more and has to saturate/round; the work is
-// delegated to the raw-value overload.
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits>
-SaturatingRoundingMultiplyByPOTParam(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) {
-  using FixedPointType = gemmlowp::FixedPoint<tRawType, tIntegerBits>;
-  return FixedPointType::FromRaw(
-      SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
-}
-
-// Converts an int32_t multiplier to int16_t with rounding: adds 1 << 15 and
-// keeps the upper 16 bits. Inputs so close to the int32_t maximum that the
-// rounding addition could not be performed are mapped to the int16_t maximum.
-// `multiplier_int32_t` must be non-negative (debug-checked).
-inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
-                                            int16_t* multiplier_int16_t) {
-  TFLITE_DCHECK_GE(multiplier_int32_t, 0);
-  static constexpr int32_t kRoundingOffset = 1 << 15;
-  const int32_t saturation_start =
-      std::numeric_limits<int32_t>::max() - kRoundingOffset;
-  if (multiplier_int32_t < saturation_start) {
-    const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16;
-    TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset);
-    TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset);
-    *multiplier_int16_t = result;
-    TFLITE_DCHECK_EQ(*multiplier_int16_t, result);
-  } else {
-    *multiplier_int16_t = std::numeric_limits<int16_t>::max();
-  }
-}
-
-// Minimum output bits to accommodate log of maximum input range.  It actually
-// does not matter if one considers, say, [-64,64] or [-64,64).
-//
-// For example, run this through Octave:
-// [0:127; ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
-//
-// Resulting mapping of input_bits to output bits:
-//   <=1 -> 1, 2..4 -> 2, 5..10 -> 3, 11..21 -> 4, 22..44 -> 5, 45..90 -> 6,
-//   >90 -> 7.
-constexpr int min_log_x_output_bits(int input_bits) {
-  return input_bits <= 1    ? 1
-         : input_bits <= 4  ? 2
-         : input_bits <= 10 ? 3
-         : input_bits <= 21 ? 4
-         : input_bits <= 44 ? 5
-         : input_bits <= 90 ? 6
-                            : 7;
-}
-
-// Fixed-point implementation behind log_x_for_x_greater_than_or_equal_to_1:
-// takes an input with InputIntegerBits integer bits and returns a result with
-// OutputIntegerBits integer bits. The result is assembled as
-// z_pow_2_adj * log(2) plus a rational correction term (num / denom).
-//
-// Although currently the name of this function says that it cannot handle
-// values less than 1, in practice it can handle as low as 1/x_max, where
-// x_max is the largest representable input.  In other words, the output range
-// is symmetric.
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1_impl(
-    gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
-  // assert(__builtin_clz(0u) >= std::numeric_limits<uint32_t>::digits - 1);
-  // assert(__builtin_clz(0u) <= std::numeric_limits<uint32_t>::digits);
-  using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
-  // The reason for accumulating the result with an extra bit of headroom is
-  // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
-  // recip_denom will otherwise introduce an error.
-  static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
-  using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumIntegerBits>;
-
-  // Q0.31 constants; each raw value is paired with the expression it encodes.
-  const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1488522236, std::log(2.0));
-  const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5)));
-  const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1518500250, std::sqrt(0.5));
-  const FixedPoint0 one_quarter =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0);
-
-  const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1057819769,
-      2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0)));
-
-  const FixedPointAccum shifted_quarter =
-      gemmlowp::Rescale<kAccumIntegerBits>(one_quarter);
-
-  // Reinterpret the input value as Q0.31, because we will figure out the
-  // required shift "ourselves" instead of using, say, Rescale.
-  FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
-  // z_a_pow_2 = input_integer_bits - z_a_headroom;
-  int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32_t>(z_a.raw()));
-  FixedPoint0 r_a_tmp =
-      SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
-  const int32_t r_a_raw =
-      SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
-  // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
-  // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
-  //                   InputIntegerBits - z_b_headroom - 0.25);
-  const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  // z_b is treated like z_a, but premultiplying by sqrt(0.5).
-  FixedPoint0 z_b = z_a * sqrt_half;
-  int z_b_headroom = CountLeadingZeros(static_cast<uint32_t>(z_b.raw())) - 1;
-  const int32_t r_b_raw =
-      SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
-  const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  // Keep the smaller normalized mantissa and the larger power-of-two term of
-  // the two candidates (a and b).
-  const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
-  const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw(
-      std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw()));
-
-  const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half);
-  FixedPoint0 q = r - sqrt_sqrt_half;
-  q = q + q;
-
-  // Rational approximation: num / (1 + denom_minus_one_0).
-  const FixedPoint0 common_sq = q * q;
-  const FixedPoint0 num = q * r + q * common_sq * alpha_n;
-  const FixedPoint0 denom_minus_one_0 =
-      p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q;
-  const FixedPoint0 recip_denom =
-      one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0);
-
-  const FixedPointAccum num_scaled = gemmlowp::Rescale<kAccumIntegerBits>(num);
-  return gemmlowp::Rescale<OutputIntegerBits>(z_pow_2_adj * log_2 +
-                                              num_scaled * recip_denom);
-}
-
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1(
-    gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
-  static_assert(
-      OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
-      "Output integer bits must be sufficient to accommodate logs of inputs.");
-  return log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
-                                                     InputIntegerBits>(
-      input_val);
-}
-
-inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
-                             int* num_bits_over_unit) {
-  int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(x));
-  // This is the number of bits to the left of the binary point above 1.0.
-  // Consider x=1.25.  In that case shifted_scale=0.8 and
-  // no later adjustment will be needed.
-  *num_bits_over_unit = x_integer_digits - headroom_plus_one;
-  const int32_t shifted_sum_minus_one =
-      static_cast<int32_t>((static_cast<uint32_t>(x) << headroom_plus_one) -
-                           (static_cast<uint32_t>(1) << 31));
-
-  gemmlowp::FixedPoint<int32_t, 0> shifted_scale =
-      gemmlowp::one_over_one_plus_x_for_x_in_0_1(
-          gemmlowp::FixedPoint<int32_t, 0>::FromRaw(shifted_sum_minus_one));
-  return shifted_scale.raw();
-}
-
-inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
-                                             int32_t* output_inv_sqrt,
-                                             int* output_shift) {
-  TFLITE_DCHECK_GE(input, 0);
-  if (input <= 1) {
-    // Handle the input value 1 separately to avoid overflow in that case
-    // in the general computation below (b/143972021). Also handle 0 as if it
-    // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
-    // but rare/unrealistic input value. We can expect both to occur in some
-    // incompletely trained models, but probably not in fully trained models.
-    *output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
-    *output_shift = 0;
-    return;
-  }
-  TFLITE_DCHECK_GT(input, 1);
-  *output_shift = 11;
-  while (input >= (1 << 29)) {
-    input /= 4;
-    ++*output_shift;
-  }
-  const unsigned max_left_shift_bits =
-      CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
-  const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
-  const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
-  *output_shift -= left_shift_bit_pairs;
-  input <<= 2 * left_shift_bit_pairs;
-  TFLITE_DCHECK_GE(input, (1 << 27));
-  TFLITE_DCHECK_LT(input, (1 << 29));
-  using gemmlowp::FixedPoint;
-  using gemmlowp::Rescale;
-  using gemmlowp::SaturatingRoundingMultiplyByPOT;
-  // Using 3 integer bits gives us enough room for the internal arithmetic in
-  // this Newton-Raphson iteration.
-  using F3 = FixedPoint<int32_t, 3>;
-  using F0 = FixedPoint<int32_t, 0>;
-  const F3 fixedpoint_input = F3::FromRaw(input >> 1);
-  const F3 fixedpoint_half_input =
-      SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
-  const F3 fixedpoint_half_three =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
-  // Newton-Raphson iteration
-  // Naive unoptimized starting guess: x = 1
-  F3 x = F3::One();
-  // Naive unoptimized number of iterations: 5
-  for (int i = 0; i < 5; i++) {
-    const F3 x3 = Rescale<3>(x * x * x);
-    x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
-  }
-  const F0 fixedpoint_half_sqrt_2 =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
-  x = x * fixedpoint_half_sqrt_2;
-  *output_inv_sqrt = x.raw();
-  if (*output_shift < 0) {
-    *output_inv_sqrt <<= -*output_shift;
-    *output_shift = 0;
-  }
-  // Convert right shift (right is positive) to left shift.
-  *output_shift *= reverse_shift;
-}
-
-// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
-// BROADCASTING.
-//
-// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
-// rectangular array of numbers.
-//
-// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
-// However, as Dims<N> is to be deprecated, this class exists as an adaptor
-// to enable simple unoptimized implementations of element-wise broadcasting
-// operations.
-template <int N>
-struct NdArrayDesc {
-  // The "extent" of each dimension. Indices along dimension d must be in the
-  // half-open interval [0, extents[d]).
-  int extents[N];
-
-  // The number of *elements* (not bytes) between consecutive indices of each
-  // dimension.
-  int strides[N];
-};
-
-// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
-// BROADCASTING.
-//
-// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
-inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2,
-                            int i3) {
-  TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]);
-  TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]);
-  TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]);
-  TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]);
-  return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] +
-         i3 * desc.strides[3];
-}
-
-inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) {
-  return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
-         indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
-         indexes[4] * desc.strides[4];
-}
-
-// Given the dimensions of the operands for an element-wise binary broadcast,
-// adjusts them so that they can be directly iterated over with simple loops.
-// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and
-// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr.
-//
-// This function assumes that the two input shapes are compatible up to
-// broadcasting and the shorter one has already been prepended with 1s to be the
-// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64),
-// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that
-// Dims<N> refer to shapes in reverse order. In this case, input0_dims will be
-// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1).
-//
-// When two shapes are compatible up to broadcasting, for each dimension d,
-// the input extents are either equal, or one of them is 1.
-//
-// This function performs the following for each dimension d:
-// - If the extents are equal, then do nothing since the loop that walks over
-//   both of the input arrays is correct.
-// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1
-//   and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows
-//   array0 to be referenced *at any index* in dimension d and still access the
-//   same slice.
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(const Dims<N>& input0_dims,
-                                                const Dims<N>& input1_dims,
-                                                NdArrayDesc<N>* desc0_out,
-                                                NdArrayDesc<N>* desc1_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-
-  // Copy dims to desc.
-  for (int i = 0; i < N; ++i) {
-    desc0_out->extents[i] = input0_dims.sizes[i];
-    desc0_out->strides[i] = input0_dims.strides[i];
-    desc1_out->extents[i] = input1_dims.sizes[i];
-    desc1_out->strides[i] = input1_dims.strides[i];
-  }
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = ArraySize(input0_dims, i);
-    const int extent1 = ArraySize(input1_dims, i);
-    if (extent0 != extent1) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent1;
-      } else {
-        TFLITE_DCHECK_EQ(extent1, 1);
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent0;
-      }
-    }
-  }
-}
-
-// Copies dims to desc, calculating strides.
-template <int N>
-inline void CopyDimsToDesc(const RuntimeShape& input_shape,
-                           NdArrayDesc<N>* desc_out) {
-  int desc_stride = 1;
-  for (int i = N - 1; i >= 0; --i) {
-    desc_out->extents[i] = input_shape.Dims(i);
-    desc_out->strides[i] = desc_stride;
-    desc_stride *= input_shape.Dims(i);
-  }
-}
-
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(
-    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
-    NdArrayDesc<N>* desc0_out, NdArrayDesc<N>* desc1_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-
-  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
-  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
-
-  // Copy dims to desc, calculating strides.
-  CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
-  CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = extended_input0_shape.Dims(i);
-    const int extent1 = extended_input1_shape.Dims(i);
-    if (extent0 != extent1) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent1;
-      } else {
-        TFLITE_DCHECK_EQ(extent1, 1);
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent0;
-      }
-    }
-  }
-}
-
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(
-    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
-    const RuntimeShape& input2_shape, NdArrayDesc<N>* desc0_out,
-    NdArrayDesc<N>* desc1_out, NdArrayDesc<N>* desc2_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-  TFLITE_DCHECK(desc2_out != nullptr);
-
-  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
-  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
-  auto extended_input2_shape = RuntimeShape::ExtendedShape(N, input2_shape);
-
-  // Copy dims to desc, calculating strides.
-  CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
-  CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
-  CopyDimsToDesc<N>(extended_input2_shape, desc2_out);
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = extended_input0_shape.Dims(i);
-    const int extent1 = extended_input1_shape.Dims(i);
-    const int extent2 = extended_input2_shape.Dims(i);
-
-    int extent = extent0;
-    if (extent1 != 1) extent = extent1;
-    if (extent2 != 1) extent = extent2;
-
-    TFLITE_DCHECK(extent0 == 1 || extent0 == extent);
-    TFLITE_DCHECK(extent1 == 1 || extent1 == extent);
-    TFLITE_DCHECK(extent2 == 1 || extent2 == extent);
-
-    if (!(extent0 == extent1 && extent1 == extent2)) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent;
-      }
-      if (extent1 == 1) {
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent;
-      }
-      if (extent2 == 1) {
-        desc2_out->strides[i] = 0;
-        desc2_out->extents[i] = extent;
-      }
-    }
-  }
-}
-
-// Detailed implementation of NDOpsHelper, the indexes must be a zero array.
-// This implementation is equivalent to N nested loops. Ex, if N=4, it can be
-// re-writen as:
-// for (int b = 0; b < output.extents[0]; ++b) {
-//   for (int y = 0; y < output.extents[1]; ++y) {
-//     for (int x = 0; x < output.extents[2]; ++x) {
-//       for (int c = 0; c < output.extents[3]; ++c) {
-//           calc({b,y,x,c});
-//       }
-//     }
-//   }
-// }
-template <int N, int DIM, typename Calc>
-typename std::enable_if<DIM != N - 1, void>::type NDOpsHelperImpl(
-    const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
-  for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
-    NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
-  }
-}
-
-template <int N, int DIM, typename Calc>
-typename std::enable_if<DIM == N - 1, void>::type NDOpsHelperImpl(
-    const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
-  for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
-    calc(indexes);
-  }
-}
-
-// Execute the calc function in the innermost iteration based on the shape of
-// the output. The calc function should take a single argument of type int[N].
-template <int N, typename Calc>
-inline void NDOpsHelper(const NdArrayDesc<N>& output, const Calc& calc) {
-  int indexes[N] = {0};
-  NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
-}
-// Copied from gemmlowp::RoundDown when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the runtime argument rounded down to the nearest multiple of
-// the fixed Modulus.
-template <unsigned Modulus, typename Integer>
-Integer RoundDown(Integer i) {
-  return i - (i % Modulus);
-}
-
-// Copied from gemmlowp::RoundUp when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the runtime argument rounded up to the nearest multiple of
-// the fixed Modulus.
-template <unsigned Modulus, typename Integer>
-Integer RoundUp(Integer i) {
-  return RoundDown<Modulus>(i + Modulus - 1);
-}
-
-// Copied from gemmlowp::CeilQuotient when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the quotient a / b rounded up ('ceil') to the nearest integer.
-template <typename Integer>
-Integer CeilQuotient(Integer a, Integer b) {
-  return (a + b - 1) / b;
-}
-
-// This function is a copy of gemmlowp::HowManyThreads, copied when we dropped
-// the direct dependency of internal/optimized/ on gemmlowp.
-//
-// It computes a reasonable number of threads to use for a GEMM of shape
-// (rows, cols, depth).
-//
-// TODO(b/131910176): get rid of this function by switching each call site
-// to its own more sensible logic for its own workload.
-template <int KernelRows>
-inline int LegacyHowManyThreads(int max_num_threads, int rows, int cols,
-                                int depth) {
-  // Early-exit in the default case where multi-threading is disabled.
-  if (max_num_threads == 1) {
-    return 1;
-  }
-
-  // Ensure that each thread has KernelRows rows to process, if at all possible.
-  int thread_count = std::min(max_num_threads, rows / KernelRows);
-
-  // Limit the number of threads according to the overall size of the problem.
-  if (thread_count > 1) {
-    // Empirically determined value.
-    static constexpr std::uint64_t min_cubic_size_per_thread = 64 * 1024;
-
-    // We can only multiply two out of three sizes without risking overflow
-    const std::uint64_t cubic_size =
-        std::uint64_t(rows) * std::uint64_t(cols) * std::uint64_t(depth);
-
-    thread_count = std::min(
-        thread_count, static_cast<int>(cubic_size / min_cubic_size_per_thread));
-  }
-
-  if (thread_count < 1) {
-    thread_count = 1;
-  }
-
-  assert(thread_count > 0 && thread_count <= max_num_threads);
-  return thread_count;
-}
-
-template <typename T>
-void optimized_ops_preload_l1_stream(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 0 means no locality */ 0);
-#else
-  (void)ptr;
-#endif
-}
-
-template <typename T>
-void optimized_ops_preload_l1_keep(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3);
-#else
-  (void)ptr;
-#endif
-}
-
-template <typename T>
-void optimized_ops_prefetch_write_l1_keep(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 1 means write */ 1, /* 3 means high locality */ 3);
-#else
-  (void)ptr;
-#endif
-}
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/compatibility.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/compatibility.h
@@ -1,112 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
-
-#include <cstdint>
-
-#include "tensorflow/lite/kernels/op_macros.h"
-
-#ifndef TFLITE_DCHECK
-#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_EQ
-#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_NE
-#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_GE
-#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_GT
-#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_LE
-#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-#ifndef TFLITE_DCHECK_LT
-#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE
-#endif
-
-// TODO(ahentz): Clean up: We should stick to the DCHECK versions.
-#ifndef TFLITE_CHECK
-#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_EQ
-#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_NE
-#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_GE
-#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_GT
-#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_LE
-#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TFLITE_CHECK_LT
-#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
-#endif
-
-#ifndef TF_LITE_STATIC_MEMORY
-// TODO(b/162019032): Consider removing these type-aliases.
-using int8 = std::int8_t;
-using uint8 = std::uint8_t;
-using int16 = std::int16_t;
-using uint16 = std::uint16_t;
-using int32 = std::int32_t;
-using uint32 = std::uint32_t;
-#endif  // !defined(TF_LITE_STATIC_MEMORY)
-
-// TFLITE_DEPRECATED()
-//
-// Duplicated from absl/base/macros.h to avoid pulling in that library.
-// Marks a deprecated class, struct, enum, function, method and variable
-// declarations. The macro argument is used as a custom diagnostic message (e.g.
-// suggestion of a better alternative).
-//
-// Example:
-//
-//   class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
-//   TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
-//
-// Every usage of a deprecated entity will trigger a warning when compiled with
-// clang's `-Wdeprecated-declarations` option. This option is turned off by
-// default, but the warnings will be reported by clang-tidy.
-#if defined(__clang__) && __cplusplus >= 201103L
-#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
-#endif
-
-#ifndef TFLITE_DEPRECATED
-#define TFLITE_DEPRECATED(message)
-#endif
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/cppmath.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/cppmath.h
@@ -1,40 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
-
-#include <cmath>
-
-namespace tflite {
-
-#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) ||                           \
-    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \
-    defined(__ZEPHYR__)
-#define TF_LITE_GLOBAL_STD_PREFIX
-#else
-#define TF_LITE_GLOBAL_STD_PREFIX std
-#endif
-
-#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name) \
-  template <class T>                                  \
-  inline T tf_name(const T x) {                       \
-    return TF_LITE_GLOBAL_STD_PREFIX::std_name(x);    \
-  }
-
-DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/max.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/max.h
@@ -1,35 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
-
-#include <cmath>
-
-namespace tflite {
-
-#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__)
-inline float TfLiteMax(const float& x, const float& y) {
-  return std::max(x, y);
-}
-#else
-template <class T>
-inline T TfLiteMax(const T& x, const T& y) {
-  return std::fmax(x, y);
-}
-#endif
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/min.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/min.h
@@ -1,35 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
-
-#include <cmath>
-
-namespace tflite {
-
-#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__)
-inline float TfLiteMin(const float& x, const float& y) {
-  return std::min(x, y);
-}
-#else
-template <class T>
-inline T TfLiteMin(const T& x, const T& y) {
-  return std::fmin(x, y);
-}
-#endif
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/optimized/neon_check.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/optimized/neon_check.h
@@ -1,40 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
-
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-#define USE_NEON
-#include <arm_neon.h>
-#endif
-
-#if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON
-#define USE_NEON
-#include "NEON_2_SSE.h"
-#endif
-
-// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is
-// defined, PortableSomeFunc(args) otherwise.
-#ifdef USE_NEON
-// Always use Neon code
-#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
-
-#else
-// No NEON available: Use Portable code
-#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
-
-#endif  // defined(USE_NEON)
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/quantization_util.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/quantization_util.h
@@ -1,292 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
-
-#include <cmath>
-#include <cstdint>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-// Given the min and max values of a float array, return
-// reasonable quantization parameters to use for this array.
-template <typename T>
-QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
-                                            bool narrow_range) {
-  const T qmin = std::numeric_limits<T>::min() + (narrow_range ? 1 : 0);
-  const T qmax = std::numeric_limits<T>::max();
-  const double qmin_double = qmin;
-  const double qmax_double = qmax;
-  // 0 should always be a representable value. Let's assume that the initial
-  // min,max range contains 0.
-  TFLITE_CHECK_LE(rmin, 0.);
-  TFLITE_CHECK_GE(rmax, 0.);
-  if (rmin == rmax) {
-    // Special case where the min,max range is a point. Should be {0}.
-    TFLITE_CHECK_EQ(rmin, 0.);
-    TFLITE_CHECK_EQ(rmax, 0.);
-    QuantizationParams quantization_params;
-    quantization_params.zero_point = 0;
-    quantization_params.scale = 0.;
-    return quantization_params;
-  }
-
-  // General case.
-  //
-  // First determine the scale.
-  const double scale = (rmax - rmin) / (qmax_double - qmin_double);
-
-  // Zero-point computation.
-  // First the initial floating-point computation. The zero-point can be
-  // determined from solving an affine equation for any known pair
-  // (real value, corresponding quantized value).
-  // We know two such pairs: (rmin, qmin) and (rmax, qmax).
-  // The arithmetic error on the zero point computed from either pair
-  // will be roughly machine_epsilon * (sum of absolute values of terms)
-  // so we want to use the variant that adds the smaller terms.
-  const double zero_point_from_min = qmin_double - rmin / scale;
-  const double zero_point_from_max = qmax_double - rmax / scale;
-  const double zero_point_from_min_error =
-      std::abs(qmin_double) + std::abs(rmin / scale);
-  const double zero_point_from_max_error =
-      std::abs(qmax_double) + std::abs(rmax / scale);
-
-  const double zero_point_double =
-      zero_point_from_min_error < zero_point_from_max_error
-          ? zero_point_from_min
-          : zero_point_from_max;
-
-  // Now we need to nudge the zero point to be an integer
-  // (our zero points are integer, and this is motivated by the requirement
-  // to be able to represent the real value "0" exactly as a quantized value,
-  // which is required in multiple places, for example in Im2col with SAME
-  // padding).
-  T nudged_zero_point = 0;
-  if (zero_point_double < qmin_double) {
-    nudged_zero_point = qmin;
-  } else if (zero_point_double > qmax_double) {
-    nudged_zero_point = qmax;
-  } else {
-    nudged_zero_point = static_cast<T>(round(zero_point_double));
-  }
-  // The zero point should always be in the range of quantized value,
-  // [qmin, qmax].
-  TFLITE_CHECK_GE(nudged_zero_point, qmin);
-  TFLITE_CHECK_LE(nudged_zero_point, qmax);
-
-  // Finally, store the result nudged quantization params.
-  QuantizationParams quantization_params;
-  quantization_params.zero_point = nudged_zero_point;
-  quantization_params.scale = scale;
-  return quantization_params;
-}
-
-template <typename T>
-QuantizationParams ChooseQuantizationParams(double rmin, double rmax) {
-  return ChooseQuantizationParams<T>(rmin, rmax, false);
-}
-
-// Converts a floating-point number to an integer. For all inputs x where
-// static_cast<IntOut>(x) is legal according to the C++ standard, the result
-// is identical to that cast (i.e. the result is x with its fractional part
-// truncated whenever that is representable as IntOut).
-//
-// static_cast would cause undefined behavior for the following cases, which
-// have well-defined behavior for this function:
-//
-//  1. If x is NaN, the result is zero.
-//
-//  2. If the truncated form of x is above the representable range of IntOut,
-//     the result is std::numeric_limits<IntOut>::max().
-//
-//  3. If the truncated form of x is below the representable range of IntOut,
-//     the result is std::numeric_limits<IntOut>::min().
-//
-// Note that cases #2 and #3 cover infinities as well as finite numbers.
-//
-// The range of FloatIn must include the range of IntOut, otherwise
-// the results are undefined.
-// TODO(sfeuz): Replace by absl::SafeCast once available.
-template <class IntOut, class FloatIn>
-IntOut SafeCast(FloatIn x) {
-  static_assert(!std::numeric_limits<FloatIn>::is_integer,
-                "FloatIn is integer");
-  static_assert(std::numeric_limits<IntOut>::is_integer,
-                "IntOut is not integer");
-  static_assert(std::numeric_limits<IntOut>::radix == 2, "IntOut is base 2");
-
-  // Special case NaN, for which the logic below doesn't work.
-  if (std::isnan(x)) {
-    return 0;
-  }
-
-  // Negative values all clip to zero for unsigned results.
-  if (!std::numeric_limits<IntOut>::is_signed && x < 0) {
-    return 0;
-  }
-
-  // Handle infinities.
-  if (std::isinf(x)) {
-    return x < 0 ? std::numeric_limits<IntOut>::min()
-                 : std::numeric_limits<IntOut>::max();
-  }
-
-  // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0),
-  // unless x is zero in which case exp == 0. Note that this implies that the
-  // magnitude of x is strictly less than 2^exp.
-  int exp = 0;
-  std::frexp(x, &exp);
-
-  // Let N be the number of non-sign bits in the representation of IntOut. If
-  // the magnitude of x is strictly less than 2^N, the truncated version of x
-  // is representable as IntOut. The only representable integer for which this
-  // is not the case is kMin for signed types (i.e. -2^N), but that is covered
-  // by the fall-through below.
-  if (exp <= std::numeric_limits<IntOut>::digits) {
-    return x;
-  }
-
-  // Handle numbers with magnitude >= 2^N.
-  return x < 0 ? std::numeric_limits<IntOut>::min()
-               : std::numeric_limits<IntOut>::max();
-}
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of NEGATIVE its exponent ---
-// this is intended as a RIGHT-shift.
-//
-// Restricted to the case where the multiplier < 1 (and non-negative).
-void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
-                                         int32_t* quantized_multiplier,
-                                         int* left_shift);
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of its exponent.
-//
-// Restricted to the case where the multiplier > 1.
-void QuantizeMultiplierGreaterThanOne(double double_multiplier,
-                                      int32_t* quantized_multiplier,
-                                      int* left_shift);
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of its exponent.
-//
-// Handles an arbitrary positive multiplier. The 'shift' output-value is
-// basically the 'floating-point exponent' of the multiplier:
-// Negative for a right-shift (when the multiplier is <1), positive for a
-// left-shift (when the multiplier is >1)
-void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
-                        int* shift);
-
-// Splits a double input value into a returned fraction, and a shift value from
-// the exponent, using only bitwise and integer operations to support
-// microcontrollers and other environments without floating-point support.
-//
-// This is designed to be a replacement for how std::frexp() is used within the
-// QuantizeMultiplier() function, and so has a different signature than the
-// standard version, returning a 64-bit integer rather than a double. This
-// result has a maximum value of 1<<31, with the fraction expressed as a
-// proportion of that maximum.
-//
-// std::frexp() returns NaNs and infinities unmodified, but since we're
-// returning integers that can't represent those values, instead we return
-// a shift of std::numeric_limits<int>::max() for all bad numbers, with an int64
-// result of 0 for NaNs, std:numeric_limits<int64_t>::max() for +INFINITY, and
-// std::numeric_limits<int64_t>::min() for -INFINITY. Denormalized inputs will
-// result in return values that end up truncating some bits at the end,
-// reflecting the loss of precision inherent in denormalization.
-int64_t IntegerFrExp(double input, int* shift);
-
-// Converts an integer fraction in the format produced by IntegerFrExp (where
-// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an
-// IEEE binary64 double format result. The implementation uses only integer and
-// bitwise operators, so no floating point hardware support or emulation is
-// needed. This is here so quantized operations can run non-time-critical
-// preparation calculations on microcontrollers and other platforms without
-// float support.
-double DoubleFromFractionAndShift(int64_t fraction, int shift);
-
-// Performs a multiplication of two numbers in double format, using only integer
-// and bitwise instructions. This is aimed at supporting housekeeping functions
-// for quantized operations on microcontrollers without floating-point hardware.
-double IntegerDoubleMultiply(double a, double b);
-
-// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is
-// greater than b. It is implemented using only integer and logical instructions
-// so that it can be easily run on microcontrollers for quantized operations.
-int IntegerDoubleCompare(double a, double b);
-
-// This first creates a multiplier in a double equivalent of
-// Q(input_integer_bits).(31-input_integer_bits) representation, with extra
-// precision in the double's fractional bits.  It then splits the result into
-// significand and exponent.
-void PreprocessSoftmaxScaling(double beta, double input_scale,
-                              int input_integer_bits,
-                              int32_t* quantized_multiplier, int* left_shift);
-// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated.
-void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
-                                    int input_integer_bits,
-                                    int32_t* quantized_multiplier,
-                                    int* left_shift,
-                                    int32_t* reverse_scaling_divisor,
-                                    int* reverse_scaling_left_shift);
-// Calculate the largest input that will result in a within-bounds intermediate
-// result within MultiplyByQuantizedMultiplierGreaterThanOne.  In other words,
-// it must not overflow before we reduce the value by multiplication by the
-// input multiplier.  The negative radius is used as the minimum difference in
-// Softmax.
-int CalculateInputRadius(int input_integer_bits, int input_left_shift,
-                         int total_signed_bits = 31);
-
-// Nudges a min/max quantization range to ensure zero is zero.
-// Gymnastics with nudged zero point is to ensure that real zero maps to
-// an integer, which is required for e.g. zero-padding in convolutional layers.
-// Outputs nudged_min, nudged_max, nudged_scale.
-void NudgeQuantizationRange(const float min, const float max,
-                            const int quant_min, const int quant_max,
-                            float* nudged_min, float* nudged_max,
-                            float* nudged_scale);
-
-// Fake quantizes (quantizes and dequantizes) input_data using the scale,
-// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code
-// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor.
-void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
-                       const float nudged_max, const float* input_data,
-                       float* output_data, const float size);
-
-// If x is approximately a power of two (with any positive or negative
-// exponent), stores that exponent (i.e. log2(x)) in *log2_result, otherwise
-// returns false.
-bool CheckedLog2(const float x, int* log2_result);
-
-// Decomposes an array of double multipliers into a Q0.31 int32 representation
-// of its significand, and shift representation of its exponent.
-//
-// Handles an arbitrary multiplier. The 'shift' output-value is
-// basically the 'floating-point exponent' of the multiplier:
-// Negative for a right-shift (when the multiplier is <1), positive for a
-// left-shift (when the multiplier is >1)
-void QuantizeMultiplierArray(const double* effective_scales, size_t size,
-                             int32_t* effective_scale_significand,
-                             int* effective_shift);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/add.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/add.h
@@ -1,454 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const T* input1_data,
-                const RuntimeShape& input2_shape, const T* input2_data,
-                const RuntimeShape& output_shape, T* output_data) {
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
-        input1_data[i] + input2_data[i], params.quantized_activation_min,
-        params.quantized_activation_max);
-  }
-}
-
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const float* input1_data,
-                const RuntimeShape& input2_shape, const float* input2_data,
-                const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; i++) {
-    auto x = input1_data[i] + input2_data[i];
-    output_data[i] = ActivationFunctionWithMinMax(
-        x, params.float_activation_min, params.float_activation_max);
-  }
-}
-
-// Element-wise add that can often be used for inner loop of broadcast add as
-// well as the non-broadcast add.
-
-// This function is used for 8-bit as well as for 16-bit, but the accumulator
-// is 32-bit for both cases. The overflow does not happen due to the
-// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
-template <typename T>
-inline void AddElementwise(int size, const ArithmeticParams& params,
-                           const T* input1_data, const T* input2_data,
-                           T* output_data) {
-  TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
-  TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
-  TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
-  TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
-
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sum, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-// Scalar-broadcast add that can be used for inner loop of more general
-// broadcast add, so that, for example, scalar-broadcast with batch will still
-// be fast.
-inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
-                               uint8_t input1_data, const uint8_t* input2_data,
-                               uint8_t* output_data) {
-  TFLITE_DCHECK_GT(params.input1_offset, -256);
-  TFLITE_DCHECK_GT(params.input2_offset, -256);
-  TFLITE_DCHECK_LT(params.input1_offset, 256);
-  TFLITE_DCHECK_LT(params.input2_offset, 256);
-
-  const int32_t input1_val = params.input1_offset + input1_data;
-  const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-  const int32_t scaled_input1_val =
-      MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          shifted_input1_val, params.input1_multiplier, params.input1_shift);
-  for (int i = 0; i < size; ++i) {
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sum, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<uint8_t>(clamped_output);
-  }
-}
-
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const uint8_t* input1_data,
-                const RuntimeShape& input2_shape, const uint8_t* input2_data,
-                const RuntimeShape& output_shape, uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  TFLITE_DCHECK_GT(params.input1_offset, -256);
-  TFLITE_DCHECK_GT(params.input2_offset, -256);
-  TFLITE_DCHECK_LT(params.input1_offset, 256);
-  TFLITE_DCHECK_LT(params.input2_offset, 256);
-  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void AddGeneralParamScale(const ArithmeticParams& params,
-                                 const RuntimeShape& input1_shape,
-                                 const int16_t* input1_data,
-                                 const RuntimeShape& input2_shape,
-                                 const int16_t* input2_data,
-                                 const RuntimeShape& output_shape,
-                                 int16_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  int max_value = std::numeric_limits<int16_t>::max();
-
-  TFLITE_DCHECK_GT(params.input1_offset, -max_value);
-  TFLITE_DCHECK_GT(params.input2_offset, -max_value);
-  TFLITE_DCHECK_LT(params.input1_offset, max_value);
-  TFLITE_DCHECK_LT(params.input2_offset, max_value);
-  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int16_t* input1_data,
-                const RuntimeShape& input2_shape, const int16_t* input2_data,
-                const RuntimeShape& output_shape, int16_t* output_data,
-                bool pot_scale = true) {
-  if (!pot_scale) {
-    AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
-                         input2_data, output_shape, output_data);
-    return;
-  }
-
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-
-  const int input1_shift = params.input1_shift;
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  const int16_t output_activation_min = params.quantized_activation_min;
-  const int16_t output_activation_max = params.quantized_activation_max;
-
-  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
-  TFLITE_DCHECK_LE(input1_shift, 0);
-  TFLITE_DCHECK_LE(params.input2_shift, 0);
-  const int16_t* not_shift_input =
-      input1_shift == 0 ? input1_data : input2_data;
-  const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
-  const int input_right_shift =
-      input1_shift == 0 ? -params.input2_shift : -input1_shift;
-
-  for (int i = 0; i < flat_size; i++) {
-    // F0 uses 0 integer bits, range [-1, 1].
-    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-
-    F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
-    F0 scaled_input = F0::FromRaw(
-        gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
-    F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
-    const int16_t raw_output = result.raw();
-    const int16_t clamped_output = std::min(
-        output_activation_max, std::max(output_activation_min, raw_output));
-    output_data[i] = clamped_output;
-  }
-}
-
-// TODO(jiawen): We can implement BroadcastAdd on buffers of arbitrary
-// dimensionality if the runtime code does a single loop over one dimension
-// that handles broadcasting as the base case. The code generator would then
-// generate max(D1, D2) nested for loops.
-// TODO(benoitjacob): BroadcastAdd is intentionally duplicated from
-// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
-// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
-// reference_ops.h.
-inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
-                               const RuntimeShape& input1_shape,
-                               const float* input1_data,
-                               const RuntimeShape& input2_shape,
-                               const float* input2_data,
-                               const RuntimeShape& output_shape,
-                               float* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              ActivationFunctionWithMinMax(
-                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
-                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
-                  params.float_activation_min, params.float_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
-                               const RuntimeShape& input1_shape,
-                               const int32_t* input1_data,
-                               const RuntimeShape& input2_shape,
-                               const int32_t* input2_data,
-                               const RuntimeShape& output_shape,
-                               int32_t* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              ActivationFunctionWithMinMax(
-                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
-                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
-                  params.quantized_activation_min,
-                  params.quantized_activation_max);
-        }
-      }
-    }
-  }
-}
-
-// This function is used for 8-bit as well as for 16-bit, but the accumulator
-// is 32-bit for both cases. The overflow does not happen due to the
-// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
-template <typename T>
-inline void BroadcastAdd4DSlow(
-    const ArithmeticParams& params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              params.input1_offset +
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
-          const int32_t input2_val =
-              params.input2_offset +
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
-          const int32_t shifted_input1_val =
-              input1_val * (1 << params.left_shift);
-          const int32_t shifted_input2_val =
-              input2_val * (1 << params.left_shift);
-          const int32_t scaled_input1_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input1_val, params.input1_multiplier,
-                  params.input1_shift);
-          const int32_t scaled_input2_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input2_val, params.input2_multiplier,
-                  params.input2_shift);
-          const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-          const int32_t raw_output =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  raw_sum, params.output_multiplier, params.output_shift) +
-              params.output_offset;
-          const int32_t clamped_output =
-              std::min(params.quantized_activation_max,
-                       std::max(params.quantized_activation_min, raw_output));
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              static_cast<T>(clamped_output);
-        }
-      }
-    }
-  }
-}
-
-inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
-                                 const RuntimeShape& unswitched_input1_shape,
-                                 const uint8_t* unswitched_input1_data,
-                                 const RuntimeShape& unswitched_input2_shape,
-                                 const uint8_t* unswitched_input2_data,
-                                 const RuntimeShape& output_shape,
-                                 uint8_t* output_data) {
-  ArithmeticParams switched_params = unswitched_params;
-  switched_params.input1_offset = unswitched_params.input2_offset;
-  switched_params.input1_multiplier = unswitched_params.input2_multiplier;
-  switched_params.input1_shift = unswitched_params.input2_shift;
-  switched_params.input2_offset = unswitched_params.input1_offset;
-  switched_params.input2_multiplier = unswitched_params.input1_multiplier;
-  switched_params.input2_shift = unswitched_params.input1_shift;
-
-  const bool use_unswitched =
-      unswitched_params.broadcast_category ==
-      tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
-
-  const ArithmeticParams& params =
-      use_unswitched ? unswitched_params : switched_params;
-  const uint8_t* input1_data =
-      use_unswitched ? unswitched_input1_data : unswitched_input2_data;
-  const uint8_t* input2_data =
-      use_unswitched ? unswitched_input2_data : unswitched_input1_data;
-
-  // Fivefold nested loops. The second input resets its position for each
-  // iteration of the second loop. The first input resets its position at the
-  // beginning of the fourth loop. The innermost loop is an elementwise add of
-  // sections of the arrays.
-  uint8_t* output_data_ptr = output_data;
-  const uint8_t* input1_data_ptr = input1_data;
-  const uint8_t* input2_data_reset = input2_data;
-  // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
-  // between input shapes. y3 for input 1 is always broadcast, and so the
-  // dimension there is 1, whereas optionally y1 might be broadcast for input 2.
-  // Put another way,
-  // input1.shape.FlatSize = y0 * y1 * y2 * y4,
-  // input2.shape.FlatSize = y0 * y2 * y3 * y4.
-  int y0 = params.broadcast_shape[0];
-  int y1 = params.broadcast_shape[1];
-  int y2 = params.broadcast_shape[2];
-  int y3 = params.broadcast_shape[3];
-  int y4 = params.broadcast_shape[4];
-  if (y4 > 1) {
-    // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
-    // dimension.
-    for (int i0 = 0; i0 < y0; ++i0) {
-      const uint8_t* input2_data_ptr;
-      for (int i1 = 0; i1 < y1; ++i1) {
-        input2_data_ptr = input2_data_reset;
-        for (int i2 = 0; i2 < y2; ++i2) {
-          for (int i3 = 0; i3 < y3; ++i3) {
-            AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
-                           output_data_ptr);
-            input2_data_ptr += y4;
-            output_data_ptr += y4;
-          }
-          // We have broadcast y4 of input1 data y3 times, and now move on.
-          input1_data_ptr += y4;
-        }
-      }
-      // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
-      input2_data_reset = input2_data_ptr;
-    }
-  } else {
-    // Special case of y4 == 1, in which the innermost loop is a single element
-    // and can be combined with the next (y3) as an inner broadcast.
-    //
-    // Note that this handles the case of pure scalar broadcast when
-    // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
-    // broadcast with batch (as y2 > 1).
-    //
-    // NOTE The process is the same as the above general case except simplified
-    // for y4 == 1 and the loop over y3 is contained within the
-    // AddScalarBroadcast function.
-    for (int i0 = 0; i0 < y0; ++i0) {
-      const uint8_t* input2_data_ptr;
-      for (int i1 = 0; i1 < y1; ++i1) {
-        input2_data_ptr = input2_data_reset;
-        for (int i2 = 0; i2 < y2; ++i2) {
-          AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
-                             output_data_ptr);
-          input2_data_ptr += y3;
-          output_data_ptr += y3;
-          input1_data_ptr += 1;
-        }
-      }
-      input2_data_reset = input2_data_ptr;
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/arg_min_max.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/arg_min_max.h
@@ -1,68 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T1, typename T2, typename T3, typename Cmp>
-void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
-               const T3* input2_data, const RuntimeShape& output_shape,
-               T2* output_data, const Cmp& cmp) {
-  TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0);
-  TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1,
-                   output_shape.DimensionsCount());
-  int axis = input2_data[0];
-  if (axis < 0) {
-    axis += input1_shape.DimensionsCount();
-  }
-  const int axis_size = input1_shape.Dims(axis);
-
-  int outer_size = 1;
-  for (int i = 0; i < axis; ++i) {
-    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
-    outer_size *= input1_shape.Dims(i);
-  }
-
-  int inner_size = 1;
-  const int dims_count = input1_shape.DimensionsCount();
-  for (int i = axis + 1; i < dims_count; ++i) {
-    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1));
-    inner_size *= input1_shape.Dims(i);
-  }
-  for (int outer = 0; outer < outer_size; ++outer) {
-    for (int inner = 0; inner < inner_size; ++inner) {
-      auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
-      T2 min_max_index = 0;
-      for (int i = 1; i < axis_size; ++i) {
-        const auto& curr_value =
-            input1_data[(outer * axis_size + i) * inner_size + inner];
-        if (cmp(curr_value, min_max_value)) {
-          min_max_value = curr_value;
-          min_max_index = static_cast<T2>(i);
-        }
-      }
-      output_data[outer * inner_size + inner] = min_max_index;
-    }
-  }
-}
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/binary_function.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/binary_function.h
@@ -1,84 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// TODO(ycling): Refactoring. Remove BroadcastLogical and use the more
-// generalized and efficient BroadcastBinaryFunction.
-//
-// Also appears to duplicate MinimumMaximum.
-//
-// R: Result type. T1: Input 1 type. T2: Input 2 type.
-template <typename R, typename T1, typename T2>
-inline void BroadcastBinaryFunction4DSlow(
-    const RuntimeShape& unextended_input1_shape, const T1* input1_data,
-    const RuntimeShape& unextended_input2_shape, const T2* input2_data,
-    const RuntimeShape& unextended_output_shape, R* output_data,
-    R (*func)(T1, T2)) {
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-
-  for (int b = 0; b < output_shape.Dims(0); ++b) {
-    for (int y = 0; y < output_shape.Dims(1); ++y) {
-      for (int x = 0; x < output_shape.Dims(2); ++x) {
-        for (int c = 0; c < output_shape.Dims(3); ++c) {
-          auto out_idx = Offset(output_shape, b, y, x, c);
-          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
-          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
-          auto in1_val = input1_data[in1_idx];
-          auto in2_val = input2_data[in2_idx];
-          output_data[out_idx] = func(in1_val, in2_val);
-        }
-      }
-    }
-  }
-}
-
-// R: Result type. T1: Input 1 type. T2: Input 2 type.
-// TODO(renjieliu): Refactor other binary functions to use this one.
-template <typename R, typename T1, typename T2>
-inline void BinaryFunction(const RuntimeShape& input1_shape,
-                           const T1* input1_data,
-                           const RuntimeShape& input2_shape,
-                           const T2* input2_data,
-                           const RuntimeShape& output_shape, R* output_data,
-                           R (*func)(T1, T2)) {
-  const int flat_size =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = func(input1_data[i], input2_data[i]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/ceil.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/ceil.h
@@ -1,37 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
-
-#include <cmath>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void Ceil(const RuntimeShape& input_shape, const float* input_data,
-                 const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = std::ceil(input_data[i]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/comparisons.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/comparisons.h
@@ -1,334 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-#include "tensorflow/lite/string_util.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline bool EqualFn(T lhs, T rhs) {
-  return lhs == rhs;
-}
-
-template <typename T>
-inline bool NotEqualFn(T lhs, T rhs) {
-  return lhs != rhs;
-}
-
-template <typename T>
-inline bool GreaterFn(T lhs, T rhs) {
-  return lhs > rhs;
-}
-template <typename T>
-inline bool GreaterEqualFn(T lhs, T rhs) {
-  return lhs >= rhs;
-}
-template <typename T>
-inline bool LessFn(T lhs, T rhs) {
-  return lhs < rhs;
-}
-template <typename T>
-inline bool LessEqualFn(T lhs, T rhs) {
-  return lhs <= rhs;
-}
-
-inline bool StringRefEqualFn(const StringRef& lhs, const StringRef& rhs) {
-  if (lhs.len != rhs.len) return false;
-  for (int i = 0; i < lhs.len; ++i) {
-    if (lhs.str[i] != rhs.str[i]) return false;
-  }
-  return true;
-}
-
-inline bool StringRefNotEqualFn(const StringRef& lhs, const StringRef& rhs) {
-  return !StringRefEqualFn(lhs, rhs);
-}
-
-template <typename T>
-using ComparisonFn = bool (*)(T, T);
-
-template <typename T, ComparisonFn<T> F>
-inline void ComparisonImpl(
-    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
-  const int64_t flatsize =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int64_t i = 0; i < flatsize; ++i) {
-    output_data[i] = F(input1_data[i], input2_data[i]);
-  }
-}
-
-inline void ComparisonStringImpl(bool (*F)(const StringRef&, const StringRef&),
-                                 const RuntimeShape& input1_shape,
-                                 const TfLiteTensor* input1,
-                                 const RuntimeShape& input2_shape,
-                                 const TfLiteTensor* input2,
-                                 const RuntimeShape& output_shape,
-                                 bool* output_data) {
-  const int64_t flatsize =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int64_t i = 0; i < flatsize; ++i) {
-    const auto lhs = GetString(input1, i);
-    const auto rhs = GetString(input2, i);
-    output_data[i] = F(lhs, rhs);
-  }
-}
-
-template <ComparisonFn<float> F>
-inline void Comparison(const ComparisonParams& op_params,
-                       const RuntimeShape& input1_shape,
-                       const float* input1_data,
-                       const RuntimeShape& input2_shape,
-                       const float* input2_data,
-                       const RuntimeShape& output_shape, bool* output_data) {
-  ComparisonImpl<float, F>(op_params, input1_shape, input1_data, input2_shape,
-                           input2_data, output_shape, output_data);
-}
-
-template <typename T, ComparisonFn<int32_t> F>
-inline void ComparisonWithScaling(
-    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
-  int left_shift = op_params.left_shift;
-  int32_t input1_offset = op_params.input1_offset;
-  int32_t input1_multiplier = op_params.input1_multiplier;
-  int input1_shift = op_params.input1_shift;
-  int32_t input2_offset = op_params.input2_offset;
-  int32_t input2_multiplier = op_params.input2_multiplier;
-  int input2_shift = op_params.input2_shift;
-
-  const int64_t flatsize =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int64_t i = 0; i < flatsize; ++i) {
-    const int32_t input1_val = input1_offset + input1_data[i];
-    const int32_t input2_val = input2_offset + input2_data[i];
-    const int32_t shifted_input1_val = input1_val * (1 << left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, input1_multiplier, input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, input2_multiplier, input2_shift);
-    output_data[i] = F(scaled_input1_val, scaled_input2_val);
-  }
-}
-
-struct BroadcastComparison4DSlowCommon {
-  const RuntimeShape output_shape;
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-};
-
-inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess(
-    const RuntimeShape& unextended_input1_shape,
-    const RuntimeShape& unextended_input2_shape,
-    const RuntimeShape& unextended_output_shape) {
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-  return {RuntimeShape::ExtendedShape(4, unextended_output_shape), desc1,
-          desc2};
-}
-
-template <typename T, ComparisonFn<T> F>
-inline void BroadcastComparison4DSlowImpl(
-    const ComparisonParams& op_params,
-    const RuntimeShape& unextended_input1_shape, const T* input1_data,
-    const RuntimeShape& unextended_input2_shape, const T* input2_data,
-    const RuntimeShape& unextended_output_shape, bool* output_data) {
-  const BroadcastComparison4DSlowCommon dims =
-      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
-                                          unextended_input2_shape,
-                                          unextended_output_shape);
-
-  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
-    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
-      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
-        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
-          output_data[Offset(dims.output_shape, b, y, x, c)] =
-              F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)],
-                input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]);
-        }
-      }
-    }
-  }
-}
-
-inline void BroadcastComparison4DSlowStringImpl(
-    bool (*F)(const StringRef&, const StringRef&),
-    const RuntimeShape& unextended_input1_shape, const TfLiteTensor* input1,
-    const RuntimeShape& unextended_input2_shape, const TfLiteTensor* input2,
-    const RuntimeShape& unextended_output_shape, bool* output_data) {
-  const BroadcastComparison4DSlowCommon dims =
-      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
-                                          unextended_input2_shape,
-                                          unextended_output_shape);
-
-  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
-    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
-      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
-        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
-          const auto lhs =
-              GetString(input1, SubscriptToIndex(dims.desc1, b, y, x, c));
-          const auto rhs =
-              GetString(input2, SubscriptToIndex(dims.desc2, b, y, x, c));
-          output_data[Offset(dims.output_shape, b, y, x, c)] = F(lhs, rhs);
-        }
-      }
-    }
-  }
-}
-
-template <ComparisonFn<float> F>
-inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
-                                      const RuntimeShape& input1_shape,
-                                      const float* input1_data,
-                                      const RuntimeShape& input2_shape,
-                                      const float* input2_data,
-                                      const RuntimeShape& output_shape,
-                                      bool* output_data) {
-  BroadcastComparison4DSlowImpl<float, F>(op_params, input1_shape, input1_data,
-                                          input2_shape, input2_data,
-                                          output_shape, output_data);
-}
-
-template <typename T, ComparisonFn<int32_t> F>
-inline void BroadcastComparison4DSlowWithScaling(
-    const ComparisonParams& op_params,
-    const RuntimeShape& unextended_input1_shape, const T* input1_data,
-    const RuntimeShape& unextended_input2_shape, const T* input2_data,
-    const RuntimeShape& unextended_output_shape, bool* output_data) {
-  const BroadcastComparison4DSlowCommon dims =
-      BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
-                                          unextended_input2_shape,
-                                          unextended_output_shape);
-
-  int left_shift = op_params.left_shift;
-  int32_t input1_offset = op_params.input1_offset;
-  int32_t input1_multiplier = op_params.input1_multiplier;
-  int input1_shift = op_params.input1_shift;
-  int32_t input2_offset = op_params.input2_offset;
-  int32_t input2_multiplier = op_params.input2_multiplier;
-  int input2_shift = op_params.input2_shift;
-
-  for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
-    for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
-      for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
-        for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              input1_offset +
-              input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
-          const int32_t input2_val =
-              input2_offset +
-              input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
-          const int32_t shifted_input1_val = input1_val * (1 << left_shift);
-          const int32_t shifted_input2_val = input2_val * (1 << left_shift);
-          const int32_t scaled_input1_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input1_val, input1_multiplier, input1_shift);
-          const int32_t scaled_input2_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input2_val, input2_multiplier, input2_shift);
-          output_data[Offset(dims.output_shape, b, y, x, c)] =
-              F(scaled_input1_val, scaled_input2_val);
-        }
-      }
-    }
-  }
-}
-
-#define TFLITE_COMPARISON_OP(name)                                             \
-  inline void name(const ComparisonParams& op_params,                          \
-                   const RuntimeShape& input1_shape, const float* input1_data, \
-                   const RuntimeShape& input2_shape, const float* input2_data, \
-                   const RuntimeShape& output_shape, bool* output_data) {      \
-    Comparison<name##Fn>(op_params, input1_shape, input1_data, input2_shape,   \
-                         input2_data, output_shape, output_data);              \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void name##NoScaling(                                                 \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data,          \
-                                input2_shape, input2_data, output_shape,       \
-                                output_data);                                  \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void name##WithScaling(                                               \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data,   \
-                                       input2_shape, input2_data,              \
-                                       output_shape, output_data);             \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void Broadcast4DSlow##name##NoScaling(                                \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    BroadcastComparison4DSlowImpl<T, name##Fn>(                                \
-        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
-        output_shape, output_data);                                            \
-  }                                                                            \
-  inline void Broadcast4DSlow##name(                                           \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const float* input1_data, const RuntimeShape& input2_shape,              \
-      const float* input2_data, const RuntimeShape& output_shape,              \
-      bool* output_data) {                                                     \
-    BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data,  \
-                                        input2_shape, input2_data,             \
-                                        output_shape, output_data);            \
-  }                                                                            \
-  template <typename T>                                                        \
-  inline void Broadcast4DSlow##name##WithScaling(                              \
-      const ComparisonParams& op_params, const RuntimeShape& input1_shape,     \
-      const T* input1_data, const RuntimeShape& input2_shape,                  \
-      const T* input2_data, const RuntimeShape& output_shape,                  \
-      bool* output_data) {                                                     \
-    BroadcastComparison4DSlowWithScaling<T, name##Fn>(                         \
-        op_params, input1_shape, input1_data, input2_shape, input2_data,       \
-        output_shape, output_data);                                            \
-  }
-TFLITE_COMPARISON_OP(Equal);
-TFLITE_COMPARISON_OP(NotEqual);
-TFLITE_COMPARISON_OP(Greater);
-TFLITE_COMPARISON_OP(GreaterEqual);
-TFLITE_COMPARISON_OP(Less);
-TFLITE_COMPARISON_OP(LessEqual);
-#undef TFLITE_COMPARISON_OP
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/concatenation.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/concatenation.h
@@ -1,140 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename Scalar>
-inline void Concatenation(const ConcatenationParams& params,
-                          const RuntimeShape* const* input_shapes,
-                          const Scalar* const* input_data,
-                          const RuntimeShape& output_shape,
-                          Scalar* output_data) {
-  int axis = params.axis;
-  int inputs_count = params.inputs_count;
-  const int concat_dimensions = output_shape.DimensionsCount();
-  TFLITE_DCHECK_LT(axis, concat_dimensions);
-
-  int64_t concat_size = 0;
-  for (int i = 0; i < inputs_count; i++) {
-    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
-    for (int j = 0; j < concat_dimensions; j++) {
-      if (j != axis) {
-        MatchingDim(*input_shapes[i], j, output_shape, j);
-      }
-    }
-    concat_size += input_shapes[i]->Dims(axis);
-  }
-  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
-  int64_t outer_size = 1;
-  for (int i = 0; i < axis; ++i) {
-    outer_size *= output_shape.Dims(i);
-  }
-  // For all input arrays,
-  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
-  int64_t base_inner_size = 1;
-  for (int i = axis + 1; i < concat_dimensions; ++i) {
-    base_inner_size *= output_shape.Dims(i);
-  }
-
-  Scalar* output_ptr = output_data;
-  for (int k = 0; k < outer_size; k++) {
-    for (int i = 0; i < inputs_count; ++i) {
-      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
-      const Scalar* input_ptr = input_data[i] + k * copy_size;
-      memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
-      output_ptr += copy_size;
-    }
-  }
-}
-
-// TODO(prabhumk): This is the same as the optimized implementation.
-// TODO(prabhumk): The quantized implementation of concatentation isn't fully
-// quantized as it takes scale as a floating point value. This should be fixed
-// when optimizng this routine further.
-inline void ConcatenationWithScaling(const ConcatenationParams& params,
-                                     const RuntimeShape* const* input_shapes,
-                                     const uint8_t* const* input_data,
-                                     const RuntimeShape& output_shape,
-                                     uint8_t* output_data) {
-  int axis = params.axis;
-  const int32_t* input_zeropoint = params.input_zeropoint;
-  const float* input_scale = params.input_scale;
-  int inputs_count = params.inputs_count;
-  const int32_t output_zeropoint = params.output_zeropoint;
-  const float output_scale = params.output_scale;
-
-  const int concat_dimensions = output_shape.DimensionsCount();
-  TFLITE_DCHECK_LT(axis, concat_dimensions);
-
-  int64_t concat_size = 0;
-  for (int i = 0; i < inputs_count; i++) {
-    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
-    for (int j = 0; j < concat_dimensions; j++) {
-      if (j != axis) {
-        MatchingDim(*input_shapes[i], j, output_shape, j);
-      }
-    }
-    concat_size += input_shapes[i]->Dims(axis);
-  }
-  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
-  int64_t outer_size = 1;
-  for (int i = 0; i < axis; ++i) {
-    outer_size *= output_shape.Dims(i);
-  }
-  // For all input arrays,
-  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
-  int64_t base_inner_size = 1;
-  for (int i = axis + 1; i < concat_dimensions; ++i) {
-    base_inner_size *= output_shape.Dims(i);
-  }
-
-  const float inverse_output_scale = 1.f / output_scale;
-  uint8_t* output_ptr = output_data;
-  for (int k = 0; k < outer_size; k++) {
-    for (int i = 0; i < inputs_count; ++i) {
-      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
-      const uint8_t* input_ptr = input_data[i] + k * copy_size;
-      if (input_zeropoint[i] == output_zeropoint &&
-          input_scale[i] == output_scale) {
-        memcpy(output_ptr, input_ptr, copy_size);
-      } else {
-        const float scale = input_scale[i] * inverse_output_scale;
-        const float bias = -input_zeropoint[i] * scale;
-        for (int j = 0; j < copy_size; ++j) {
-          const int32_t value = static_cast<int32_t>(tflite::TfLiteRound(
-                                    input_ptr[j] * scale + bias)) +
-                                output_zeropoint;
-          output_ptr[j] = static_cast<uint8_t>(
-              std::max<int32_t>(std::min<int32_t>(255, value), 0));
-        }
-      }
-      output_ptr += copy_size;
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/conv.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/conv.h
@@ -1,262 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
-
-#include "tensorflow/lite/kernels/internal/types.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-
-
-
-namespace tflite {
-
-namespace reference_ops {
-
-
-inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
-                 const float* input_data, const RuntimeShape& filter_shape,
-                 const float* filter_data, const RuntimeShape& bias_shape,
-                 const float* bias_data, const RuntimeShape& output_shape,
-                 float* output_data, const RuntimeShape& im2col_shape,
-                 float* im2col_data) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          const int in_x_origin = (out_x * stride_width) - pad_width;
-          const int in_y_origin = (out_y * stride_height) - pad_height;
-          float total = 0.f;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // If the location is outside the bounds of the input image,
-                // use zero as a default value.
-                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height)) {
-                  float input_value = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  float filter_value =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  total += (input_value * filter_value);
-                }
-              }
-            }
-          }
-          float bias_value = 0.0f;
-          if (bias_data) {
-            bias_value = bias_data[out_channel];
-          }
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              ActivationFunctionWithMinMax(total + bias_value,
-                                           output_activation_min,
-                                           output_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
-                 const uint8_t* input_data, const RuntimeShape& filter_shape,
-                 const uint8_t* filter_data, const RuntimeShape& bias_shape,
-                 const int32_t* bias_data, const RuntimeShape& output_shape,
-                 uint8_t* output_data, const RuntimeShape& im2col_shape,
-                 uint8_t* im2col_data, void* cpu_backend_context) {
-  (void)cpu_backend_context;  // only used in optimized code.
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          const int in_x_origin = (out_x * stride_width) - pad_width;
-          const int in_y_origin = (out_y * stride_height) - pad_height;
-          int32_t acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // If the location is outside the bounds of the input image,
-                // use zero as a default value.
-                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height)) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  acc +=
-                      (filter_val + filter_offset) * (input_val + input_offset);
-                }
-              }
-            }
-          }
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              output_shift);
-          acc += output_offset;
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<uint8_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-inline void HybridConvPerChannel(
-    const ConvParams& params, float* scaling_factors_ptr,
-    const RuntimeShape& input_shape, const int8_t* input_data,
-    const RuntimeShape& filter_shape, const int8_t* filter_data,
-    const RuntimeShape& bias_shape, const float* bias_data,
-    const RuntimeShape& output_shape, float* output_data,
-    const RuntimeShape& im2col_shape, int8_t* im2col_data,
-    const float* per_channel_scale, int32_t* input_offset) {
-  (void)im2col_data;   // only used in optimized code.
-  (void)im2col_shape;  // only used in optimized code.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          const int in_x_origin = (out_x * stride_width) - pad_width;
-          const int in_y_origin = (out_y * stride_height) - pad_height;
-          int32_t acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // If the location is outside the bounds of the input image,
-                // use zero as a default value.
-                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height)) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  acc += filter_val * (input_val - input_offset[batch]);
-                }
-              }
-            }
-          }
-          float acc_float =
-              acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
-          if (bias_data) {
-            acc_float += bias_data[out_channel];
-          }
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              ActivationFunctionWithMinMax(acc_float, output_activation_min,
-                                           output_activation_max);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h
@@ -1,100 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void DepthwiseConv(
-    const DepthwiseParams& params, const RuntimeShape& input_shape,
-    const float* input_data, const RuntimeShape& filter_shape,
-    const float* filter_data, const RuntimeShape& bias_shape,
-    const float* bias_data, const RuntimeShape& output_shape,
-    float* output_data) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-  for (int b = 0; b < batches; ++b) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int ic = 0; ic < input_depth; ++ic) {
-          for (int m = 0; m < depth_multiplier; m++) {
-            const int oc = m + ic * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            float total = 0.f;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // If the location is outside the bounds of the input image,
-                // use zero as a default value.
-                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height)) {
-                  float input_value =
-                      input_data[Offset(input_shape, b, in_y, in_x, ic)];
-                  float filter_value = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, oc)];
-                  total += (input_value * filter_value);
-                }
-              }
-            }
-            float bias_value = 0.0f;
-            if (bias_data) {
-              bias_value = bias_data[oc];
-            }
-            output_data[Offset(output_shape, b, out_y, out_x, oc)] =
-                ActivationFunctionWithMinMax(total + bias_value,
-                                             output_activation_min,
-                                             output_activation_max);
-          }
-        }
-      }
-    }
-  }
-}
-
-}  // end namespace reference_ops
-}  // end namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
@@ -1,297 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
-
-#include <algorithm>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-// Used in tests and template parameters to control which version of depthwise
-// convolution is called. Primarily for reference code, and specializations
-// forced in tests.
-enum class DepthwiseConvImplementation {
-  // Run all tests against kUseStandardEntry even if also testing another
-  // kernel, since we need to be sure that the main DepthwiseConv() function in
-  // optimized_ops.h dispatches to a correctly-executing kernel.
-  kNone = 0,                 // The "default" option: use the normal
-                             // DepthwiseConv kernel (entry) function.
-  kUseGenericKernel,         // Forced use of generic kernel.
-  kUseNeon3x3,               // 3x3 kernel that uses NEON when available.
-  kUseNeon3x3DotProduct,     // 3x3 kernel that uses dot-product enabled NEON
-                             // when available.
-  kUseCModel3x3DotProduct,   // 3x3 kernel, reference C model that is intended
-                             // to match overall design NEON code.
-  kUseUnwound3x3DotProduct,  // 3x3 kernel, reference C model with unwound loops
-                             // and some arrays.
-  kUseIntrinsics3x3DotProduct,  // 3x3 kernel using NEON intrinsics.
-};
-
-// Category of depthwise convolution output rounding.
-enum class DepthwiseConvOutputRounding {
-  kNone = 0,      // Invalid: specific method must be specified.
-  kAwayFromZero,  // Original method: exact halves rounded away from zero.
-  kUpward,        // Halves towards +infinity: adds 0.5 before truncate.
-  // This is where a future kNearestEven would be placed.
-};
-
-// Category of depthwise convolution depth multiplication.
-enum class DepthwiseConvDepthMultiplication {
-  kNoMultiplication = 0,  // Depth multiplier = 1.
-  kUnitInputDepth,        // Input depth = 1, output depth = depth multiplier.
-};
-
-namespace reference_ops {
-namespace depthwise_conv {
-
-template <DepthwiseConvOutputRounding output_rounding>
-inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
-                                  int shift) {
-  TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-template <>
-inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-template <>
-inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  const int left_shift = shift > 0 ? shift : 0;
-  const int right_shift = shift > 0 ? 0 : -shift;
-  const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
-  return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
-                                            quantized_multiplier) +
-          rounding_offset) >>
-         right_shift;
-}
-
-template <DepthwiseConvOutputRounding output_rounding>
-struct DepthwiseConvBasicKernel {
-  static inline void Run(
-      const DepthwiseParams& params, const RuntimeShape& input_shape,
-      const uint8_t* input_data, const RuntimeShape& filter_shape,
-      const uint8_t* filter_data, const RuntimeShape& bias_shape,
-      const int32_t* bias_data, const RuntimeShape& output_shape,
-      uint8_t* output_data) {
-    const int stride_width = params.stride_width;
-    const int stride_height = params.stride_height;
-    const int dilation_width_factor = params.dilation_width_factor;
-    const int dilation_height_factor = params.dilation_height_factor;
-    const int pad_width = params.padding_values.width;
-    const int pad_height = params.padding_values.height;
-    const int depth_multiplier = params.depth_multiplier;
-    const int32_t output_activation_min = params.quantized_activation_min;
-    const int32_t output_activation_max = params.quantized_activation_max;
-    const int32_t input_offset = params.input_offset;
-    const int32_t filter_offset = params.weights_offset;
-    const int32_t output_offset = params.output_offset;
-    const int32_t output_multiplier = params.output_multiplier;
-    const int output_shift = params.output_shift;
-    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-    const int input_height = input_shape.Dims(1);
-    const int input_width = input_shape.Dims(2);
-    const int input_depth = input_shape.Dims(3);
-    const int filter_height = filter_shape.Dims(1);
-    const int filter_width = filter_shape.Dims(2);
-    const int output_height = output_shape.Dims(1);
-    const int output_width = output_shape.Dims(2);
-    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-    for (int b = 0; b < batches; ++b) {
-      for (int out_y = 0; out_y < output_height; ++out_y) {
-        for (int out_x = 0; out_x < output_width; ++out_x) {
-          for (int ic = 0; ic < input_depth; ++ic) {
-            for (int m = 0; m < depth_multiplier; m++) {
-              const int oc = m + ic * depth_multiplier;
-              const int in_x_origin = (out_x * stride_width) - pad_width;
-              const int in_y_origin = (out_y * stride_height) - pad_height;
-              int32_t acc = 0;
-              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                  const int in_x =
-                      in_x_origin + dilation_width_factor * filter_x;
-                  const int in_y =
-                      in_y_origin + dilation_height_factor * filter_y;
-                  // If the location is outside the bounds of the input image,
-                  // use zero as a default value.
-                  if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                      (in_y < input_height)) {
-                    int32_t input_val =
-                        input_data[Offset(input_shape, b, in_y, in_x, ic)];
-                    int32_t filter_val = filter_data[Offset(
-                        filter_shape, 0, filter_y, filter_x, oc)];
-                    acc += (filter_val + filter_offset) *
-                           (input_val + input_offset);
-                  }
-                }
-              }
-              if (bias_data) {
-                acc += bias_data[oc];
-              }
-              acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
-                                                        output_shift);
-              acc += output_offset;
-              acc = std::max(acc, output_activation_min);
-              acc = std::min(acc, output_activation_max);
-              output_data[Offset(output_shape, b, out_y, out_x, oc)] =
-                  static_cast<uint8_t>(acc);
-            }
-          }
-        }
-      }
-    }
-  }
-
-  // TODO(b/148596273): Reconcile reference versions, perhaps with common
-  // MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
-  static inline void RunPerChannel(
-      const DepthwiseParams& params, const RuntimeShape& input_shape,
-      const int8_t* input_data, const RuntimeShape& filter_shape,
-      const int8_t* filter_data, const RuntimeShape& bias_shape,
-      const int32_t* bias_data, const RuntimeShape& output_shape,
-      int8_t* output_data) {
-    // Get parameters.
-    // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
-    const int stride_width = params.stride_width;
-    const int stride_height = params.stride_height;
-    const int dilation_width_factor = params.dilation_width_factor;
-    const int dilation_height_factor = params.dilation_height_factor;
-    const int pad_width = params.padding_values.width;
-    const int pad_height = params.padding_values.height;
-    const int depth_multiplier = params.depth_multiplier;
-    const int32_t input_offset = params.input_offset;
-    const int32_t output_offset = params.output_offset;
-    const int32_t output_activation_min = params.quantized_activation_min;
-    const int32_t output_activation_max = params.quantized_activation_max;
-    const int32_t* output_multiplier = params.output_multiplier_per_channel;
-    const int32_t* output_shift = params.output_shift_per_channel;
-
-    // Check dimensions of the tensors.
-    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-    const int input_height = input_shape.Dims(1);
-    const int input_width = input_shape.Dims(2);
-    const int input_depth = input_shape.Dims(3);
-    const int filter_height = filter_shape.Dims(1);
-    const int filter_width = filter_shape.Dims(2);
-    const int output_height = output_shape.Dims(1);
-    const int output_width = output_shape.Dims(2);
-    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-    for (int batch = 0; batch < batches; ++batch) {
-      for (int out_y = 0; out_y < output_height; ++out_y) {
-        for (int out_x = 0; out_x < output_width; ++out_x) {
-          for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-            for (int m = 0; m < depth_multiplier; ++m) {
-              const int output_channel = m + in_channel * depth_multiplier;
-              const int in_x_origin = (out_x * stride_width) - pad_width;
-              const int in_y_origin = (out_y * stride_height) - pad_height;
-              int32_t acc = 0;
-              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                  const int in_x =
-                      in_x_origin + dilation_width_factor * filter_x;
-                  const int in_y =
-                      in_y_origin + dilation_height_factor * filter_y;
-                  // Zero padding by omitting the areas outside the image.
-                  const bool is_point_inside_image =
-                      (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                      (in_y < input_height);
-                  if (is_point_inside_image) {
-                    int32_t input_val = input_data[Offset(
-                        input_shape, batch, in_y, in_x, in_channel)];
-                    int32_t filter_val = filter_data[Offset(
-                        filter_shape, 0, filter_y, filter_x, output_channel)];
-                    // Accumulate with 32 bits accumulator.
-                    // In the nudging process during model quantization, we
-                    // force real value of 0.0 be represented by a quantized
-                    // value. This guarantees that the input_offset is a int8_t,
-                    // even though it is represented using int32_t. int32_t +=
-                    // int8_t
-                    // * (int8_t - int8_t) so the highest value we can get from
-                    // each accumulation is [-127, 127] * ([-128, 127] -
-                    // [-128, 127]), which is [-32512, 32512]. log2(32512)
-                    // = 14.98, which means we can accumulate at least 2^16
-                    // multiplications without overflow. The accumulator is
-                    // applied to a filter so the accumulation logic will hold
-                    // as long as the filter size (filter_y * filter_x *
-                    // in_channel) does not exceed 2^16, which is the case in
-                    // all the models we have seen so far.
-                    acc += filter_val * (input_val + input_offset);
-                  }
-                }
-              }
-              if (bias_data) {
-                acc += bias_data[output_channel];
-              }
-              acc = DepthwiseConvRound<output_rounding>(
-                  acc, output_multiplier[output_channel],
-                  output_shift[output_channel]);
-              acc += output_offset;
-              acc = std::max(acc, output_activation_min);
-              acc = std::min(acc, output_activation_max);
-              output_data[Offset(output_shape, batch, out_y, out_x,
-                                 output_channel)] = static_cast<int8_t>(acc);
-            }
-          }
-        }
-      }
-    }
-  }
-};
-
-}  // namespace depthwise_conv
-
-inline void DepthwiseConv(
-    const DepthwiseParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& filter_shape,
-    const uint8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    uint8_t* output_data) {
-  return depthwise_conv::DepthwiseConvBasicKernel<
-      DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
-                                                       input_data, filter_shape,
-                                                       filter_data, bias_shape,
-                                                       bias_data, output_shape,
-                                                       output_data);
-}
-
-}  // namespace reference_ops
-}  // end namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/dequantize.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/dequantize.h
@@ -1,78 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
-
-#include <limits.h>
-
-#include <vector>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// Dequantizes into a float without rounding.
-template <typename InputT, typename OutputT>
-inline void Dequantize(const tflite::DequantizationParams& op_params,
-                       const RuntimeShape& input_shape,
-                       const InputT* input_data,
-                       const RuntimeShape& output_shape, OutputT* output_data) {
-  int32_t zero_point = op_params.zero_point;
-  const double scale = op_params.scale;
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    const int32_t val = input_data[i];
-    const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
-    output_data[i] = result;
-  }
-}
-
-// Dequantizes per-channel quantized tensor to float.
-template <typename T>
-inline void PerChannelDequantize(
-    const tflite::PerChannelDequantizationParams& op_params,
-    const RuntimeShape& input_shape, const T* input_data,
-    const RuntimeShape& output_shape, float* output_data) {
-  // Ensure flat size is same.
-  MatchingFlatSize(input_shape, output_shape);
-
-  const int32_t* zero_point = op_params.zero_point;
-  const float* scale = op_params.scale;
-  const int32_t quantized_dimension = op_params.quantized_dimension;
-  const int32_t num_dims = input_shape.DimensionsCount();
-  const int32_t* dims_data = input_shape.DimsData();
-  std::vector<int> current_dim(num_dims, 0);
-
-  do {
-    size_t offset =
-        ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
-                            current_dim.data(), 0, nullptr);
-    const int channel = current_dim[quantized_dimension];
-    const int32_t val = input_data[offset];
-    const float result =
-        static_cast<float>(scale[channel] * (val - zero_point[channel]));
-    output_data[offset] = result;
-  } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
-                     current_dim.data()));
-}
-
-}  // namespace reference_ops
-
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/floor.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/floor.h
@@ -1,39 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
-
-#include <cmath>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void Floor(const RuntimeShape& input_shape, const float* input_data,
-                  const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    int offset = i;
-    output_data[offset] = std::floor(input_data[offset]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/fully_connected.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/fully_connected.h
@@ -1,320 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const float* input_data, const RuntimeShape& weights_shape,
-    const float* weights_data, const RuntimeShape& bias_shape,
-    const float* bias_data, const RuntimeShape& output_shape,
-    float* output_data) {
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  // TODO(benoitjacob): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dims_count = output_shape.DimensionsCount();
-  const int weights_dims_count = weights_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
-  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
-                                       output_shape, output_dims_count - 1);
-  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      float total = 0.f;
-      for (int d = 0; d < accum_depth; ++d) {
-        total += input_data[b * accum_depth + d] *
-                 weights_data[out_c * accum_depth + d];
-      }
-      float bias_value = 0.0f;
-      if (bias_data) {
-        bias_value = bias_data[out_c];
-      }
-      output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
-          total + bias_value, output_activation_min, output_activation_max);
-    }
-  }
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& filter_shape,
-    const uint8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    uint8_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  // TODO(benoitjacob): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * (input_val + input_offset);
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
-    }
-  }
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& filter_shape,
-    const uint8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  TFLITE_DCHECK_EQ(output_offset, 0);
-  // TODO(benoitjacob): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      // Internal accumulation.
-      // Initialize accumulator with the bias-value.
-      int32_t accum = bias_data[out_c];
-      // Accumulation loop.
-      for (int d = 0; d < accum_depth; ++d) {
-        int16_t input_val = input_data[b * accum_depth + d] + input_offset;
-        int16_t filter_val =
-            filter_data[out_c * accum_depth + d] + filter_offset;
-        accum += filter_val * input_val;
-      }
-      // Down-scale the final int32_t accumulator to the scale used by our
-      // (16-bit, typically 3 integer bits) fixed-point format. The quantized
-      // multiplier and shift here have been pre-computed offline
-      // (e.g. by toco).
-      accum =
-          MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
-      // Saturate, cast to int16_t, and store to output array.
-      accum = std::max(accum, output_activation_min - output_offset);
-      accum = std::min(accum, output_activation_max - output_offset);
-      accum += output_offset;
-      output_data[out_c + output_depth * b] = accum;
-    }
-  }
-}
-
-inline void ShuffledFullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const uint8_t* input_data, const RuntimeShape& weights_shape,
-    const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
-  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
-  // TODO(benoitjacob): This really should be:
-  //     const int batches = ArraySize(output_dims, 1);
-  // but the current --variable_batch hack consists in overwriting the 3rd
-  // dimension with the runtime batch size, as we don't keep track for each
-  // array of which dimension is the batch dimension in it.
-  const int output_dim_count = output_shape.DimensionsCount();
-  const int weights_dim_count = weights_shape.DimensionsCount();
-  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
-  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
-                                       output_shape, output_dim_count - 1);
-  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
-  TFLITE_DCHECK((accum_depth % 16) == 0);
-  TFLITE_DCHECK((output_depth % 4) == 0);
-
-  // Shuffling and xoring of input activations into the workspace buffer
-  uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
-  if (batches == 1) {
-    for (int i = 0; i < accum_depth; i++) {
-      shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
-    }
-  } else if (batches == 4) {
-    for (int c = 0; c < accum_depth; c += 16) {
-      for (int b = 0; b < 4; b++) {
-        const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
-        for (int j = 0; j < 16; j++) {
-          uint8_t src_val = *src_data_ptr++;
-          // Flip the sign bit, so that the kernel will only need to
-          // reinterpret these uint8_t values as int8_t, getting for free the
-          // subtraction of the zero_point value 128.
-          uint8_t dst_val = src_val ^ 0x80;
-          *shuffled_input_workspace_ptr++ = dst_val;
-        }
-      }
-    }
-  } else {
-    TFLITE_DCHECK(false);
-    return;
-  }
-
-  // Actual computation
-  if (batches == 1) {
-    int16_t* output_ptr = output_data;
-    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
-    // so that just reinterpreting them as int8_t values is equivalent to
-    // subtracting 128 from them, thus implementing for free the subtraction of
-    // the zero_point value 128.
-    const int8_t* shuffled_weights_ptr =
-        reinterpret_cast<const int8_t*>(shuffled_weights_data);
-    // Likewise, we preshuffled and pre-xored the input data above.
-    const int8_t* shuffled_input_data =
-        reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
-    for (int c = 0; c < output_depth; c += 4) {
-      // Internal accumulation.
-      // Initialize accumulator with the bias-value.
-      int32_t accum[4] = {0};
-      // Accumulation loop.
-      for (int d = 0; d < accum_depth; d += 16) {
-        for (int i = 0; i < 4; i++) {
-          for (int j = 0; j < 16; j++) {
-            int8_t input_val = shuffled_input_data[d + j];
-            int8_t weights_val = *shuffled_weights_ptr++;
-            accum[i] += weights_val * input_val;
-          }
-        }
-      }
-      for (int i = 0; i < 4; i++) {
-        // Add bias value
-        int32_t acc = accum[i] + bias_data[c + i];
-        // Down-scale the final int32_t accumulator to the scale used by our
-        // (16-bit, typically 3 integer bits) fixed-point format. The quantized
-        // multiplier and shift here have been pre-computed offline
-        // (e.g. by toco).
-        acc =
-            MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-        // Saturate, cast to int16_t, and store to output array.
-        acc = std::max(acc, output_activation_min);
-        acc = std::min(acc, output_activation_max);
-        output_ptr[c + i] = acc;
-      }
-    }
-  } else if (batches == 4) {
-    int16_t* output_ptr = output_data;
-    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
-    // so that just reinterpreting them as int8_t values is equivalent to
-    // subtracting 128 from them, thus implementing for free the subtraction of
-    // the zero_point value 128.
-    const int8_t* shuffled_weights_ptr =
-        reinterpret_cast<const int8_t*>(shuffled_weights_data);
-    // Likewise, we preshuffled and pre-xored the input data above.
-    const int8_t* shuffled_input_data =
-        reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
-    for (int c = 0; c < output_depth; c += 4) {
-      const int8_t* shuffled_input_ptr = shuffled_input_data;
-      // Accumulation loop.
-      // Internal accumulation.
-      // Initialize accumulator with the bias-value.
-      int32_t accum[4][4];
-      for (int i = 0; i < 4; i++) {
-        for (int b = 0; b < 4; b++) {
-          accum[i][b] = 0;
-        }
-      }
-      for (int d = 0; d < accum_depth; d += 16) {
-        for (int i = 0; i < 4; i++) {
-          for (int b = 0; b < 4; b++) {
-            for (int j = 0; j < 16; j++) {
-              int8_t input_val = shuffled_input_ptr[16 * b + j];
-              int8_t weights_val = shuffled_weights_ptr[16 * i + j];
-              accum[i][b] += weights_val * input_val;
-            }
-          }
-        }
-        shuffled_input_ptr += 64;
-        shuffled_weights_ptr += 64;
-      }
-      for (int i = 0; i < 4; i++) {
-        for (int b = 0; b < 4; b++) {
-          // Add bias value
-          int32_t acc = accum[i][b] + bias_data[c + i];
-          // Down-scale the final int32_t accumulator to the scale used by our
-          // (16-bit, typically 3 integer bits) fixed-point format. The
-          // quantized multiplier and shift here have been pre-computed offline
-          // (e.g. by toco).
-          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
-                                              output_shift);
-          // Saturate, cast to int16_t, and store to output array.
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_ptr[b * output_depth + c + i] = acc;
-        }
-      }
-    }
-  } else {
-    TFLITE_DCHECK(false);
-    return;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/hard_swish.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/hard_swish.h
@@ -1,166 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
-
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline int16_t SaturatingLeftShift(int16_t value, int amount) {
-  int32_t result = static_cast<int32_t>(value) * (1 << amount);
-  result = std::min<int32_t>(result, std::numeric_limits<int16_t>::max());
-  result = std::max<int32_t>(result, std::numeric_limits<int16_t>::min());
-  return result;
-}
-
-// Similar to ARM instruction SQDMULH.
-// Similar to gemmlowp::SaturatingRoundingDoublingHighMul except
-// rounding to zero instead of to nearest (SQRDMULH).
-inline std::int16_t SaturatingDoublingHighMul(std::int16_t a, std::int16_t b) {
-  bool overflow = a == b && a == std::numeric_limits<std::int16_t>::min();
-  std::int32_t a_32(a);
-  std::int32_t b_32(b);
-  std::int32_t ab_32 = a_32 * b_32;
-  std::int16_t ab_x2_high16 = static_cast<std::int16_t>((ab_32) / (1 << 15));
-  return overflow ? std::numeric_limits<std::int16_t>::max() : ab_x2_high16;
-}
-
-template <typename T>
-inline void HardSwish(const RuntimeShape& input_shape, const T* input_data,
-                      const RuntimeShape& output_shape, T* output_data) {
-  ruy::profiler::ScopeLabel label("ReferenceHardSwish/Float");
-  auto matching_size = MatchingFlatSize(input_shape, output_shape);
-  const T* in_end = input_data + matching_size;
-  for (; input_data < in_end; input_data++, output_data++) {
-    const float in = *input_data;
-    *output_data =
-        in * std::min(static_cast<T>(6), std::max(static_cast<T>(0), in + 3)) /
-        6;
-  }
-}
-
-template <typename T>
-inline void HardSwish(const HardSwishParams& params,
-                      const RuntimeShape& input_shape, const T* input_data,
-                      const RuntimeShape& output_shape, T* output_data) {
-  ruy::profiler::ScopeLabel label("ReferenceHardSwish/Quantized");
-
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    const int16_t input_value = input_data[i] - params.input_zero_point;
-    // Left-shift as much as we can without overflow/saturation to put
-    // significant bits in the high bits of our 16-bit fixedpoint values, so
-    // that fixed-point approximate computations below are as accurate as
-    // possible.
-    const int16_t input_value_on_hires_input_scale = input_value * (1 << 7);
-    // Compute the input value on essentially the output scale, just not
-    // right-shifted yet. This is the value that we'll use in the (x >= +3)
-    // case, and that in the general case we'll multiply against the "relu-ish"
-    // fixed-point multiplier in [0, 1].
-    const int16_t input_value_on_preshift_output_scale =
-        gemmlowp::SaturatingRoundingDoublingHighMul(
-            input_value_on_hires_input_scale,
-            params.output_multiplier_fixedpoint_int16);
-    // Now compute the "relu-ish multiplier". In the (-3 <= x <= +3) case, that
-    // is just an affine rescaling of x from [-3, 3] to [0, 1]. In the general
-    // case, it is just that plus saturation at the boundaries of [-3, 3].
-    // First, we rescale from [-3, 3] to [-1, 1], saturating.
-    // That is done by rescaling the input value with a fixed-point multiplier
-    // (reluish_multiplier_fixedpoint) and bit-shift such that we represent
-    // that input value on the scale where the real value 3.0f is represented
-    // by the quantized value 32768.  (+32768 is actually not representable as
-    // int16_t, so this saturates at +32767, and that is seen empirically to be
-    // a negligible contribution to numerical error/bias).
-    //
-    // This code is careful to correctly implement any magnitude of multiplier,
-    // involving either a right shift or a left shift, with correct saturation
-    // behavior in the left-shift case. This forces this code to be more
-    // complicated, but is necessary for real applications: a partially
-    // trained quantized MobileNet v3-small model that motivated this code
-    // exhibits some large [min, max] range boundaries, of the order of
-    // magnitude of 10 or 100 depending on layers.
-    //
-    // The next few lines are basically just an ordinary
-    // MultiplyByQuantizedMultiplier, except that we are more careful here
-    // about the fine details of saturation when left-shifting, because here
-    // overflow in left-shift is a common case, not an anomaly as
-    // MultiplyByQuantizedMultiplier assumes.
-    int16_t reluish_value = input_value_on_hires_input_scale;
-    // Shift left, saturating, as much as we can while ensuring that this
-    // saturation will not contribute to the result. That is, left shift amount
-    // reduced by 1.
-    if (params.reluish_multiplier_exponent > 0) {
-      reluish_value = SaturatingLeftShift(
-          reluish_value, params.reluish_multiplier_exponent - 1);
-    }
-    // Apply the fixed-point multiplier, dividing the value by a divisor
-    // ranging in [1, 2].
-    reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul(
-        reluish_value, params.reluish_multiplier_fixedpoint_int16);
-    // Apply the last bit of left-shift. Thus, in the left-shifting case, if
-    // any saturation affects the result, it is happening here --- any
-    // saturation having occurred above is overwritten here, not affecting the
-    // result.
-    if (params.reluish_multiplier_exponent > 0) {
-      reluish_value = SaturatingLeftShift(reluish_value, 1);
-    }
-    // Shift right, in the right-shifting case.
-    if (params.reluish_multiplier_exponent < 0) {
-      reluish_value = gemmlowp::RoundingDivideByPOT(
-          reluish_value, -params.reluish_multiplier_exponent);
-    }
-    // At this point we have rescaled the value into a 16bit fixedpoint
-    // reluish_value in [-1, 1].
-    // We now convert that to a 16bit fixedpoint value in [0, 1].
-    reluish_value = (reluish_value + (1 << 15)) >> 1;
-    // Use of SaturatingDoublingHighMul here is important to cancel the biases
-    // from the above SaturatingRoundingDoublingHighMul.
-    //
-    // On a partially trained MobileNet-v3-small,
-    //
-    //                                       | bias on    |  ImageNet
-    //                                       | quantized  |  Top-1
-    // Operation used here                   | values     |  accuracy (50k)
-    // --------------------------------------+------------+-----------
-    // SaturatingDoublingHighMul             | -0.0024    |  58.920
-    // SaturatingRoundingDoublingHighMul     | -0.0067    |  58.064
-    //
-    // In activations_test, this is covered by this testcase:
-    //     QuantizedActivationsOpTest.HardSwishBias
-    //
-    const int16_t preshift_output_value = SaturatingDoublingHighMul(
-        reluish_value, input_value_on_preshift_output_scale);
-    // We were so far operating on the pre-shift output scale. Now we finally
-    // apply that output shift, arriving at the final output scale.
-    int16_t output_value = gemmlowp::RoundingDivideByPOT(
-        preshift_output_value, -params.output_multiplier_exponent);
-    output_value += params.output_zero_point;
-    output_value =
-        std::min<int16_t>(output_value, std::numeric_limits<T>::max());
-    output_value =
-        std::max<int16_t>(output_value, std::numeric_limits<T>::min());
-    output_data[i] = output_value;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
@@ -1,145 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
-
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void CheckArithmeticParams(const ArithmeticParams& params) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  // Input offset is negative input zero point. Activation tensors are
-  // asymmetric quantized so they span the full int8 range.
-  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
-  TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
-}
-
-// Element-wise add that can often be used for inner loop of broadcast add as
-// well as the non-broadcast add.
-inline void AddElementwise(int size, const ArithmeticParams& params,
-                           const int8_t* input1_data, const int8_t* input2_data,
-                           int8_t* output_data) {
-  CheckArithmeticParams(params);
-
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sum, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<int8_t>(clamped_output);
-  }
-}
-
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int8_t* input1_data,
-                const RuntimeShape& input2_shape, const int8_t* input2_data,
-                const RuntimeShape& output_shape, int8_t* output_data) {
-  CheckArithmeticParams(params);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
-                               const RuntimeShape& input1_shape,
-                               const int8_t* input1_data,
-                               const RuntimeShape& input2_shape,
-                               const int8_t* input2_data,
-                               const RuntimeShape& output_shape,
-                               int8_t* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              params.input1_offset +
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
-          const int32_t input2_val =
-              params.input2_offset +
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
-          const int32_t shifted_input1_val =
-              input1_val * (1 << params.left_shift);
-          const int32_t shifted_input2_val =
-              input2_val * (1 << params.left_shift);
-          const int32_t scaled_input1_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input1_val, params.input1_multiplier,
-                  params.input1_shift);
-          const int32_t scaled_input2_val =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  shifted_input2_val, params.input2_multiplier,
-                  params.input2_shift);
-          const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-          const int32_t raw_output =
-              MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                  raw_sum, params.output_multiplier, params.output_shift) +
-              params.output_offset;
-          const int32_t clamped_output =
-              std::min(params.quantized_activation_max,
-                       std::max(params.quantized_activation_min, raw_output));
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              static_cast<int8_t>(clamped_output);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
@@ -1,217 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-// Fixed-point per-channel-quantization convolution reference kernel.
-inline void ConvPerChannel(
-    const ConvParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  // Get parameters.
-  const int32_t input_offset = params.input_offset;  // r = s(q - Z)
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int32_t output_offset = params.output_offset;
-
-  // Set min and max value of the output.
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Consistency check.
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-
-  // Check dimensions of the tensors.
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          const int in_x_origin = (out_x * stride_width) - pad_width;
-          const int in_y_origin = (out_y * stride_height) - pad_height;
-          int32_t acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  // Accumulate with 32 bits accumulator.
-                  // In the nudging process during model quantization, we force
-                  // real value of 0.0 be represented by a quantized value. This
-                  // guarantees that the input_offset is a int8_t, even though
-                  // it is represented using int32_t. int32_t += int8_t *
-                  // (int8_t - int8_t) so the highest value we can get from each
-                  // accumulation is [-127, 127] * ([-128, 127] -
-                  // [-128, 127]), which is [-32512, 32512]. log2(32512)
-                  // = 14.98, which means we can accumulate at least 2^16
-                  // multiplications without overflow. The accumulator is
-                  // applied to a filter so the accumulation logic will hold as
-                  // long as the filter size (filter_y * filter_x * in_channel)
-                  // does not exceed 2^16, which is the case in all the models
-                  // we have seen so far.
-                  // TODO(jianlijianli): Add a check to make sure the
-                  // accumulator depth is smaller than 2^16.
-                  acc += filter_val * (input_val + input_offset);
-                }
-              }
-            }
-          }
-
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          acc = MultiplyByQuantizedMultiplier(
-              acc, output_multiplier[out_channel], output_shift[out_channel]);
-          acc += output_offset;
-          acc = std::max(acc, output_activation_min);
-          acc = std::min(acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<int8_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-// Fixed-point per-channel-quantization convolution reference kernel.
-// 16-bit data and 8-bit filter
-inline void ConvPerChannel(
-    const ConvParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const std::int64_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  // Get parameters.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-
-  // Set min and max value of the output.
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Consistency check.
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
-  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (bias_data) {
-    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-  }
-
-  // Check dimensions of the tensors.
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          const int in_x_origin = (out_x * stride_width) - pad_width;
-          const int in_y_origin = (out_y * stride_height) - pad_height;
-          std::int64_t acc = 0;
-          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val =
-                      filter_data[Offset(filter_shape, out_channel, filter_y,
-                                         filter_x, in_channel)];
-                  // Accumulate with 64 bits accumulator.
-                  // int64_t += int8_t * int16_t so the highest value we can
-                  // get from each accumulation is [-127, 127] * ([-32768,
-                  // 32767] -
-                  // [-32768, 32767]), which is [-8322945, 8322945].
-                  // log2(8322945) = 22.99.
-                  acc += filter_val * input_val;
-                }
-              }
-            }
-          }
-          if (bias_data) {
-            acc += bias_data[out_channel];
-          }
-          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
-              acc, output_multiplier[out_channel], output_shift[out_channel]);
-          scaled_acc = std::max(scaled_acc, output_activation_min);
-          scaled_acc = std::min(scaled_acc, output_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
-              static_cast<int16_t>(scaled_acc);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h
@@ -1,289 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-inline void DepthwiseConvPerChannel(
-    const DepthwiseParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  // Get parameters.
-  // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const int32_t input_offset = params.input_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Check dimensions of the tensors.
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          for (int m = 0; m < depth_multiplier; ++m) {
-            const int output_channel = m + in_channel * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            int32_t acc = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, output_channel)];
-                  // Accumulate with 32 bits accumulator.
-                  // In the nudging process during model quantization, we force
-                  // real value of 0.0 be represented by a quantized value. This
-                  // guarantees that the input_offset is a int8_t, even though
-                  // it is represented using int32_t. int32_t += int8_t *
-                  // (int8_t - int8_t) so the highest value we can get from each
-                  // accumulation is [-127, 127] * ([-128, 127] -
-                  // [-128, 127]), which is [-32512, 32512]. log2(32512)
-                  // = 14.98, which means we can accumulate at least 2^16
-                  // multiplications without overflow. The accumulator is
-                  // applied to a filter so the accumulation logic will hold as
-                  // long as the filter size (filter_y * filter_x * in_channel)
-                  // does not exceed 2^16, which is the case in all the models
-                  // we have seen so far.
-                  // TODO(jianlijianli): Add a check to make sure the
-                  // accumulator depth is smaller than 2^16.
-                  acc += filter_val * (input_val + input_offset);
-                }
-              }
-            }
-            if (bias_data) {
-              acc += bias_data[output_channel];
-            }
-            acc = MultiplyByQuantizedMultiplier(
-                acc, output_multiplier[output_channel],
-                output_shift[output_channel]);
-            acc += output_offset;
-            acc = std::max(acc, output_activation_min);
-            acc = std::min(acc, output_activation_max);
-            output_data[Offset(output_shape, batch, out_y, out_x,
-                               output_channel)] = static_cast<int8_t>(acc);
-          }
-        }
-      }
-    }
-  }
-}
-
-inline void DepthwiseConvPerChannel(
-    const DepthwiseParams& params, const int32_t* output_multiplier,
-    const int32_t* output_shift, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const std::int64_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  // Get parameters.
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-
-  // Check dimensions of the tensors.
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          for (int m = 0; m < depth_multiplier; ++m) {
-            const int output_channel = m + in_channel * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            std::int64_t acc = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, output_channel)];
-                  // Accumulate with 64 bits accumulator.
-                  // We assume maximum of 2^16 accumulations as with the 8-bit
-                  // case so actually the value in the accumulator should not
-                  // exceed 40 bits
-                  acc += static_cast<int64_t>(filter_val) *
-                         static_cast<int64_t>(input_val);
-                }
-              }
-            }
-            if (bias_data) {
-              acc += bias_data[output_channel];
-            }
-            int32_t scaled_acc = MultiplyByQuantizedMultiplier(
-                acc, output_multiplier[output_channel],
-                output_shift[output_channel]);
-            scaled_acc = std::max(scaled_acc, output_activation_min);
-            scaled_acc = std::min(scaled_acc, output_activation_max);
-            output_data[Offset(output_shape, batch, out_y, out_x,
-                               output_channel)] =
-                static_cast<int16_t>(scaled_acc);
-          }
-        }
-      }
-    }
-  }
-}
-
-inline void DepthwiseConvHybridPerChannel(
-    const DepthwiseParams& params, float* scaling_factors_ptr,
-    const RuntimeShape& input_shape, const int8_t* input_data,
-    const RuntimeShape& filter_shape, const int8_t* filter_data,
-    const RuntimeShape& bias_shape, const float* bias_data,
-    const RuntimeShape& output_shape, float* output_data,
-    const float* per_channel_scale, int32_t* input_offset) {
-  const int stride_width = params.stride_width;
-  const int stride_height = params.stride_height;
-  const int dilation_width_factor = params.dilation_width_factor;
-  const int dilation_height_factor = params.dilation_height_factor;
-  const int pad_width = params.padding_values.width;
-  const int pad_height = params.padding_values.height;
-  const int depth_multiplier = params.depth_multiplier;
-  const float output_activation_min = params.float_activation_min;
-  const float output_activation_max = params.float_activation_max;
-  // Check dimensions of the tensors.
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int input_depth = input_shape.Dims(3);
-  const int filter_height = filter_shape.Dims(1);
-  const int filter_width = filter_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int bias_depth = bias_shape.FlatSize();
-  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
-  TFLITE_DCHECK_EQ(bias_depth, output_depth);
-
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-          for (int m = 0; m < depth_multiplier; ++m) {
-            const int output_channel = m + in_channel * depth_multiplier;
-            const int in_x_origin = (out_x * stride_width) - pad_width;
-            const int in_y_origin = (out_y * stride_height) - pad_height;
-            int32_t acc = 0;
-            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + dilation_width_factor * filter_x;
-                const int in_y =
-                    in_y_origin + dilation_height_factor * filter_y;
-                // Zero padding by omitting the areas outside the image.
-                const bool is_point_inside_image =
-                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
-                    (in_y < input_height);
-                if (is_point_inside_image) {
-                  int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
-                  int32_t filter_val = filter_data[Offset(
-                      filter_shape, 0, filter_y, filter_x, output_channel)];
-                  acc += filter_val * (input_val - input_offset[batch]);
-                }
-              }
-            }
-            float acc_float = static_cast<float>(acc);
-            acc_float *=
-                per_channel_scale[output_channel] * scaling_factors_ptr[batch];
-            if (bias_data && output_channel < bias_depth) {
-              acc_float += bias_data[output_channel];
-            }
-            output_data[Offset(output_shape, batch, out_y, out_x,
-                               output_channel)] =
-                ActivationFunctionWithMinMax(acc_float, output_activation_min,
-                                             output_activation_max);
-          }
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
@@ -1,108 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
-  const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = output_shape.Dims(0);
-  const int output_depth = output_shape.Dims(1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * (input_val + input_offset);
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
-    }
-  }
-}
-
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int64_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
-  const int32_t filter_offset = params.weights_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
-  const int32_t output_activation_min = params.quantized_activation_min;
-  const int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
-
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = output_shape.Dims(0);
-  const int output_depth = output_shape.Dims(1);
-  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  for (int b = 0; b < batches; ++b) {
-    for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int64_t acc = 0;
-      for (int d = 0; d < accum_depth; ++d) {
-        int32_t input_val = input_data[b * accum_depth + d];
-        int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * input_val;
-      }
-      if (bias_data) {
-        acc += bias_data[out_c];
-      }
-      int32_t acc_scaled =
-          MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc_scaled = std::max(acc_scaled, output_activation_min);
-      acc_scaled = std::min(acc_scaled, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h
@@ -1,65 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
-                            int32_t depth, const int8_t* input_data,
-                            int8_t* output_data) {
-  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
-  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
-  // The output scale must be in sync with Prepare().
-  // Output is in 1/128 scale so the actual output range is nudged from [-1, 1]
-  // to [-1, 127/128].
-  static constexpr int32_t kOutputScale = 7;
-  for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
-    // int32_t = (int8_t - int8_t) ^ 2.
-    // ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
-    // safe from overflowing in at least 2^16 steps.
-    int32_t acc = 0;
-    for (int inner_index = 0; inner_index < depth; ++inner_index) {
-      int32_t input =
-          input_data[depth * outer_index + inner_index] - input_zero_point;
-      acc += input * input;
-    }
-    int32_t inv_l2norm_multiplier;
-    int inv_l2norm_shift;
-    GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
-                                     &inv_l2norm_shift);
-
-    for (int inner_index = 0; inner_index < depth; ++inner_index) {
-      int32_t input =
-          input_data[depth * outer_index + inner_index] - input_zero_point;
-
-      // Rescale and downcast. Rescale is folded into the division.
-      int32_t output_in_q24 = MultiplyByQuantizedMultiplier(
-          input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale);
-      output_in_q24 =
-          std::min(static_cast<int32_t>(kMaxInt8),
-                   std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
-      output_data[depth * outer_index + inner_index] =
-          static_cast<int8_t>(output_in_q24);
-    }
-  }
-}
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
@@ -1,99 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
-
-#include <limits>
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
-                     int32_t input_multiplier, int32_t input_left_shift,
-                     int32_t input_size, const int8_t* input_data,
-                     int8_t* output_data) {
-  // Integer bits must be in sync with Prepare() function.
-  static constexpr int32_t kInputIntegerBits = 4;
-  static constexpr int32_t kOutputIntegerBits = 8;
-  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
-  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
-  static constexpr int32_t kOutputZeroPoint = -128;
-
-  for (int i = 0; i < input_size; ++i) {
-    const int32_t input =
-        static_cast<int32_t>(input_data[i]) - input_zero_point;
-    if (input <= -input_range_radius) {
-      output_data[i] = kMinInt8;
-    } else if (input >= input_range_radius) {
-      output_data[i] = kMaxInt8;
-    } else {
-      const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
-          input, input_multiplier, input_left_shift);
-      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
-      const int32_t output_in_q0 =
-          gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();
-
-      // Rescale and downcast.
-      using gemmlowp::RoundingDivideByPOT;
-      int32_t output_in_q23 =
-          RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits);
-      output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint,
-                                        static_cast<int32_t>(kMinInt8)),
-                               static_cast<int32_t>(kMaxInt8));
-      output_data[i] = static_cast<int8_t>(output_in_q23);
-    }
-  }
-}
-
-inline void Logistic(int32_t input_multiplier, int32_t input_size,
-                     const int16_t* ptr_input_data, int16_t* ptr_output_data) {
-  // We use the LUT for sigmoid and take into account, that
-  // tanh(x) = 2*sigmoid(2*x) - 1
-
-  int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
-
-  for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
-    int32_t input_data = (*ptr_input_data) * input_data_mul;
-
-    // Scale by 3/4 to expand range [-8,8]->[-10.7,10.7] and
-    // we do interpolation on unsigned values.
-    uint32_t abs_input_data = 3 * abs(input_data);
-
-    // We divide by 2 power of 9, because
-    // we need to divide by 2 in power of 7 for
-    // the input conversion + 1/4 from the scale above.
-    uint8_t uh = abs_input_data >> 9;
-    uint32_t ua = sigmoid_table_uint16[uh];
-    uint32_t ub = sigmoid_table_uint16[uh + 1];
-    uint32_t ut = abs_input_data & 0x1ff;
-
-    // Interpolation is done using the fractional bit.
-    uint32_t result = (ua << 9) + ut * (ub - ua);
-
-    result = (input_data >= 0) ? (result + (1 << 9))
-                               : ((1 << (16 + 9)) - result + (1 << 9) - 1);
-
-    // Back to 16-bit.
-    result >>= 10;
-
-    *ptr_output_data = result;
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
@@ -1,131 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
-
-#include "fixedpoint/fixedpoint.h"
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-template <typename T>
-inline void MulElementwise(int size, const ArithmeticParams& params,
-                           const T* input1_data, const T* input2_data,
-                           T* output_data) {
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t unclamped_result =
-        params.output_offset +
-        MultiplyByQuantizedMultiplier(input1_val * input2_val,
-                                      params.output_multiplier,
-                                      params.output_shift);
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, unclamped_result));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-template <typename T>
-inline void Mul(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const T* input1_data,
-                const RuntimeShape& input2_shape, const T* input2_data,
-                const RuntimeShape& output_shape, T* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  ruy::profiler::ScopeLabel label("Mul/8bit");
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-// Mul with 16 bit inputs and int8_t outputs.
-inline void Mul(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int16_t* input1_data,
-                const RuntimeShape& input2_shape, const int16_t* input2_data,
-                const RuntimeShape& output_shape, int8_t* output_data) {
-  ruy::profiler::ScopeLabel label("Mul/Int16Int8");
-  int32_t output_offset = params.output_offset;
-  int32_t output_activation_min = params.quantized_activation_min;
-  int32_t output_activation_max = params.quantized_activation_max;
-  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    // F0 uses 0 integer bits, range [-1, 1].
-    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-
-    F0 unclamped_result =
-        F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
-    int16_t rescaled_result =
-        gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
-    int16_t clamped_result = std::min<int16_t>(
-        output_activation_max - output_offset, rescaled_result);
-    clamped_result = std::max<int16_t>(output_activation_min - output_offset,
-                                       clamped_result);
-    output_data[i] = output_offset + clamped_result;
-  }
-}
-
-template <typename T>
-inline void BroadcastMul4DSlow(
-    const ArithmeticParams& params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastMul4DSlow");
-
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  // The input shapes are extended as part of NdArrayDesc initialization.
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              params.input1_offset +
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
-          const int32_t input2_val =
-              params.input2_offset +
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
-          const int32_t unclamped_result =
-              params.output_offset +
-              MultiplyByQuantizedMultiplier(input1_val * input2_val,
-                                            params.output_multiplier,
-                                            params.output_shift);
-          const int32_t clamped_output = std::min(
-              params.quantized_activation_max,
-              std::max(params.quantized_activation_min, unclamped_result));
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              static_cast<T>(clamped_output);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
@@ -1,258 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
-
-#include <limits>
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const int8_t* input_data,
-                        const RuntimeShape& output_shape, int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int32_t acc = 0;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              acc +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          // Round to the closest integer value.
-          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
-                        : (acc - filter_count / 2) / filter_count;
-          acc = std::max(acc, params.quantized_activation_min);
-          acc = std::min(acc, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int8_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const int8_t* input_data, const RuntimeShape& output_shape,
-                    int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_GE(params.quantized_activation_min,
-                   std::numeric_limits<int8_t>::min());
-  TFLITE_DCHECK_LE(params.quantized_activation_max,
-                   std::numeric_limits<int8_t>::max());
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int8_t max = std::numeric_limits<int8_t>::lowest();
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          max = std::max<int8_t>(max, params.quantized_activation_min);
-          max = std::min<int8_t>(max, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int8_t>(max);
-        }
-      }
-    }
-  }
-}
-
-inline void AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const int16_t* input_data,
-                        const RuntimeShape& output_shape,
-                        int16_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int32_t acc = 0;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              acc +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          // Round to the closest integer value.
-          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
-                        : (acc - filter_count / 2) / filter_count;
-          acc = std::max(acc, params.quantized_activation_min);
-          acc = std::min(acc, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int16_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const int16_t* input_data, const RuntimeShape& output_shape,
-                    int16_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_GE(params.quantized_activation_min,
-                   std::numeric_limits<int16_t>::min());
-  TFLITE_DCHECK_LE(params.quantized_activation_max,
-                   std::numeric_limits<int16_t>::max());
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int16_t max = std::numeric_limits<int16_t>::lowest();
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          max = std::max<int16_t>(max, params.quantized_activation_min);
-          max = std::min<int16_t>(max, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<int16_t>(max);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
@@ -1,106 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
-
-#include <limits>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-namespace reference_integer_ops {
-
-inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
-                 int32_t input_multiplier, int32_t input_shift,
-                 int32_t input_size, const int8_t* input_data,
-                 int8_t* output_data) {
-  // Integer bits must be in sync with Prepare() function.
-  static constexpr int32_t kInputIntegerBits = 4;
-  static constexpr int32_t kOutputScale = 7;
-  static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
-  static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
-  using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
-
-  for (int i = 0; i < input_size; ++i) {
-    const int32_t input =
-        static_cast<int32_t>(input_data[i]) - input_zero_point;
-    if (input <= -input_range_radius) {
-      output_data[i] = kMinInt8;
-    } else if (input >= input_range_radius) {
-      output_data[i] = kMaxInt8;
-    } else {
-      const int32_t input_in_q4 =
-          MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
-      const int32_t output_in_q0 =
-          gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw();
-
-      // Rescale and downcast.
-      using gemmlowp::RoundingDivideByPOT;
-      int32_t output_in_q24 =
-          RoundingDivideByPOT(output_in_q0, 31 - kOutputScale);
-      output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
-      output_data[i] = static_cast<int8_t>(output_in_q24);
-    }
-  }
-}
-
-inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
-                 int32_t input_size, const int16_t* ptr_input_data,
-                 int16_t* ptr_output_data) {
-  // We use the LUT for sigmoid and take into account, that
-  // tanh(x) = 2*sigmoid(2*x) - 1
-
-  int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
-
-  for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
-    int32_t input_data = (*ptr_input_data) * input_data_mul;
-
-    if (input_left_shift == 1) {
-      input_data <<= 1;
-    }
-
-    // Scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
-    uint32_t abs_input_data = 3 * abs(input_data);
-    uint32_t uh = abs_input_data >> 8;
-    int32_t result;
-
-    if (uh >= 255) {
-      // Saturate to maximum.
-      result = 0xFFFF << 8;
-    } else {
-      uint32_t ua = sigmoid_table_uint16[uh];
-      uint32_t ub = sigmoid_table_uint16[uh + 1];
-
-      uint8_t ut = abs_input_data & 0xFF;
-
-      result = (ua << 8) + ut * (ub - ua);
-    }
-
-    result = (input_data >= 0)
-                 ? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
-                 : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
-
-    // Convert back to 16-bit.
-    result >>= (9 - 1);
-
-    *ptr_output_data = result;
-  }
-}
-
-}  // namespace reference_integer_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/l2normalization.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/l2normalization.h
@@ -1,90 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
-
-#include <algorithm>
-#include <cmath>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
-                            const RuntimeShape& input_shape,
-                            const float* input_data,
-                            const RuntimeShape& output_shape,
-                            float* output_data, float epsilon = 1e-6) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-  for (int i = 0; i < outer_size; ++i) {
-    float squared_l2_norm = 0;
-    for (int c = 0; c < depth; ++c) {
-      const float val = input_data[depth * i + c];
-      squared_l2_norm += val * val;
-    }
-    float l2_norm = std::sqrt(squared_l2_norm);
-    l2_norm = std::max(l2_norm, epsilon);
-    for (int c = 0; c < depth; ++c) {
-      output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
-    }
-  }
-}
-
-inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
-                            const RuntimeShape& input_shape,
-                            const uint8_t* input_data,
-                            const RuntimeShape& output_shape,
-                            uint8_t* output_data) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int32_t input_zero_point = op_params.input_zero_point;
-
-  for (int i = 0; i < outer_size; ++i) {
-    int32_t square_l2_norm = 0;
-    for (int c = 0; c < depth; c++) {
-      int32_t diff = input_data[depth * i + c] - input_zero_point;
-      square_l2_norm += diff * diff;
-    }
-    int32_t inv_l2norm_multiplier;
-    int inv_l2norm_shift;
-    GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
-                                     &inv_l2norm_multiplier, &inv_l2norm_shift);
-    for (int c = 0; c < depth; c++) {
-      int32_t diff = input_data[depth * i + c] - input_zero_point;
-      int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
-      int32_t unclamped_output_val = 128 + rescaled_diff;
-      int32_t output_val =
-          std::min(static_cast<int32_t>(255),
-                   std::max(static_cast<int32_t>(0), unclamped_output_val));
-      output_data[depth * i + c] = static_cast<uint8_t>(output_val);
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/logistic.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/logistic.h
@@ -1,132 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
-
-#include <cmath>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-#include "tensorflow/lite/kernels/op_macros.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
-                     const RuntimeShape& output_shape, float* output_data) {
-  const float cutoff_upper = 16.619047164916992188f;
-  const float cutoff_lower = -9.f;
-
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  // Rational for using approximation in reference kernel.
-  // 0. This approximation gives enough precision for float.
-  // 1. This works around an issue on an embedded chipset where exp() does not
-  // return correctly as expected - exp(x) should return inf when overflown
-  // not 1.701417   IEEE 754 defines representation for inf.
-  // 2. This will speed up calculation and is matching the behavior in the
-  // optimized kernels. (check the definition of scalar_logistic_op<float>)
-
-  for (int i = 0; i < flat_size; i++) {
-    float val = input_data[i];
-    float result;
-    if (val > cutoff_upper) {
-      result = 1.0f;
-    } else if (val < cutoff_lower) {
-      result = std::exp(val);
-    } else {
-      result = 1.f / (1.f + std::exp(-val));
-    }
-    output_data[i] = result;
-  }
-}
-
-// Convenience version that allows, for example, generated-code calls to be
-// uniform between data types.
-inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
-                     const float* input_data, const RuntimeShape& output_shape,
-                     float* output_data) {
-  // Drop params: not needed.
-  Logistic(input_shape, input_data, output_shape, output_data);
-}
-
-inline void Logistic(const LogisticParams& params,
-                     const RuntimeShape& input_shape, const int16_t* input_data,
-                     const RuntimeShape& output_shape, int16_t* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    // F0 uses 0 integer bits, range [-1, 1].
-    // This is the return type of math functions such as tanh, logistic,
-    // whose range is in [-1, 1].
-    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-    // F3 uses 3 integer bits, range [-8, 8], the input range expected here.
-    using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
-
-    const F3 input = F3::FromRaw(input_data[i]);
-    F0 output = gemmlowp::logistic(input);
-    output_data[i] = output.raw();
-  }
-}
-
-// Quantized int8_t logistic activation.  Cheats by dequantizing and
-// requantizing around the floating point logistic method.  This implementation
-// is slow on platforms without a floating point unit.
-
-// TODO(b/141211002): Delete this int8_t implementation once we can reuse the
-// approach used in TFLite for int8_t Logistic.
-inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
-                     float input_scale, int input_zero_point,
-                     const RuntimeShape& output_shape, int8_t* output_data,
-                     float output_scale, int output_zero_point) {
-  const float cutoff_upper = 16.619047164916992188f;
-  const float cutoff_lower = -9.f;
-
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  // Rational for using approximation in reference kernel.
-  // 0. This approximation gives enough precision for float.
-  // 1. This works around an issue on an embedded chipset where exp() does not
-  // return correctly as expected - exp(x) should return inf when overflown
-  // not 1.701417   IEEE 754 defines representation for inf.
-  // 2. This will speed up calculation and is matching the behavior in the
-  // optimized kernels. (check the definition of scalar_logistic_op<float>)
-
-  for (int i = 0; i < flat_size; i++) {
-    // Dequantize.
-    float val =
-        static_cast<float>((input_data[i] - input_zero_point) * input_scale);
-    float result;
-    if (val > cutoff_upper) {
-      result = 1.0f;
-    } else if (val < cutoff_lower) {
-      result = std::exp(val);
-    } else {
-      result = 1.f / (1.f + std::exp(-val));
-    }
-    // Requantize
-    int8_t output =
-        static_cast<int8_t>(result / output_scale + output_zero_point);
-    output_data[i] = output;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/maximum_minimum.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/maximum_minimum.h
@@ -1,64 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename T, typename Op, int N = 5>
-void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape,
-                                 const T* input1_data,
-                                 const RuntimeShape& unextended_input2_shape,
-                                 const T* input2_data,
-                                 const RuntimeShape& unextended_output_shape,
-                                 T* output_data, Op op) {
-  // Uses element-wise calculation if broadcast is not required.
-  if (unextended_input1_shape == unextended_input2_shape) {
-    const int flat_size =
-        MatchingElementsSize(unextended_input1_shape, unextended_input2_shape,
-                             unextended_output_shape);
-    for (int i = 0; i < flat_size; ++i) {
-      output_data[i] = op(input1_data[i], input2_data[i]);
-    }
-  } else {
-    TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
-    TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
-    TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
-
-    NdArrayDesc<N> desc1;
-    NdArrayDesc<N> desc2;
-    NdArrayDesc<N> output_desc;
-    NdArrayDescsForElementwiseBroadcast(
-        unextended_input1_shape, unextended_input2_shape, &desc1, &desc2);
-    CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
-                   &output_desc);
-
-    auto maxmin_func = [&](int indexes[N]) {
-      output_data[SubscriptToIndex(output_desc, indexes)] =
-          op(input1_data[SubscriptToIndex(desc1, indexes)],
-             input2_data[SubscriptToIndex(desc2, indexes)]);
-    };
-    NDOpsHelper<N>(output_desc, maxmin_func);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/mul.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/mul.h
@@ -1,166 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// Element-wise mul that can often be used for inner loop of broadcast Mul as
-// well as the non-broadcast Mul.
-inline void MulElementwise(int size, const ArithmeticParams& params,
-                           const uint8_t* input1_data,
-                           const uint8_t* input2_data, uint8_t* output_data) {
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t unclamped_result =
-        params.output_offset +
-        MultiplyByQuantizedMultiplier(input1_val * input2_val,
-                                      params.output_multiplier,
-                                      params.output_shift);
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, unclamped_result));
-    output_data[i] = static_cast<uint8_t>(clamped_output);
-  }
-}
-
-template <typename T>
-inline void Mul(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const T* input1_data,
-                const RuntimeShape& input2_shape, const T* input2_data,
-                const RuntimeShape& output_shape, T* output_data) {
-  T output_activation_min;
-  T output_activation_max;
-  GetActivationParams(params, &output_activation_min, &output_activation_max);
-
-  const int flat_size =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
-        input1_data[i] * input2_data[i], output_activation_min,
-        output_activation_max);
-  }
-}
-
-inline void Mul(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const uint8_t* input1_data,
-                const RuntimeShape& input2_shape, const uint8_t* input2_data,
-                const RuntimeShape& output_shape, uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
-
-  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void BroadcastMul4DSlow(const ArithmeticParams& params,
-                               const RuntimeShape& input1_shape,
-                               const uint8_t* input1_data,
-                               const RuntimeShape& input2_shape,
-                               const uint8_t* input2_data,
-                               const RuntimeShape& output_shape,
-                               uint8_t* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          const int32_t input1_val =
-              params.input1_offset +
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
-          const int32_t input2_val =
-              params.input2_offset +
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
-          const int32_t unclamped_result =
-              params.output_offset +
-              MultiplyByQuantizedMultiplier(input1_val * input2_val,
-                                            params.output_multiplier,
-                                            params.output_shift);
-          const int32_t clamped_output = std::min(
-              params.quantized_activation_max,
-              std::max(params.quantized_activation_min, unclamped_result));
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              static_cast<uint8_t>(clamped_output);
-        }
-      }
-    }
-  }
-}
-
-template <typename T>
-void BroadcastMul4DSlow(const ArithmeticParams& params,
-                        const RuntimeShape& unextended_input1_shape,
-                        const T* input1_data,
-                        const RuntimeShape& unextended_input2_shape,
-                        const T* input2_data,
-                        const RuntimeShape& unextended_output_shape,
-                        T* output_data) {
-  T output_activation_min;
-  T output_activation_max;
-  GetActivationParams(params, &output_activation_min, &output_activation_max);
-
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < output_shape.Dims(0); ++b) {
-    for (int y = 0; y < output_shape.Dims(1); ++y) {
-      for (int x = 0; x < output_shape.Dims(2); ++x) {
-        for (int c = 0; c < output_shape.Dims(3); ++c) {
-          output_data[Offset(output_shape, b, y, x, c)] =
-              ActivationFunctionWithMinMax(
-                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] *
-                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
-                  output_activation_min, output_activation_max);
-        }
-      }
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/neg.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/neg.h
@@ -1,37 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename T>
-inline void Negate(const RuntimeShape& input_shape, const T* input_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = -input_data[i];
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/pad.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/pad.h
@@ -1,162 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
-
-#include <vector>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// TFLite Pad supports activation tensors with up to 4 dimensions.
-constexpr int PadKernelMaxDimensionCount() { return 4; }
-
-// There are two versions of pad: Pad and PadV2.  In PadV2 there is a second
-// scalar input that provides the padding value.  Therefore pad_value_ptr can be
-// equivalent to a simple input1_data.  For Pad, it should point to a zero
-// value.
-//
-// Note that two typenames are required, so that T=P=int32_t is considered a
-// specialization distinct from P=int32_t.
-template <typename T, typename P>
-inline void PadImpl(const tflite::PadParams& op_params,
-                    const RuntimeShape& input_shape, const T* input_data,
-                    const P* pad_value_ptr, const RuntimeShape& output_shape,
-                    T* output_data) {
-  const RuntimeShape ext_input_shape =
-      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape);
-  const RuntimeShape ext_output_shape =
-      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape);
-  TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
-  TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());
-
-  // Runtime calls are currently fixed at 4 dimensions. Copy inputs so we can
-  // pad them to 4 dims (yes, we are "padding the padding").
-  int left_padding_copy[PadKernelMaxDimensionCount()];
-  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
-    left_padding_copy[i] = 0;
-  }
-  for (int i = 0; i < op_params.left_padding_count; ++i) {
-    left_padding_copy[i + PadKernelMaxDimensionCount() -
-                      op_params.left_padding_count] = op_params.left_padding[i];
-  }
-  int right_padding_copy[PadKernelMaxDimensionCount()];
-  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
-    right_padding_copy[i] = 0;
-  }
-  for (int i = 0; i < op_params.right_padding_count; ++i) {
-    right_padding_copy[i + PadKernelMaxDimensionCount() -
-                       op_params.right_padding_count] =
-        op_params.right_padding[i];
-  }
-
-  const int output_batch = ext_output_shape.Dims(0);
-  const int output_height = ext_output_shape.Dims(1);
-  const int output_width = ext_output_shape.Dims(2);
-  const int output_depth = ext_output_shape.Dims(3);
-
-  const int left_b_padding = left_padding_copy[0];
-  const int left_h_padding = left_padding_copy[1];
-  const int left_w_padding = left_padding_copy[2];
-  const int left_d_padding = left_padding_copy[3];
-
-  const int right_b_padding = right_padding_copy[0];
-  const int right_h_padding = right_padding_copy[1];
-  const int right_w_padding = right_padding_copy[2];
-  const int right_d_padding = right_padding_copy[3];
-
-  const T pad_value = *pad_value_ptr;
-
-  const T* in_ptr = input_data;
-  T* out_ptr = output_data;
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_h = 0; out_h < output_height; ++out_h) {
-      for (int out_w = 0; out_w < output_width; ++out_w) {
-        for (int out_d = 0; out_d < output_depth; ++out_d) {
-          if (out_b < left_b_padding ||
-              out_b >= output_batch - right_b_padding ||
-              out_h < left_h_padding ||
-              out_h >= output_height - right_h_padding ||
-              out_w < left_w_padding ||
-              out_w >= output_width - right_w_padding ||
-              out_d < left_d_padding ||
-              out_d >= output_depth - right_d_padding) {
-            *out_ptr++ = pad_value;
-          } else {
-            *out_ptr++ = *in_ptr++;
-          }
-        }
-      }
-    }
-  }
-}
-
-template <typename T, typename P>
-inline void Pad(const tflite::PadParams& op_params,
-                const RuntimeShape& input_shape, const T* input_data,
-                const P* pad_value_ptr, const RuntimeShape& output_shape,
-                T* output_data) {
-  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-          output_data);
-}
-
-// The second (pad-value) input can be int32_t when, say, the first is uint8_t.
-template <typename T>
-inline void Pad(const tflite::PadParams& op_params,
-                const RuntimeShape& input_shape, const T* input_data,
-                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
-                T* output_data) {
-  const T converted_pad_value = static_cast<T>(*pad_value_ptr);
-  PadImpl(op_params, input_shape, input_data, &converted_pad_value,
-          output_shape, output_data);
-}
-
-// This version avoids conflicting template matching.
-template <>
-inline void Pad(const tflite::PadParams& op_params,
-                const RuntimeShape& input_shape, const int32_t* input_data,
-                const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
-                int32_t* output_data) {
-  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-          output_data);
-}
-
-template <typename T, typename P>
-inline void PadImageStyle(const tflite::PadParams& op_params,
-                          const RuntimeShape& input_shape, const T* input_data,
-                          const P* pad_value_ptr,
-                          const RuntimeShape& output_shape, T* output_data) {
-  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-      output_data);
-}
-
-template <typename P>
-inline void PadImageStyle(const tflite::PadParams& op_params,
-                          const RuntimeShape& input_shape,
-                          const float* input_data, const P* pad_value_ptr,
-                          const RuntimeShape& output_shape,
-                          float* output_data) {
-  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
-      output_data);
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/pooling.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/pooling.h
@@ -1,297 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const float* input_data,
-                        const RuntimeShape& output_shape, float* output_data) {
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          float total = 0.f;
-          float filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              total +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          const float average = total / filter_count;
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              ActivationFunctionWithMinMax(average, params.float_activation_min,
-                                           params.float_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void AveragePool(const PoolParams& params,
-                        const RuntimeShape& input_shape,
-                        const uint8_t* input_data,
-                        const RuntimeShape& output_shape,
-                        uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          int32_t acc = 0;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              acc +=
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              filter_count++;
-            }
-          }
-          acc = (acc + filter_count / 2) / filter_count;
-          acc = std::max(acc, params.quantized_activation_min);
-          acc = std::min(acc, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<uint8_t>(acc);
-        }
-      }
-    }
-  }
-}
-
-inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,
-                   const float* input_data, const RuntimeShape& output_shape,
-                   float* output_data) {
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          float sum_squares = 0.f;
-          int filter_count = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              const float val =
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
-              sum_squares += val * val;
-              filter_count++;
-            }
-          }
-          const float l2pool_result = std::sqrt(sum_squares / filter_count);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              ActivationFunctionWithMinMax(l2pool_result,
-                                           params.float_activation_min,
-                                           params.float_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const float* input_data, const RuntimeShape& output_shape,
-                    float* output_data) {
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          float max = std::numeric_limits<float>::lowest();
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              ActivationFunctionWithMinMax(max, params.float_activation_min,
-                                           params.float_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
-                    const uint8_t* input_data, const RuntimeShape& output_shape,
-                    uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  TFLITE_DCHECK_GE(params.quantized_activation_min, 0);
-  TFLITE_DCHECK_LE(params.quantized_activation_max, 255);
-  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int stride_height = params.stride_height;
-  const int stride_width = params.stride_width;
-  for (int batch = 0; batch < batches; ++batch) {
-    for (int out_y = 0; out_y < output_height; ++out_y) {
-      for (int out_x = 0; out_x < output_width; ++out_x) {
-        for (int channel = 0; channel < depth; ++channel) {
-          const int in_x_origin =
-              (out_x * stride_width) - params.padding_values.width;
-          const int in_y_origin =
-              (out_y * stride_height) - params.padding_values.height;
-          // Compute the boundaries of the filter region clamped so as to
-          // ensure that the filter window fits in the input array.
-          const int filter_x_start = std::max(0, -in_x_origin);
-          const int filter_x_end =
-              std::min(params.filter_width, input_width - in_x_origin);
-          const int filter_y_start = std::max(0, -in_y_origin);
-          const int filter_y_end =
-              std::min(params.filter_height, input_height - in_y_origin);
-          uint8_t max = 0;
-          for (int filter_y = filter_y_start; filter_y < filter_y_end;
-               ++filter_y) {
-            for (int filter_x = filter_x_start; filter_x < filter_x_end;
-                 ++filter_x) {
-              const int in_x = in_x_origin + filter_x;
-              const int in_y = in_y_origin + filter_y;
-              max = std::max(
-                  max,
-                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
-            }
-          }
-          max = std::max<uint8_t>(max, params.quantized_activation_min);
-          max = std::min<uint8_t>(max, params.quantized_activation_max);
-          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
-              static_cast<uint8_t>(max);
-        }
-      }
-    }
-  }
-}
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/prelu.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/prelu.h
@@ -1,109 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// Broadcast prelu to output_shape for quantized uint8_t/int8_t data.
-template <typename T>
-inline void BroadcastPrelu4DSlow(
-    const PreluParams& params, const RuntimeShape& input_shape,
-    const T* input_data, const RuntimeShape& alpha_shape, const T* alpha_data,
-    const RuntimeShape& output_shape, T* output_data) {
-  TFLITE_DCHECK_LE(input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(alpha_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input_shape, alpha_shape, &desc1, &desc2);
-
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          int output_index = Offset(extended_output_shape, b, y, x, c);
-          int input_index = SubscriptToIndex(desc1, b, y, x, c);
-          const int32_t input_value =
-              params.input_offset + input_data[input_index];
-          int32_t output_value;
-          if (input_value >= 0) {
-            output_value = MultiplyByQuantizedMultiplier(
-                input_value, params.output_multiplier_1, params.output_shift_1);
-          } else {
-            auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
-            const int32_t alpha_value =
-                params.alpha_offset + alpha_data[alpha_index];
-
-            output_value = MultiplyByQuantizedMultiplier(
-                input_value * alpha_value, params.output_multiplier_2,
-                params.output_shift_2);
-          }
-          output_value += params.output_offset;
-
-          const int32_t quantized_min = std::numeric_limits<T>::min();
-          const int32_t quantized_max = std::numeric_limits<T>::max();
-          const int32_t clamped_output =
-              std::min(quantized_max, std::max(quantized_min, output_value));
-          output_data[output_index] = static_cast<T>(clamped_output);
-        }
-      }
-    }
-  }
-}
-
-template <typename T>
-inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape,
-                  const T* input_data, const RuntimeShape& alpha_shape,
-                  const T* alpha_data, const RuntimeShape& output_shape,
-                  T* output_data) {
-  const int32_t quantized_min = std::numeric_limits<T>::min();
-  const int32_t quantized_max = std::numeric_limits<T>::max();
-
-  const int flat_size =
-      MatchingElementsSize(input_shape, alpha_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    const int32_t input_value = params.input_offset + input_data[i];
-    int32_t output_value;
-    if (input_value >= 0) {
-      output_value = MultiplyByQuantizedMultiplier(
-          input_value, params.output_multiplier_1, params.output_shift_1);
-    } else {
-      const int32_t alpha_value = params.alpha_offset + alpha_data[i];
-
-      output_value = MultiplyByQuantizedMultiplier(input_value * alpha_value,
-                                                   params.output_multiplier_2,
-                                                   params.output_shift_2);
-    }
-    output_value += params.output_offset;
-
-    const int32_t clamped_output =
-        std::min(quantized_max, std::max(quantized_min, output_value));
-    output_data[i] = static_cast<T>(clamped_output);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h
@@ -1,138 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// Consolidates dimensions in broadcast inputs, checks for five-fold pattern.
-//
-// For example, if sequence of dimensions of one input is
-// ..., 1, 3, 1, 7, 9, 5,... and the other is ..., 2, 3, 1, 7, 1, 1, ...
-// we can consolidate these as
-// ..., 1, 3*7, 9*5, ... and 2, 3*7, 1.
-//
-// The category is updated in the less-frequent case of shapes that are
-// not suited to a fivefold-loop broadcast.
-//
-// Falls back to generic pattern when it does not know how to process properly.
-//
-// Returns true iff there is some sort of broadcast, which includes five-fold
-// patterns and falling back to generic broadcast.
-inline bool ProcessBroadcastShapes(const RuntimeShape& shape0,
-                                   const RuntimeShape& shape1,
-                                   tflite::ArithmeticParams* params) {
-  const int dims_count =
-      std::max(shape0.DimensionsCount(), shape1.DimensionsCount());
-
-  params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
-  RuntimeShape scalar_shape(dims_count, 1);
-
-  auto extended_shape0 = RuntimeShape::ExtendedShape(dims_count, shape0);
-  auto extended_shape1 = RuntimeShape::ExtendedShape(dims_count, shape1);
-
-  // Check for "exact" match, implicitly accepting any scalar shapes.
-  if (extended_shape0 == extended_shape1) {
-    params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
-    return false;
-  }
-
-  for (int i = dims_count - 1; i >= 0; --i) {
-    if (extended_shape0.Dims(i) == extended_shape1.Dims(i)) {
-      continue;
-    } else if (extended_shape0.Dims(i) == 1) {
-      params->broadcast_category =
-          BroadcastableOpCategory::kFirstInputBroadcastsFast;
-      break;
-    } else if (extended_shape1.Dims(i) == 1) {
-      params->broadcast_category =
-          BroadcastableOpCategory::kSecondInputBroadcastsFast;
-      break;
-    } else {
-      // This case is erroneous: there is a dimension that does not match and
-      // is not a broadcast from one shape to the other.
-      params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
-      return true;
-    }
-  }
-
-  if (params->broadcast_category !=
-          BroadcastableOpCategory::kFirstInputBroadcastsFast &&
-      params->broadcast_category !=
-          BroadcastableOpCategory::kSecondInputBroadcastsFast) {
-    // This is unreachable because at least one else clause in the above loop
-    // must be reached.
-    TFLITE_DCHECK(false);
-    params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
-    return false;
-  }
-
-  // From this point it is assumed contractually that corresponding dimensions
-  // in shape0 and shape1 are either (a) equal or (b) one or other equals 1.
-  const bool swap_inputs = params->broadcast_category ==
-                           BroadcastableOpCategory::kSecondInputBroadcastsFast;
-  const RuntimeShape* shape_a =
-      swap_inputs ? &extended_shape1 : &extended_shape0;
-  const RuntimeShape* shape_b =
-      swap_inputs ? &extended_shape0 : &extended_shape1;
-
-  int i = dims_count - 1;
-  params->broadcast_shape[0] = 1;
-  params->broadcast_shape[1] = 1;
-  params->broadcast_shape[2] = 1;
-  params->broadcast_shape[3] = 1;
-  params->broadcast_shape[4] = 1;
-  // y_0 is greedy: include dims if both or neither equal 1: in other words,
-  // test for equality rather than (shape_a->Dims(i) != 1).
-  while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
-    params->broadcast_shape[4] *= shape_b->Dims(i);
-    --i;
-  }
-  // Here either input_a or input_b has dim of 1 (if i >= 0).  If it is input_b
-  // that has the unit dimension, the next two loops are not entered.
-  while (i >= 0 && shape_a->Dims(i) == 1) {
-    params->broadcast_shape[3] *= shape_b->Dims(i);
-    --i;
-  }
-  while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
-    params->broadcast_shape[2] *= shape_a->Dims(i);
-    --i;
-  }
-  // Here either input_a or input_b has dim of 1 (if i >= 0).
-  while (i >= 0 && shape_b->Dims(i) == 1) {
-    params->broadcast_shape[1] *= shape_a->Dims(i);
-    --i;
-  }
-  while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
-    params->broadcast_shape[0] *= shape_b->Dims(i);
-    --i;
-  }
-
-  // Rarer case is when the broadcast dimensions cannot be handled by a fivefold
-  // loop.
-  if (i >= 0) {
-    params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
-  }
-  return true;
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/quantize.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/quantize.h
@@ -1,55 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
-
-#include <algorithm>
-#include <limits>
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-template <typename InputT, typename OutputT>
-inline void AffineQuantize(const tflite::QuantizationParams& op_params,
-                           const RuntimeShape& input_shape,
-                           const InputT* input_data,
-                           const RuntimeShape& output_shape,
-                           OutputT* output_data) {
-  const int32_t zero_point = op_params.zero_point;
-  const double scale = op_params.scale;
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
-  static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
-
-  for (int i = 0; i < flat_size; i++) {
-    const InputT val = input_data[i];
-    int32_t unclamped =
-        static_cast<int32_t>(TfLiteRound(val / static_cast<float>(scale))) +
-        zero_point;
-    int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
-    output_data[i] = clamped;
-  }
-}
-
-}  // namespace reference_ops
-
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/reduce.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/reduce.h
@@ -1,405 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
-
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/max.h"
-#include "tensorflow/lite/kernels/internal/min.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
-// This method iterates through input data and reduce elements along the
-// dimensions given in axis.
-template <typename In, typename Out>
-inline bool Reduce(const In* input_data, const int* input_dims,
-                   const int* output_dims, const int input_num_dims,
-                   const int output_num_dims, const int* axis,
-                   const int num_axis, int* input_iter,
-                   Out reducer(const Out current, const In in),
-                   Out* output_data) {
-  // Reset input iterator.
-  for (int idx = 0; idx < input_num_dims; ++idx) {
-    input_iter[idx] = 0;
-  }
-  // Iterate through input_data.
-  do {
-    size_t input_offset =
-        ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
-    size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
-                                               input_iter, num_axis, axis);
-    output_data[output_offset] =
-        reducer(output_data[output_offset], input_data[input_offset]);
-  } while (NextIndex(input_num_dims, input_dims, input_iter));
-  return true;
-}
-
-// This method parses the input 'axis' to remove duplicates and handle negative
-// values, and returns a valid 'out_axis'
-inline bool ResolveAxis(const int num_dims, const int* axis,
-                        const int64_t num_axis, int* out_axis,
-                        int* out_num_axis) {
-  *out_num_axis = 0;  // Just in case.
-  // Short-circuit axis resolution for scalars; the axis will go unused.
-  if (num_dims == 0) {
-    return true;
-  }
-  // o(n^2) is fine since out_num_axis should be really small, mostly <= 4
-  for (int64_t idx = 0; idx < num_axis; ++idx) {
-    // Handle negative index. A positive index 'p_idx' can be represented as a
-    // negative index 'n_idx' as: n_idx = p_idx-num_dims
-    // eg: For num_dims=3, [0, 1, 2] is the same as [-3, -2, -1]  */
-    int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
-    TFLITE_DCHECK(current >= 0 && current < num_dims);
-    bool is_dup = false;
-    for (int j = 0; j < *out_num_axis; ++j) {
-      if (out_axis[j] == current) {
-        is_dup = true;
-        break;
-      }
-    }
-    if (!is_dup) {
-      out_axis[*out_num_axis] = current;
-      *out_num_axis += 1;
-    }
-  }
-  return true;
-}
-
-// This method expects that output_data has been initialized.
-template <typename In, typename Out>
-inline bool ReduceSumImpl(const In* input_data, const int* input_dims,
-                          const int* output_dims, const int input_num_dims,
-                          const int output_num_dims, const int* axis,
-                          const int num_axis, int* input_iter,
-                          Out* output_data) {
-  auto reducer = [](const Out current, const In in) -> Out {
-    const Out actual_in = static_cast<Out>(in);
-    return current + actual_in;
-  };
-  return Reduce<In, Out>(input_data, input_dims, output_dims, input_num_dims,
-                         output_num_dims, axis, num_axis, input_iter, reducer,
-                         output_data);
-}
-
-template <typename T>
-inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
-                                    const T init_value, T* data) {
-  size_t num_elements = 1;
-  for (int idx = 0; idx < num_dims; ++idx) {
-    size_t current = static_cast<size_t>(dims[idx]);
-    // Overflow prevention.
-    if (num_elements > std::numeric_limits<size_t>::max() / current) {
-      return false;
-    }
-    num_elements *= current;
-  }
-  for (size_t idx = 0; idx < num_elements; ++idx) {
-    data[idx] = init_value;
-  }
-  return true;
-}
-
-// Computes the generic value (i.e., sum/max/min/prod) of elements across
-// dimensions given in axis. It needs to pass in init_value and reducer.
-template <typename T>
-inline bool ReduceGeneric(const T* input_data, const int* input_dims,
-                          const int input_num_dims, T* output_data,
-                          const int* output_dims, const int output_num_dims,
-                          const int* axis, const int64_t num_axis_dimensions,
-                          bool keep_dims, int* temp_index, int* resolved_axis,
-                          T init_value,
-                          T reducer(const T current, const T in)) {
-  // Reset output data.
-  if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
-                               output_data)) {
-    return false;
-  }
-
-  // Resolve axis.
-  int num_resolved_axis = 0;
-  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
-                   &num_resolved_axis)) {
-    return false;
-  }
-
-  return Reduce<T, T>(input_data, input_dims, output_dims, input_num_dims,
-                      output_num_dims, resolved_axis, num_resolved_axis,
-                      temp_index, reducer, output_data);
-}
-
-// Computes the mean of elements across dimensions given in axis.
-// It does so in two stages, first calculates the sum of elements along the axis
-// then divides it by the number of element in axis.
-template <typename T, typename U>
-inline bool Mean(const T* input_data, const int* input_dims,
-                 const int input_num_dims, T* output_data,
-                 const int* output_dims, const int output_num_dims,
-                 const int* axis, const int num_axis_dimensions, bool keep_dims,
-                 int* temp_index, int* resolved_axis, U* temp_sum) {
-  ruy::profiler::ScopeLabel label("Mean");
-  // Reset output data.
-  size_t num_outputs = 1;
-  for (int idx = 0; idx < output_num_dims; ++idx) {
-    size_t current = static_cast<size_t>(output_dims[idx]);
-    // Overflow prevention.
-    if (num_outputs > std::numeric_limits<size_t>::max() / current) {
-      return false;
-    }
-    num_outputs *= current;
-  }
-  for (size_t idx = 0; idx < num_outputs; ++idx) {
-    output_data[idx] = T();
-    temp_sum[idx] = U();
-  }
-
-  // Resolve axis.
-  int num_resolved_axis = 0;
-  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
-                   &num_resolved_axis)) {
-    return false;
-  }
-
-  if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
-                           output_num_dims, resolved_axis, num_resolved_axis,
-                           temp_index, temp_sum)) {
-    return false;
-  }
-
-  // Calculate mean by dividing output_data by num of aggregated element.
-  U num_elements_in_axis = 1;
-  for (int idx = 0; idx < num_resolved_axis; ++idx) {
-    size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
-    // Overflow prevention.
-    if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
-      return false;
-    }
-    num_elements_in_axis *= current;
-  }
-
-  if (num_elements_in_axis > 0) {
-    for (size_t idx = 0; idx < num_outputs; ++idx) {
-      output_data[idx] =
-          static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
-    }
-  }
-  return true;
-}
-
-template <typename T>
-inline void Mean(const tflite::MeanParams& op_params,
-                 const RuntimeShape& unextended_input_shape,
-                 const T* input_data,
-                 const RuntimeShape& unextended_output_shape, T* output_data) {
-  ruy::profiler::ScopeLabel label("Mean4D");
-
-  // Current implementation only supports dimension equals 4 and simultaneous
-  // reduction over width and height.
-  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-
-  const int output_batch = output_shape.Dims(0);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int output_depth = output_shape.Dims(3);
-
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-
-  TFLITE_CHECK_EQ(op_params.axis_count, 2);
-  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
-               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
-  TFLITE_CHECK_EQ(output_height, 1);
-  TFLITE_CHECK_EQ(output_width, 1);
-
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_d = 0; out_d < output_depth; ++out_d) {
-      float value = 0;
-      for (int in_h = 0; in_h < input_height; ++in_h) {
-        for (int in_w = 0; in_w < input_width; ++in_w) {
-          value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
-        }
-      }
-      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
-          value / (input_width * input_height);
-    }
-  }
-}
-
-inline void Mean(const tflite::MeanParams& op_params,
-                 const RuntimeShape& unextended_input_shape,
-                 const uint8_t* input_data, int32_t input_zero_point,
-                 float input_scale, const RuntimeShape& unextended_output_shape,
-                 uint8_t* output_data, int32_t output_zero_point,
-                 float output_scale) {
-  ruy::profiler::ScopeLabel label("Mean4D/Uint8");
-
-  // Current implementation only supports dimension equals 4 and simultaneous
-  // reduction over width and height.
-  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-  const int output_batch = output_shape.Dims(0);
-  const int output_height = output_shape.Dims(1);
-  const int output_width = output_shape.Dims(2);
-  const int output_depth = output_shape.Dims(3);
-  const int input_height = input_shape.Dims(1);
-  const int input_width = input_shape.Dims(2);
-  const float num_elements_in_axis = input_width * input_height;
-
-  TFLITE_CHECK_EQ(op_params.axis_count, 2);
-  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
-               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
-  TFLITE_CHECK_EQ(output_height, 1);
-  TFLITE_CHECK_EQ(output_width, 1);
-
-  constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
-  constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
-
-  int32_t bias =
-      output_zero_point -
-      static_cast<int32_t>(input_zero_point * input_scale / output_scale);
-  double real_scale =
-      static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
-
-  int32_t multiplier;
-  int shift;
-  QuantizeMultiplier(real_scale, &multiplier, &shift);
-  for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_d = 0; out_d < output_depth; ++out_d) {
-      int32_t acc = 0;
-      for (int in_h = 0; in_h < input_height; ++in_h) {
-        for (int in_w = 0; in_w < input_width; ++in_w) {
-          acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
-        }
-      }
-      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
-      acc += bias;
-      acc = std::min(std::max(acc, kMinValue), kMaxValue);
-      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
-          static_cast<uint8_t>(acc);
-    }
-  }
-}
-
-// Computes the mean of elements across dimensions given in axis.
-// It does so in two stages, first calculates the sum of elements along the axis
-// then divides it by the number of element in axis for quantized values.
-template <typename T, typename U>
-inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
-                               float input_scale, const int* input_dims,
-                               const int input_num_dims, T* output_data,
-                               int32_t output_zero_point, float output_scale,
-                               const int* output_dims,
-                               const int output_num_dims, const int* axis,
-                               const int num_axis_dimensions, bool keep_dims,
-                               int* temp_index, int* resolved_axis, U* temp_sum,
-                               bool compute_sum) {
-  const bool uint8_case = std::is_same<T, uint8_t>::value;
-  const bool int16_case = std::is_same<T, int16_t>::value;
-  if (uint8_case) {
-    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8");
-  } else if (int16_case) {
-    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16");
-  } else {
-    ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8");
-  }
-  // Reset output data.
-  size_t num_outputs = 1;
-  for (int idx = 0; idx < output_num_dims; ++idx) {
-    size_t current = static_cast<size_t>(output_dims[idx]);
-    // Overflow prevention.
-    if (num_outputs > std::numeric_limits<size_t>::max() / current) {
-      return false;
-    }
-    num_outputs *= current;
-  }
-  for (size_t idx = 0; idx < num_outputs; ++idx) {
-    output_data[idx] = T();
-    temp_sum[idx] = U();
-  }
-
-  // Resolve axis.
-  int num_resolved_axis = 0;
-  if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
-                   &num_resolved_axis)) {
-    return false;
-  }
-
-  if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
-                           output_num_dims, resolved_axis, num_resolved_axis,
-                           temp_index, temp_sum)) {
-    return false;
-  }
-
-  // Calculate mean by dividing output_data by num of aggregated element.
-  U num_elements_in_axis = 1;
-  for (int idx = 0; idx < num_resolved_axis; ++idx) {
-    size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
-    // Overflow prevention.
-    if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
-      return false;
-    }
-    num_elements_in_axis *= current;
-  }
-
-  if (num_elements_in_axis > 0) {
-    const float scale = input_scale / output_scale;
-    if (compute_sum) {
-      // TODO(b/116341117): Eliminate float and do this completely in 8bit.
-      const float bias =
-          -input_zero_point * scale * num_elements_in_axis + 0.5f;
-      for (size_t idx = 0; idx < num_outputs; ++idx) {
-        const U value =
-            static_cast<U>(TfLiteRound(temp_sum[idx] * scale + bias)) +
-            output_zero_point;
-        output_data[idx] = static_cast<T>(value);
-      }
-    } else {
-      const float bias = -input_zero_point * scale + 0.5f;
-      for (size_t idx = 0; idx < num_outputs; ++idx) {
-        float float_mean = static_cast<float>(temp_sum[idx]) /
-                           static_cast<float>(num_elements_in_axis);
-        float result = TfLiteMin(
-            TfLiteRound(float_mean * scale + bias) + output_zero_point,
-            static_cast<float>(std::numeric_limits<T>::max()));
-        result = TfLiteMax(result,
-                           static_cast<float>(std::numeric_limits<T>::min()));
-        output_data[idx] = static_cast<T>(result);
-      }
-    }
-  }
-  return true;
-}
-
-}  // namespace reference_ops
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/requantize.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/requantize.h
@@ -1,67 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
-
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace reference_ops {
-
-template <typename input_type, typename output_type>
-inline void Requantize(const input_type* input_data, int32_t size,
-                       int32_t effective_scale_multiplier,
-                       int32_t effective_scale_shift, int32_t input_zeropoint,
-                       int32_t output_zeropoint, output_type* output_data) {
-  ruy::profiler::ScopeLabel label("Requantize");
-  const bool same_scale =
-      (effective_scale_multiplier == 1 << 30 && effective_scale_shift == 1);
-  if (same_scale) {
-    const bool mixed_type_int8_uint8 =
-        std::is_same<input_type, int8_t>::value &&
-        std::is_same<output_type, uint8_t>::value;
-    const bool mixed_type_uint8_int8 =
-        std::is_same<input_type, uint8_t>::value &&
-        std::is_same<output_type, int8_t>::value;
-    const int32_t zero_point_diff = input_zeropoint - output_zeropoint;
-    // Fast path to do requantization for the case when just a shift of 128 is
-    // needed.
-    if ((mixed_type_int8_uint8 && zero_point_diff == -128) ||
-        (mixed_type_uint8_int8 && zero_point_diff == 128)) {
-      for (int i = 0; i < size; ++i) {
-        output_data[i] = input_data[i] ^ 0x80;
-      }
-    }
-  }
-  static constexpr int32_t kMinOutput = std::numeric_limits<output_type>::min();
-  static constexpr int32_t kMaxOutput = std::numeric_limits<output_type>::max();
-  for (int i = 0; i < size; ++i) {
-    const int32_t input = input_data[i] - input_zeropoint;
-    const int32_t output =
-        MultiplyByQuantizedMultiplier(input, effective_scale_multiplier,
-                                      effective_scale_shift) +
-        output_zeropoint;
-    const int32_t clamped_output =
-        std::max(std::min(output, kMaxOutput), kMinOutput);
-    output_data[i] = static_cast<output_type>(clamped_output);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h
@@ -1,101 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
-
-#include <cmath>
-
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline int32_t GetNearestNeighbor(const int input_value,
-                                  const int32_t input_size,
-                                  const int32_t output_size,
-                                  const bool align_corners,
-                                  const bool half_pixel_centers) {
-  const float scale =
-      (align_corners && output_size > 1)
-          ? (input_size - 1) / static_cast<float>(output_size - 1)
-          : input_size / static_cast<float>(output_size);
-  const float offset = half_pixel_centers ? 0.5f : 0.0f;
-  int32_t output_value = std::min(
-      align_corners
-          ? static_cast<int32_t>(TfLiteRound((input_value + offset) * scale))
-          : static_cast<int32_t>(std::floor((input_value + offset) * scale)),
-      input_size - 1);
-  if (half_pixel_centers) {
-    output_value = std::max(static_cast<int32_t>(0), output_value);
-  }
-  return output_value;
-}
-
-template <typename T>
-inline void ResizeNearestNeighbor(
-    const tflite::ResizeNearestNeighborParams& op_params,
-    const RuntimeShape& unextended_input_shape, const T* input_data,
-    const RuntimeShape& output_size_shape, const int32_t* output_size_data,
-    const RuntimeShape& unextended_output_shape, T* output_data) {
-  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-
-  int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
-  int32_t input_height = input_shape.Dims(1);
-  int32_t input_width = input_shape.Dims(2);
-  int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
-
-  // The Tensorflow version of this op allows resize on the width and height
-  // axis only.
-  TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2);
-  int32_t output_height = output_size_data[0];
-  int32_t output_width = output_size_data[1];
-
-  const int col_offset = input_shape.Dims(3);
-  const int row_offset = input_shape.Dims(2) * col_offset;
-  const int batch_offset = input_shape.Dims(1) * row_offset;
-
-  const T* input_ptr = input_data;
-  T* output_ptr = output_data;
-  for (int b = 0; b < batches; ++b) {
-    for (int y = 0; y < output_height; ++y) {
-      int32_t in_y = GetNearestNeighbor(y, input_height, output_height,
-                                        op_params.align_corners,
-                                        op_params.half_pixel_centers);
-      const T* y_input_ptr = input_ptr + in_y * row_offset;
-      for (int x = 0; x < output_width; ++x) {
-        int32_t in_x = GetNearestNeighbor(x, input_width, output_width,
-                                          op_params.align_corners,
-                                          op_params.half_pixel_centers);
-        const T* x_input_ptr = y_input_ptr + in_x * col_offset;
-        memcpy(output_ptr, x_input_ptr, depth * sizeof(T));
-        output_ptr += depth;
-      }
-    }
-    input_ptr += batch_offset;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/round.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/round.h
@@ -1,51 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
-
-#include <cmath>
-
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline float RoundToNearest(float value) {
-  auto floor_val = std::floor(value);
-  auto diff = value - floor_val;
-  if ((diff < 0.5f) ||
-      ((diff == 0.5f) && (static_cast<int>(floor_val) % 2 == 0))) {
-    return floor_val;
-  } else {
-    return floor_val = floor_val + 1.0f;
-  }
-}
-
-inline void Round(const RuntimeShape& input_shape, const float* input_data,
-                  const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    // Note that this implementation matches that of tensorFlow tf.round
-    // and corresponds to the bankers rounding method.
-    // cfenv (for fesetround) is not yet supported universally on Android, so
-    // using a work around.
-    output_data[i] = RoundToNearest(input_data[i]);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/softmax.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/softmax.h
@@ -1,228 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
-
-#include <limits>
-#include <vector>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-#include "tensorflow/lite/kernels/op_macros.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void Softmax(const SoftmaxParams& params,
-                    const RuntimeShape& input_shape, const float* input_data,
-                    const RuntimeShape& output_shape, float* output_data) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-
-  for (int i = 0; i < outer_size; ++i) {
-    // Find max element value which we'll use to ensure numerical stability
-    // taking advantage of the following equality:
-    // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
-    float max = std::numeric_limits<float>::lowest();
-    for (int c = 0; c < depth; ++c) {
-      max = std::max(max, input_data[i * depth + c]);
-    }
-
-    // Compute sum.
-    float sum = 0.f;
-    for (int c = 0; c < depth; ++c) {
-      sum += std::exp((input_data[i * depth + c] - max) *
-                      static_cast<float>(params.beta));
-    }
-
-    // Compute result.
-    for (int c = 0; c < depth; ++c) {
-      output_data[i * depth + c] = std::exp((input_data[i * depth + c] - max) *
-                                            static_cast<float>(params.beta)) /
-                                   sum;
-    }
-  }
-}
-
-// Quantized softmax with int8_t/uint8_t input and int8_t/uint8_t/int16_t
-// output.
-template <typename InputT, typename OutputT>
-inline void Softmax(const SoftmaxParams& params,
-                    const RuntimeShape& input_shape, const InputT* input_data,
-                    const RuntimeShape& output_shape, OutputT* output_data) {
-  const int32_t input_beta_multiplier = params.input_multiplier;
-  const int32_t input_beta_left_shift = params.input_left_shift;
-  const int diff_min = params.diff_min;
-  // The representation chosen for the input to the exp() function is Q5.26.
-  // We need to leave extra space since values that we skip might be as large as
-  // -32 before multiplying by input_beta_multiplier, and therefore as large as
-  // -16 afterwards.  Note that exp(-8) is definitely not insignificant to
-  // accumulation, but exp(-16) definitely is.
-  static const int kScaledDiffIntegerBits = 5;
-  static const int kAccumulationIntegerBits = 12;
-  using FixedPointScaledDiff =
-      gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
-  using FixedPointAccum =
-      gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
-  using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
-
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-
-  for (int i = 0; i < outer_size; ++i) {
-    InputT max_in_row = std::numeric_limits<InputT>::min();
-    for (int c = 0; c < depth; ++c) {
-      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
-    }
-
-    FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
-    for (int c = 0; c < depth; ++c) {
-      int32_t input_diff =
-          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
-      if (input_diff >= diff_min) {
-        const int32_t input_diff_rescaled =
-            MultiplyByQuantizedMultiplierGreaterThanOne(
-                input_diff, input_beta_multiplier, input_beta_left_shift);
-        const FixedPointScaledDiff scaled_diff_f8 =
-            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
-        sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
-                                        exp_on_negative_values(scaled_diff_f8));
-      }
-    }
-
-    int num_bits_over_unit;
-    FixedPoint0 shifted_scale = FixedPoint0::FromRaw(GetReciprocal(
-        sum_of_exps.raw(), kAccumulationIntegerBits, &num_bits_over_unit));
-
-    for (int c = 0; c < depth; ++c) {
-      int32_t input_diff =
-          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
-      if (input_diff >= diff_min) {
-        const int32_t input_diff_rescaled =
-            MultiplyByQuantizedMultiplierGreaterThanOne(
-                input_diff, input_beta_multiplier, input_beta_left_shift);
-        const FixedPointScaledDiff scaled_diff_f8 =
-            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
-
-        FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
-        int32_t unsat_output = gemmlowp::RoundingDivideByPOT(
-            (shifted_scale * exp_in_0).raw(),
-            num_bits_over_unit + 31 - (sizeof(OutputT) * 8));
-
-        const int32_t shifted_output =
-            unsat_output +
-            static_cast<int32_t>(std::numeric_limits<OutputT>::min());
-
-        output_data[i * depth + c] = static_cast<OutputT>(std::max(
-            std::min(shifted_output,
-                     static_cast<int32_t>(std::numeric_limits<OutputT>::max())),
-            static_cast<int32_t>(std::numeric_limits<OutputT>::min())));
-      } else {
-        output_data[i * depth + c] = std::numeric_limits<OutputT>::min();
-      }
-    }
-  }
-}
-
-// Quantized softmax with int16_t input and int16_t output.
-inline void SoftmaxInt16(const SoftmaxParams& params,
-                         const RuntimeShape& input_shape,
-                         const int16_t* input_data,
-                         const RuntimeShape& output_shape,
-                         int16_t* output_data) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-
-  for (int i = 0; i < outer_size; ++i) {
-    // Find the largest element
-    int16_t max_in_row = std::numeric_limits<int16_t>::min();
-    for (int c = 0; c < depth; ++c) {
-      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
-    }
-
-    // Compute exp(input - max_input)
-    std::vector<int16_t> exp_result_Q015(depth);
-    for (int c = 0; c < depth; ++c) {
-      int32_t input_diff = input_data[i * depth + c] - max_in_row;
-      // scale the input_diff such that [-65535, 0] correspond to [-10.0, 0.0]
-      int32_t scaled_diff = MultiplyByQuantizedMultiplier(
-          input_diff, params.input_multiplier, params.input_left_shift);
-      // recenter to [-32768, 32767]
-      int32_t sym_scaled_diff = scaled_diff + 32767;
-      int16_t sat_sym_scaled_diff =
-          std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
-                   static_cast<int32_t>(32767));
-      // apply the exp() LUT activation function
-      exp_result_Q015[c] =
-          generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
-    }
-
-    // sum_of_exps is a Q16.15 fixed point format.
-    int32_t sum_of_exps = 0;
-    for (int c = 0; c < depth; ++c) {
-      // Q16.15 + Q0.15
-      sum_of_exps += exp_result_Q015[c];
-    }
-
-    // Compute the reciprocal 1/sum_of_exps
-    uint8_t headroom_plus_one =
-        CountLeadingZeros(static_cast<uint32_t>(sum_of_exps));
-    int32_t shifted_sum =
-        ((static_cast<int64_t>(sum_of_exps) << (headroom_plus_one - 1)) +
-         (1 << 13)) >>
-        14;
-    // since the LUT computes 1/(1 + x) we need to first compute x = (sum - 1).
-    // also, the LUT expects a symmetrical input, so we must also recenter x
-    // from [0, 65535] to [-32768, 32767].
-    int32_t sym_shifted_sum = shifted_sum + (-((1 << 15) + (1 << 16)));
-    int16_t sat_sym_shifted_sum = static_cast<int16_t>(
-        std::min(std::max(sym_shifted_sum, static_cast<int32_t>(-32768)),
-                 static_cast<int32_t>(32767)));
-    // apply 1/(1 + x) LUT activation function
-    int16_t reciprocal_scale_Q015 = generic_int16_table_lookup(
-        sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
-
-    // Rescale the exp_result with reciprocal
-    // range of output is [0, 32767] correspond to [0.0, 1.0]
-    for (int c = 0; c < depth; ++c) {
-      uint8_t right_shift = 31 - headroom_plus_one;
-      int64_t round = 1 << (right_shift - 1);
-      int32_t result = (static_cast<int64_t>(exp_result_Q015[c]) *
-                            static_cast<int64_t>(reciprocal_scale_Q015) +
-                        round) >>
-                       right_shift;
-      output_data[i * depth + c] = static_cast<int16_t>(
-          std::min(std::max(result, static_cast<int32_t>(0)),
-                   static_cast<int32_t>(32767)));
-    }
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/strided_slice.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/strided_slice.h
@@ -1,94 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
-
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-template <typename T>
-inline void StridedSlice(const tflite::StridedSliceParams& op_params,
-                         const RuntimeShape& unextended_input_shape,
-                         const T* input_data,
-                         const RuntimeShape& unextended_output_shape,
-                         T* output_data) {
-  using strided_slice::LoopCondition;
-  using strided_slice::StartForAxis;
-  using strided_slice::StopForAxis;
-  // Note that the output_shape is not used herein.
-  tflite::StridedSliceParams params_copy = op_params;
-
-  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 5);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 5);
-  const RuntimeShape input_shape =
-      RuntimeShape::ExtendedShape(5, unextended_input_shape);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(5, unextended_output_shape);
-
-  // Reverse and pad to 5 dimensions because that is what the runtime code
-  // requires (ie. all shapes must be 5D and are given backwards).
-  strided_slice::StridedSlicePadIndices(&params_copy, 5);
-
-  const int start_0 = StartForAxis(params_copy, input_shape, 0);
-  const int stop_0 = StopForAxis(params_copy, input_shape, 0, start_0);
-  const int start_1 = StartForAxis(params_copy, input_shape, 1);
-  const int stop_1 = StopForAxis(params_copy, input_shape, 1, start_1);
-  const int start_2 = StartForAxis(params_copy, input_shape, 2);
-  const int stop_2 = StopForAxis(params_copy, input_shape, 2, start_2);
-  const int start_3 = StartForAxis(params_copy, input_shape, 3);
-  const int stop_3 = StopForAxis(params_copy, input_shape, 3, start_3);
-  const int start_4 = StartForAxis(params_copy, input_shape, 4);
-  const int stop_4 = StopForAxis(params_copy, input_shape, 4, start_4);
-
-  T* out_ptr = output_data;
-  for (int offset_0 = start_0 * input_shape.Dims(1),
-           end_0 = stop_0 * input_shape.Dims(1),
-           step_0 = params_copy.strides[0] * input_shape.Dims(1);
-       !LoopCondition(offset_0, end_0, params_copy.strides[0]);
-       offset_0 += step_0) {
-    for (int offset_1 = (offset_0 + start_1) * input_shape.Dims(2),
-             end_1 = (offset_0 + stop_1) * input_shape.Dims(2),
-             step_1 = params_copy.strides[1] * input_shape.Dims(2);
-         !LoopCondition(offset_1, end_1, params_copy.strides[1]);
-         offset_1 += step_1) {
-      for (int offset_2 = (offset_1 + start_2) * input_shape.Dims(3),
-               end_2 = (offset_1 + stop_2) * input_shape.Dims(3),
-               step_2 = params_copy.strides[2] * input_shape.Dims(3);
-           !LoopCondition(offset_2, end_2, params_copy.strides[2]);
-           offset_2 += step_2) {
-        for (int offset_3 = (offset_2 + start_3) * input_shape.Dims(4),
-                 end_3 = (offset_2 + stop_3) * input_shape.Dims(4),
-                 step_3 = params_copy.strides[3] * input_shape.Dims(4);
-             !LoopCondition(offset_3, end_3, params_copy.strides[3]);
-             offset_3 += step_3) {
-          for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4;
-               !LoopCondition(offset_4, end_4, params_copy.strides[4]);
-               offset_4 += params_copy.strides[4]) {
-            *out_ptr++ = input_data[offset_4];
-          }
-        }
-      }
-    }
-  }
-}
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/sub.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/sub.h
@@ -1,516 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
-
-#include <stdint.h>
-
-#include <algorithm>
-#include <limits>
-
-#include "ruy/profiler/instrumentation.h"  // from @ruy
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-namespace reference_ops {
-
-inline void SubNonBroadcast(const ArithmeticParams& params,
-                            const RuntimeShape& input1_shape,
-                            const float* input1_data,
-                            const RuntimeShape& input2_shape,
-                            const float* input2_data,
-                            const RuntimeShape& output_shape,
-                            float* output_data) {
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
-        input1_data[i] - input2_data[i], params.float_activation_min,
-        params.float_activation_max);
-  }
-}
-
-inline void SubNonBroadcast(const ArithmeticParams& params,
-                            const RuntimeShape& input1_shape,
-                            const int32_t* input1_data,
-                            const RuntimeShape& input2_shape,
-                            const int32_t* input2_data,
-                            const RuntimeShape& output_shape,
-                            int32_t* output_data) {
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
-        input1_data[i] - input2_data[i], params.quantized_activation_min,
-        params.quantized_activation_max);
-  }
-}
-
-// TODO(b/151345304): We can implement BroadcastSub on buffers of arbitrary
-// dimensionality if the runtime code does a single loop over one dimension
-// that handles broadcasting as the base case. The code generator would then
-// generate max(D1, D2) nested for loops.
-// TODO(b/151345101): BroadcastSub is intentionally duplicated from
-// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
-// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
-// reference_ops.h.
-template <int N = 5>
-inline void BroadcastSubSlow(const ArithmeticParams& params,
-                             const RuntimeShape& input1_shape,
-                             const float* input1_data,
-                             const RuntimeShape& input2_shape,
-                             const float* input2_data,
-                             const RuntimeShape& output_shape,
-                             float* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastSubSlow/float");
-  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  auto sub_func = [&](int indexes[N]) {
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        ActivationFunctionWithMinMax(
-            input1_data[SubscriptToIndex(desc1, indexes)] -
-                input2_data[SubscriptToIndex(desc2, indexes)],
-            params.float_activation_min, params.float_activation_max);
-  };
-  NDOpsHelper<N>(output_desc, sub_func);
-}
-
-template <int N = 5>
-inline void BroadcastSubSlow(const ArithmeticParams& params,
-                             const RuntimeShape& input1_shape,
-                             const uint8_t* input1_data,
-                             const RuntimeShape& input2_shape,
-                             const uint8_t* input2_data,
-                             const RuntimeShape& output_shape,
-                             uint8_t* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8_t");
-  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  auto sub_func = [&](int indexes[N]) {
-    const int32_t input1_val =
-        params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
-    const int32_t input2_val =
-        params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
-    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sub, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        static_cast<uint8_t>(clamped_output);
-  };
-  NDOpsHelper<N>(output_desc, sub_func);
-}
-
-template <int N = 5>
-inline void BroadcastSubSlow(const ArithmeticParams& params,
-                             const RuntimeShape& input1_shape,
-                             const int32_t* input1_data,
-                             const RuntimeShape& input2_shape,
-                             const int32_t* input2_data,
-                             const RuntimeShape& output_shape,
-                             int32_t* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32_t");
-  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  auto sub_func = [&](int indexes[N]) {
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        ActivationFunctionWithMinMax(
-            input1_data[SubscriptToIndex(desc1, indexes)] -
-                input2_data[SubscriptToIndex(desc2, indexes)],
-            params.quantized_activation_min, params.quantized_activation_max);
-  };
-  NDOpsHelper<N>(output_desc, sub_func);
-}
-
-template <int N = 5>
-inline void BroadcastSubSlow(const ArithmeticParams& params,
-                             const RuntimeShape& input1_shape,
-                             const int8_t* input1_data,
-                             const RuntimeShape& input2_shape,
-                             const int8_t* input2_data,
-                             const RuntimeShape& output_shape,
-                             int8_t* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8_t");
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  auto sub_func = [&](int indexes[N]) {
-    const int32_t input1_val =
-        params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
-    const int32_t input2_val =
-        params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
-    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sub, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        static_cast<int8_t>(clamped_output);
-  };
-  NDOpsHelper<N>(output_desc, sub_func);
-}
-
-template <int N = 5>
-void BroadcastSubSlow(const ArithmeticParams& params,
-                      const RuntimeShape& input1_shape,
-                      const int64_t* input1_data,
-                      const RuntimeShape& input2_shape,
-                      const int64_t* input2_data,
-                      const RuntimeShape& output_shape, int64_t* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastSubSlow/int64_t");
-  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  auto sub_func = [&](int indexes[N]) {
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        ActivationFunctionWithMinMax(
-            input1_data[SubscriptToIndex(desc1, indexes)] -
-                input2_data[SubscriptToIndex(desc2, indexes)],
-            params.int64_activation_min, params.int64_activation_max);
-  };
-  NDOpsHelper<N>(output_desc, sub_func);
-}
-
-template <typename T, int N = 5>
-void BroadcastSubSlow(const ArithmeticParams& params,
-                      const RuntimeShape& input1_shape, const T* input1_data,
-                      const RuntimeShape& input2_shape, const T* input2_data,
-                      const RuntimeShape& output_shape, T* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastSubSlow/templated");
-  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  auto sub_func = [&](int indexes[N]) {
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        ActivationFunctionWithMinMax(
-            input1_data[SubscriptToIndex(desc1, indexes)] -
-                input2_data[SubscriptToIndex(desc2, indexes)],
-            params.quantized_activation_min, params.quantized_activation_max);
-  };
-  NDOpsHelper<N>(output_desc, sub_func);
-}
-
-// Element-wise Sub that can often be used for inner loop of broadcast sub as
-// well as the non-broadcast sub.
-inline void SubElementwise(int size, const ArithmeticParams& params,
-                           const uint8_t* input1_data,
-                           const uint8_t* input2_data, uint8_t* output_data) {
-  TFLITE_DCHECK_GT(params.input1_offset, -256);
-  TFLITE_DCHECK_GT(params.input2_offset, -256);
-  TFLITE_DCHECK_LT(params.input1_offset, 256);
-  TFLITE_DCHECK_LT(params.input2_offset, 256);
-
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sub, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<uint8_t>(clamped_output);
-  }
-}
-
-// Element-wise add that can often be used for inner loop of broadcast add as
-// well as the non-broadcast add.
-inline void SubElementwise(int size, const ArithmeticParams& params,
-                           const int8_t* input1_data, const int8_t* input2_data,
-                           int8_t* output_data) {
-  const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
-  TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
-  TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
-  TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
-  TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
-
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
-    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sub, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<int8_t>(clamped_output);
-  }
-}
-
-inline void Sub(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const uint8_t* input1_data,
-                const RuntimeShape& input2_shape, const uint8_t* input2_data,
-                const RuntimeShape& output_shape, uint8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  TFLITE_DCHECK_GT(params.input1_offset, -256);
-  TFLITE_DCHECK_GT(params.input2_offset, -256);
-  TFLITE_DCHECK_LT(params.input1_offset, 256);
-  TFLITE_DCHECK_LT(params.input2_offset, 256);
-  SubElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-inline void Sub(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const int8_t* input1_data,
-                const RuntimeShape& input2_shape, const int8_t* input2_data,
-                const RuntimeShape& output_shape, int8_t* output_data) {
-  TFLITE_DCHECK_LE(params.quantized_activation_min,
-                   params.quantized_activation_max);
-
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-
-  const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
-  TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
-  TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
-  TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
-  TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
-  SubElementwise(flat_size, params, input1_data, input2_data, output_data);
-}
-
-template <typename T>
-void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape,
-         const T* input1_data, const RuntimeShape& input2_shape,
-         const T* input2_data, const RuntimeShape& output_shape,
-         T* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              input1_data[SubscriptToIndex(desc1, b, y, x, c)] -
-              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
-        }
-      }
-    }
-  }
-}
-
-inline void SetActivationMinMax(const ArithmeticParams& params,
-                                int32_t* activation_min,
-                                int32_t* activation_max) {
-  *activation_min = params.quantized_activation_min;
-  *activation_max = params.quantized_activation_max;
-}
-
-inline void SetActivationMinMax(const ArithmeticParams& params,
-                                float* activation_min, float* activation_max) {
-  *activation_min = params.float_activation_min;
-  *activation_max = params.float_activation_max;
-}
-
-inline void SetActivationMinMax(const ArithmeticParams& params,
-                                int64_t* activation_min,
-                                int64_t* activation_max) {
-  *activation_min = params.int64_activation_min;
-  *activation_max = params.int64_activation_max;
-}
-
-template <typename T>
-inline void SubWithActivation(
-    const ArithmeticParams& params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
-  ruy::profiler::ScopeLabel label("SubWithActivation");
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  T activation_min, activation_max;
-  SetActivationMinMax(params, &activation_min, &activation_max);
-
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
-        input1_data[i] - input2_data[i], activation_min, activation_max);
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/tanh.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/reference/tanh.h
@@ -1,129 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_
-
-#include <cmath>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-#include "tensorflow/lite/kernels/op_macros.h"
-
-namespace tflite {
-namespace reference_ops {
-
-inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
-                 const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    float val = input_data[i];
-    float result = std::tanh(val);
-    output_data[i] = result;
-  }
-}
-
-// Convenience version that allows, for example, generated-code calls to be
-// uniform between data types.
-inline void Tanh(const TanhParams&, const RuntimeShape& input_shape,
-                 const float* input_data, const RuntimeShape& output_shape,
-                 float* output_data) {
-  // Drop params: not needed.
-  Tanh(input_shape, input_data, output_shape, output_data);
-}
-
-inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
-                 const int16_t* input_data, const RuntimeShape& output_shape,
-                 int16_t* output_data) {
-  const int input_left_shift = params.input_left_shift;
-  // Support for shifts is limited until we have a parameterized version of
-  // SaturatingRoundingMultiplyByPOT().
-  TFLITE_DCHECK_GE(input_left_shift, 0);
-  TFLITE_DCHECK_LE(input_left_shift, 1);
-
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  // F0 uses 0 integer bits, range [-1, 1].
-  // This is the return type of math functions such as tanh, logistic,
-  // whose range is in [-1, 1].
-  using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
-  // F3 uses 3 integer bits, range [-8, 8], the input range expected here.
-  using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
-
-  if (input_left_shift == 0) {
-    for (int i = 0; i < flat_size; i++) {
-      F3 input = F3::FromRaw(input_data[i]);
-      F0 output = gemmlowp::tanh(input);
-      output_data[i] = output.raw();
-    }
-  } else {
-    for (int i = 0; i < flat_size; i++) {
-      F3 input = F3::FromRaw(
-          gemmlowp::SaturatingRoundingMultiplyByPOT<1>(input_data[i]));
-      F0 output = gemmlowp::tanh(input);
-      output_data[i] = output.raw();
-    }
-  }
-}
-
-inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
-                 const uint8_t* input_data, const RuntimeShape& output_shape,
-                 uint8_t* output_data) {
-  const int32_t input_zero_point = params.input_zero_point;
-  const int32_t input_range_radius = params.input_range_radius;
-  const int32_t input_multiplier = params.input_multiplier;
-  const int input_left_shift = params.input_left_shift;
-  const int32_t output_zero_point = 128;
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-
-  for (int i = 0; i < flat_size; i++) {
-    const uint8_t input_val_u8 = input_data[i];
-    const int32_t input_val_centered =
-        static_cast<int32_t>(input_val_u8) - input_zero_point;
-    uint8_t output_val;
-    if (input_val_centered <= -input_range_radius) {
-      output_val = 0;
-    } else if (input_val_centered >= input_range_radius) {
-      output_val = 255;
-    } else {
-      const int32_t input_val_rescaled =
-          MultiplyByQuantizedMultiplierGreaterThanOne(
-              input_val_centered, input_multiplier, input_left_shift);
-      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, 4>;
-      using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
-      const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled);
-      const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4);
-      // Convert from Q0.31 to Q24.7.
-      using gemmlowp::RoundingDivideByPOT;
-      int32_t output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 24);
-      output_val_s32 += output_zero_point;
-      if (output_val_s32 == 256) {
-        output_val_s32 = 255;
-      }
-      // Reinterpret as Q0.7, encoded in uint8_t.
-      TFLITE_DCHECK_GE(output_val_s32, 0);
-      TFLITE_DCHECK_LE(output_val_s32, 255);
-      output_val = static_cast<uint8_t>(output_val_s32);
-    }
-    output_data[i] = output_val;
-  }
-}
-
-}  // namespace reference_ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/strided_slice_logic.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/strided_slice_logic.h
@@ -1,204 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_
-
-#include <limits>
-#include <vector>
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace strided_slice {
-
-// Use until std::clamp() is available from C++17.
-inline int Clamp(const int v, const int lo, const int hi) {
-  TFLITE_DCHECK(!(hi < lo));
-  if (hi < v) return hi;
-  if (v < lo) return lo;
-  return v;
-}
-
-inline void StridedSlicePadIndices(tflite::StridedSliceParams* p,
-                                   int dim_count) {
-  // Add indices and mask bits to fully include extra dimensions
-  TFLITE_CHECK_LE(dim_count, 5);
-  TFLITE_CHECK_GE(dim_count, p->start_indices_count);
-  TFLITE_CHECK_EQ(p->start_indices_count, p->stop_indices_count);
-  TFLITE_CHECK_EQ(p->stop_indices_count, p->strides_count);
-
-  const int pad_count = dim_count - p->start_indices_count;
-
-  // Pad indices at start, so move arrays by pad_count.
-  for (int i = p->start_indices_count - 1; i >= 0; --i) {
-    p->strides[i + pad_count] = p->strides[i];
-    p->start_indices[i + pad_count] = p->start_indices[i];
-    p->stop_indices[i + pad_count] = p->stop_indices[i];
-  }
-  for (int i = 0; i < pad_count; ++i) {
-    p->start_indices[i] = 0;
-    p->stop_indices[i] = 1;
-    p->strides[i] = 1;
-  }
-
-  // Pad masks with 0s or 1s as required.
-  p->shrink_axis_mask <<= pad_count;
-  p->ellipsis_mask <<= pad_count;
-  p->new_axis_mask <<= pad_count;
-  p->begin_mask <<= pad_count;
-  p->end_mask <<= pad_count;
-  p->begin_mask |= (1 << pad_count) - 1;
-  p->end_mask |= (1 << pad_count) - 1;
-
-  p->start_indices_count = dim_count;
-  p->stop_indices_count = dim_count;
-  p->strides_count = dim_count;
-}
-
-// Return the index for the first element along that axis. This index will be a
-// positive integer between [0, axis_size - 1] that can be used to index
-// directly into the data.
-inline int StartForAxis(const tflite::StridedSliceParams& params,
-                        const RuntimeShape& input_shape, int axis) {
-  const auto begin_mask = params.begin_mask;
-  const auto* start_indices = params.start_indices;
-  const auto* strides = params.strides;
-  const int axis_size = input_shape.Dims(axis);
-  if (axis_size == 0) {
-    return 0;
-  }
-  // Begin with the specified index.
-  int start = start_indices[axis];
-
-  // begin_mask override
-  if (begin_mask & 1 << axis) {
-    if (strides[axis] > 0) {
-      // Forward iteration - use the first element. These values will get
-      // clamped below (Note: We could have set them to 0 and axis_size-1, but
-      // use lowest() and max() to maintain symmetry with StopForAxis())
-      start = std::numeric_limits<int>::lowest();
-    } else {
-      // Backward iteration - use the last element.
-      start = std::numeric_limits<int>::max();
-    }
-  }
-
-  // Handle negative indices
-  if (start < 0) {
-    start += axis_size;
-  }
-
-  // Clamping
-  start = Clamp(start, 0, axis_size - 1);
-
-  return start;
-}
-
-// Return the "real" index for the end of iteration along that axis. This is an
-// "end" in the traditional C sense, in that it points to one past the last
-// element. ie. So if you were iterating through all elements of a 1D array of
-// size 4, this function would return 4 as the stop, because it is one past the
-// "real" indices of 0, 1, 2 & 3.
-inline int StopForAxis(const tflite::StridedSliceParams& params,
-                       const RuntimeShape& input_shape, int axis,
-                       int start_for_axis) {
-  const auto end_mask = params.end_mask;
-  const auto shrink_axis_mask = params.shrink_axis_mask;
-  const auto* stop_indices = params.stop_indices;
-  const auto* strides = params.strides;
-  const int axis_size = input_shape.Dims(axis);
-  if (axis_size == 0) {
-    return 0;
-  }
-
-  // Begin with the specified index
-  const bool shrink_axis = shrink_axis_mask & (1 << axis);
-  int stop = stop_indices[axis];
-
-  // When shrinking an axis, the end position does not matter (and can be
-  // incorrect when negative indexing is used, see Issue #19260). Always use
-  // start_for_axis + 1 to generate a length 1 slice, since start_for_axis has
-  // already been adjusted for negative indices.
-  if (shrink_axis) {
-    stop = start_for_axis + 1;
-  }
-
-  // end_mask override
-  if (end_mask & (1 << axis)) {
-    if (strides[axis] > 0) {
-      // Forward iteration - use the last element. These values will get
-      // clamped below
-      stop = std::numeric_limits<int>::max();
-    } else {
-      // Backward iteration - use the first element.
-      stop = std::numeric_limits<int>::lowest();
-    }
-  }
-
-  // Handle negative indices
-  if (stop < 0) {
-    stop += axis_size;
-  }
-
-  // Clamping
-  // Because the end index points one past the last element, we need slightly
-  // different clamping ranges depending on the direction.
-  if (strides[axis] > 0) {
-    // Forward iteration
-    stop = Clamp(stop, 0, axis_size);
-  } else {
-    // Backward iteration
-    stop = Clamp(stop, -1, axis_size - 1);
-  }
-
-  return stop;
-}
-
-inline bool LoopCondition(int index, int stop, int stride) {
-  // True when we have reached the end of an axis and should loop.
-  return stride > 0 ? index >= stop : index <= stop;
-}
-
-inline tflite::StridedSliceParams BuildStridedSliceParams(
-    int begin_mask, int end_mask, int shrink_axis_mask,
-    const std::vector<int>& start_indices, const std::vector<int>& stop_indices,
-    const std::vector<int>& strides) {
-  tflite::StridedSliceParams op_params;
-  const int dims_count = start_indices.size();
-
-  op_params.start_indices_count = dims_count;
-  op_params.stop_indices_count = dims_count;
-  op_params.strides_count = dims_count;
-  for (int i = 0; i < dims_count; ++i) {
-    op_params.start_indices[i] = start_indices[i];
-    op_params.stop_indices[i] = stop_indices[i];
-    op_params.strides[i] = strides[i];
-  }
-
-  op_params.begin_mask = begin_mask;
-  op_params.ellipsis_mask = 0;
-  op_params.end_mask = end_mask;
-  op_params.new_axis_mask = 0;
-  op_params.shrink_axis_mask = shrink_axis_mask;
-
-  return op_params;
-}
-
-}  // namespace strided_slice
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/tensor.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/tensor.h
@@ -1,147 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_H_
-
-#include <complex>
-#include <vector>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-#include "tensorflow/lite/string_util.h"
-
-namespace tflite {
-
-inline RuntimeShape GetTensorShape(std::vector<int32_t> data) {
-  return RuntimeShape(data.size(), data.data());
-}
-
-// A list of tensors in a format that can be used by kernels like split and
-// concatenation.
-template <typename T>
-class VectorOfTensors {
- public:
-  // Build with the tensors in 'tensor_list'.
-  VectorOfTensors(const TfLiteContext& context,
-                  const TfLiteIntArray& tensor_list) {
-    int num_tensors = tensor_list.size;
-
-    all_data_.reserve(num_tensors);
-    all_shape_.reserve(num_tensors);
-    all_shape_ptr_.reserve(num_tensors);
-
-    for (int i = 0; i < num_tensors; ++i) {
-      TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
-      all_data_.push_back(GetTensorData<T>(t));
-      all_shape_.push_back(GetTensorShape(t));
-    }
-
-    // Taking the pointer from inside a std::vector is only OK if the vector is
-    // never modified, so we populate all_shape in the previous loop and then we
-    // are free to grab iterators here.
-    for (int i = 0; i < num_tensors; ++i) {
-      all_shape_ptr_.push_back(&all_shape_[i]);
-    }
-  }
-  // Return a pointer to the data pointers of all tensors in the list. For
-  // example:
-  //   float* const* f = v.data();
-  //   f[0][1] is the second element of the first tensor.
-  T* const* data() const { return all_data_.data(); }
-
-  // Return a pointer the shape pointers of all tensors in the list. For
-  // example:
-  //   const RuntimeShape* const* d = v.dims();
-  //   dims[1] are the dimensions of the second tensor in the list.
-  const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); }
-
- private:
-  std::vector<T*> all_data_;
-  std::vector<RuntimeShape> all_shape_;
-  std::vector<RuntimeShape*> all_shape_ptr_;
-};
-
-// A list of quantized tensors in a format that can be used by kernels like
-// split and concatenation.
-class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t> {
- public:
-  // Build with the tensors in 'tensor_list'.
-  VectorOfQuantizedTensors(const TfLiteContext& context,
-                           const TfLiteIntArray& tensor_list)
-      : VectorOfTensors<uint8_t>(context, tensor_list) {
-    for (int i = 0; i < tensor_list.size; ++i) {
-      TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
-      zero_point_.push_back(t->params.zero_point);
-      scale_.push_back(t->params.scale);
-    }
-  }
-
-  const float* scale() const { return scale_.data(); }
-  const int32_t* zero_point() const { return zero_point_.data(); }
-
- private:
-  std::vector<int32_t> zero_point_;
-  std::vector<float> scale_;
-};
-
-// Writes randomly accessed values from `input` sequentially into `output`.
-template <typename T>
-class SequentialTensorWriter {
- public:
-  SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output) {
-    input_data_ = GetTensorData<T>(input);
-    output_ptr_ = GetTensorData<T>(output);
-  }
-  SequentialTensorWriter(const T* input_data, T* output_data)
-      : input_data_(input_data), output_ptr_(output_data) {}
-
-  void Write(int position) { *output_ptr_++ = input_data_[position]; }
-  void WriteN(int position, int len) {
-    memcpy(output_ptr_, &input_data_[position], sizeof(T) * len);
-    output_ptr_ += len;
-  }
-
- private:
-  const T* input_data_;
-  T* output_ptr_;
-};
-
-// String ops are not yet supported on platforms w/ static memory.
-#ifndef TF_LITE_STATIC_MEMORY
-template <>
-class SequentialTensorWriter<string> {
- public:
-  SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output)
-      : input_(input), output_(output) {}
-  ~SequentialTensorWriter() { buffer_.WriteToTensor(output_, nullptr); }
-
-  void Write(int position) { this->WriteN(position, 1); }
-  void WriteN(int position, int len) {
-    for (int i = 0; i < len; i++) {
-      buffer_.AddString(GetString(input_, position + i));
-    }
-  }
-
- private:
-  const TfLiteTensor* input_;
-  TfLiteTensor* output_;
-  DynamicBuffer buffer_;
-};
-#endif  // TF_LITE_STATIC_MEMORY
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/tensor_ctypes.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/tensor_ctypes.h
@@ -1,47 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-template <typename T>
-inline T* GetTensorData(TfLiteTensor* tensor) {
-  return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
-}
-
-template <typename T>
-inline const T* GetTensorData(const TfLiteTensor* tensor) {
-  return tensor != nullptr ? reinterpret_cast<const T*>(tensor->data.raw)
-                           : nullptr;
-}
-
-inline RuntimeShape GetTensorShape(const TfLiteTensor* tensor) {
-  if (tensor == nullptr) {
-    return RuntimeShape();
-  }
-
-  TfLiteIntArray* dims = tensor->dims;
-  const int dims_size = dims->size;
-  const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
-  return RuntimeShape(dims_size, dims_data);
-}
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/types.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/internal/types.h
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/kernel_util.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/kernel_util.h
@@ -1,196 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
-#define TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
-
-#include <stdint.h>
-
-#include <limits>
-
-#include "tensorflow/lite/c/builtin_op_data.h"
-#include "tensorflow/lite/c/common.h"
-
-namespace tflite {
-
-// A fair number of functions in this header have historically been inline.
-// It is ok to change functions to not be inline if the latency with
-// benchmark_model for MobileNet + MobileBERT is unaffected. If such a change is
-// made, move the newly non-inlined function declarations to the top of this
-// header file.
-const TfLiteTensor* GetInput(const TfLiteContext* context,
-                             const TfLiteNode* node, int index);
-
-// Note: You must check if result is not null:
-// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx);
-// TF_LITE_ENSURE(context, my_tensor != nullptr);
-TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
-                               int index);
-
-TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
-                        int index);
-
-const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
-                                           const TfLiteNode* node, int index);
-
-inline int NumDimensions(const TfLiteTensor* t) { return t->dims->size; }
-inline int SizeOfDimension(const TfLiteTensor* t, int dim) {
-  return t->dims->data[dim];
-}
-
-#ifndef TF_LITE_STATIC_MEMORY
-inline TfLiteTensor* GetTemporary(TfLiteContext* context,
-                                  const TfLiteNode* node, int index) {
-  return &context->tensors[node->temporaries->data[index]];
-}
-inline const TfLiteTensor* GetIntermediates(TfLiteContext* context,
-                                            const TfLiteNode* node, int index) {
-  return &context->tensors[node->intermediates->data[index]];
-}
-inline int NumIntermediates(const TfLiteNode* node) {
-  return node->intermediates->size;
-}
-#endif  // TF_LITE_STATIC_MEMORY
-inline int NumInputs(const TfLiteNode* node) { return node->inputs->size; }
-inline int NumOutputs(const TfLiteNode* node) { return node->outputs->size; }
-
-inline int64_t NumElements(const TfLiteIntArray* dims) {
-  int64_t count = 1;
-  for (int i = 0; i < dims->size; ++i) {
-    count *= dims->data[i];
-  }
-  return count;
-}
-
-inline int64_t NumElements(const TfLiteTensor* t) {
-  return NumElements(t->dims);
-}
-
-// Determines whether tensor is constant.
-// TODO(b/138199592): Introduce new query which checks for constant OR
-// persistent-read-only, which would be useful for most tensor kernels that
-// are potentially dynamic based on the input tensor value availability at the
-// time of prepare.
-inline bool IsConstantTensor(const TfLiteTensor* tensor) {
-  return tensor->allocation_type == kTfLiteMmapRo;
-}
-
-// Determines whether tensor is dynamic. Note that a tensor can be non-const and
-// not dynamic. This function specifically checks for a dynamic tensor.
-inline bool IsDynamicTensor(const TfLiteTensor* tensor) {
-  return tensor->allocation_type == kTfLiteDynamic;
-}
-
-// Sets tensor to dynamic.
-inline void SetTensorToDynamic(TfLiteTensor* tensor) {
-  if (tensor->allocation_type != kTfLiteDynamic) {
-    tensor->allocation_type = kTfLiteDynamic;
-    tensor->data.raw = nullptr;
-  }
-}
-
-// Sets tensor to persistent and read-only.
-inline void SetTensorToPersistentRo(TfLiteTensor* tensor) {
-  if (tensor->allocation_type != kTfLitePersistentRo) {
-    tensor->allocation_type = kTfLitePersistentRo;
-    tensor->data.raw = nullptr;
-  }
-}
-
-// Determines whether it is a hybrid op - one that has float inputs and
-// quantized weights.
-inline bool IsHybridOp(const TfLiteTensor* input, const TfLiteTensor* weight) {
-  return ((weight->type == kTfLiteUInt8 || weight->type == kTfLiteInt8) &&
-          input->type == kTfLiteFloat32);
-}
-
-// Check dimensionality match and populate OpData for Conv and DepthwiseConv.
-TfLiteStatus PopulateConvolutionQuantizationParams(
-    TfLiteContext* context, const TfLiteTensor* input,
-    const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
-    const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
-    int32_t* output_activation_min, int32_t* output_activation_max,
-    int32_t* per_channel_multiplier, int* per_channel_shift);
-
-TfLiteStatus PopulateConvolutionQuantizationParams(
-    TfLiteContext* context, const TfLiteTensor* input,
-    const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
-    const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
-    int32_t* output_activation_min, int32_t* output_activation_max,
-    int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels);
-
-// Calculates the multiplication factor for a quantized convolution (or
-// quantized depthwise convolution) involving the given tensors. Returns an
-// error if the scales of the tensors are not compatible.
-TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
-                                              const TfLiteTensor* input,
-                                              const TfLiteTensor* filter,
-                                              const TfLiteTensor* bias,
-                                              TfLiteTensor* output,
-                                              double* multiplier);
-
-TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
-                                              const TfLiteTensor* input,
-                                              const TfLiteTensor* filter,
-                                              TfLiteTensor* output,
-                                              double* multiplier);
-
-// Calculates the useful quantized range of an activation layer given its
-// activation tensor.
-TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
-                                               TfLiteFusedActivation activation,
-                                               TfLiteTensor* output,
-                                               int32_t* act_min,
-                                               int32_t* act_max);
-
-// Calculates the useful range of an activation layer given its activation
-// tensor.a
-template <typename T>
-void CalculateActivationRange(TfLiteFusedActivation activation,
-                              T* activation_min, T* activation_max) {
-  if (activation == kTfLiteActRelu) {
-    *activation_min = 0;
-    *activation_max = std::numeric_limits<T>::max();
-  } else if (activation == kTfLiteActRelu6) {
-    *activation_min = 0;
-    *activation_max = 6;
-  } else if (activation == kTfLiteActReluN1To1) {
-    *activation_min = -1;
-    *activation_max = 1;
-  } else {
-    *activation_min = std::numeric_limits<T>::lowest();
-    *activation_max = std::numeric_limits<T>::max();
-  }
-}
-
-// Return true if the given tensors have the same shape.
-bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2);
-
-// Calculates the output_shape that is necessary for element-wise operations
-// with broadcasting involving the two input tensors.
-TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
-                                        const TfLiteTensor* input1,
-                                        const TfLiteTensor* input2,
-                                        TfLiteIntArray** output_shape);
-
-// Calculates the output_shape that is necessary for element-wise operations
-// with broadcasting involving the three input tensors.
-TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
-                                        const TfLiteTensor* input1,
-                                        const TfLiteTensor* input2,
-                                        const TfLiteTensor* input3,
-                                        TfLiteIntArray** output_shape);
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/op_macros.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/op_macros.h
@@ -1,83 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
-#define TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
-
-// If we're on a platform without standard IO functions, fall back to a
-// non-portable function.
-#ifdef TF_LITE_MCU_DEBUG_LOG
-
-#include "tensorflow/lite/micro/debug_log.h"
-
-#define DEBUG_LOG(x) \
-  do {               \
-    DebugLog(x);     \
-  } while (0)
-
-inline void InfiniteLoop() {
-  DEBUG_LOG("HALTED\n");
-  while (1) {
-  }
-}
-
-#define TFLITE_ABORT InfiniteLoop();
-
-#else  // TF_LITE_MCU_DEBUG_LOG
-
-#include <stdio.h>
-#include <cstdlib>
-
-#define DEBUG_LOG(x)            \
-  do {                          \
-    printf("%s", (x)); \
-  } while (0)
-
-// Report Error for unsupported type by op 'op_name' and returns kTfLiteError.
-#define TF_LITE_UNSUPPORTED_TYPE(context, type, op_name)                    \
-  do {                                                                      \
-    TF_LITE_KERNEL_LOG((context), "%s:%d Type %s is unsupported by op %s.", \
-                       __FILE__, __LINE__, TfLiteTypeGetName(type),         \
-                       (op_name));                                          \
-    return kTfLiteError;                                                    \
-  } while (0)
-
-#define TFLITE_ABORT abort()
-
-#endif  // TF_LITE_MCU_DEBUG_LOG
-
-#ifdef NDEBUG
-#define TFLITE_ASSERT_FALSE (static_cast<void>(0))
-#else
-#define TFLITE_ASSERT_FALSE TFLITE_ABORT
-#endif
-
-#define TF_LITE_FATAL(msg)  \
-  do {                      \
-    DEBUG_LOG(msg);         \
-    DEBUG_LOG("\nFATAL\n"); \
-    TFLITE_ABORT;           \
-  } while (0)
-
-#define TF_LITE_ASSERT(x)        \
-  do {                           \
-    if (!(x)) TF_LITE_FATAL(#x); \
-  } while (0)
-
-#define TF_LITE_ASSERT_EQ(x, y)                            \
-  do {                                                     \
-    if ((x) != (y)) TF_LITE_FATAL(#x " didn't equal " #y); \
-  } while (0)
-
-#endif  // TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/padding.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/kernels/padding.h
@@ -1,80 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_PADDING_H_
-#define TENSORFLOW_LITE_KERNELS_PADDING_H_
-
-#include "tensorflow/lite/c/builtin_op_data.h"
-
-namespace tflite {
-
-// TODO(renjieliu): Migrate others to use ComputePaddingWithLeftover.
-inline int ComputePadding(int stride, int dilation_rate, int in_size,
-                          int filter_size, int out_size) {
-  int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
-  int padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
-  return padding > 0 ? padding : 0;
-}
-
-// It's not guaranteed that padding is symmetric. It's important to keep the
-// offset for algorithms that need all paddings.
-inline int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size,
-                                    int filter_size, int out_size,
-                                    int* offset) {
-  int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
-  int total_padding =
-      ((out_size - 1) * stride + effective_filter_size - in_size);
-  total_padding = total_padding > 0 ? total_padding : 0;
-  *offset = total_padding % 2;
-  return total_padding / 2;
-}
-
-// Matching GetWindowedOutputSize in TensorFlow.
-inline int ComputeOutSize(TfLitePadding padding, int image_size,
-                          int filter_size, int stride, int dilation_rate = 1) {
-  int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
-  switch (padding) {
-    case kTfLitePaddingSame:
-      return (image_size + stride - 1) / stride;
-    case kTfLitePaddingValid:
-      return (image_size + stride - effective_filter_size) / stride;
-    default:
-      return 0;
-  }
-}
-
-inline TfLitePaddingValues ComputePaddingHeightWidth(
-    int stride_height, int stride_width, int dilation_rate_height,
-    int dilation_rate_width, int in_height, int in_width, int filter_height,
-    int filter_width, TfLitePadding padding, int* out_height, int* out_width) {
-  *out_width = ComputeOutSize(padding, in_width, filter_width, stride_width,
-                              dilation_rate_width);
-  *out_height = ComputeOutSize(padding, in_height, filter_height, stride_height,
-                               dilation_rate_height);
-
-  TfLitePaddingValues padding_values;
-  int offset = 0;
-  padding_values.height =
-      ComputePaddingWithOffset(stride_height, dilation_rate_height, in_height,
-                               filter_height, *out_height, &offset);
-  padding_values.height_offset = offset;
-  padding_values.width =
-      ComputePaddingWithOffset(stride_width, dilation_rate_width, in_width,
-                               filter_width, *out_width, &offset);
-  padding_values.width_offset = offset;
-  return padding_values;
-}
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_PADDING_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/all_ops_resolver.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/all_ops_resolver.h
@@ -1,35 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
-#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
-
-#include "tensorflow/lite/micro/compatibility.h"
-#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
-
-namespace tflite {
-
-// The magic number in the template parameter is the maximum number of ops that
-// can be added to AllOpsResolver. It can be increased if needed. And most
-// applications that care about the memory footprint will want to directly use
-// MicroMutableOpResolver and have an application specific template parameter.
-// The examples directory has sample code for this.
-class AllOpsResolver : public MicroMutableOpResolver<128> {
- public:
-  AllOpsResolver();
-
- private:
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.h
@@ -1,22 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
-#define TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
-
-extern const unsigned char g_keyword_scrambled_model_data[];
-extern const unsigned int g_keyword_scrambled_model_data_length;
-
-#endif  // TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/compatibility.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/compatibility.h
@@ -1,32 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
-#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
-
-// C++ will automatically create class-specific delete operators for virtual
-// objects, which by default call the global delete function. For embedded
-// applications we want to avoid this, and won't be calling new/delete on these
-// objects, so we need to override the default implementation with one that does
-// nothing to avoid linking in ::delete().
-// This macro needs to be included in all subclasses of a virtual base class in
-// the private section.
-#ifdef TF_LITE_STATIC_MEMORY
-#define TF_LITE_REMOVE_VIRTUAL_DELETE \
-  void operator delete(void* p) {}
-#else
-#define TF_LITE_REMOVE_VIRTUAL_DELETE
-#endif
-
-#endif  // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/debug_log.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/debug_log.h
@@ -1,23 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
-#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
-
-// This function should be implemented by each target platform, and provide a
-// way for strings to be output to some text stream. For more information, see
-// tensorflow/lite/micro/debug_log.cc.
-extern "C" void DebugLog(const char* s);
-
-#endif  // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/examples/person_detection_experimental/detection_responder.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/examples/person_detection_experimental/detection_responder.h
@@ -1,34 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// Provides an interface to take an action based on the output from the person
-// detection model.
-
-#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
-#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/micro/micro_error_reporter.h"
-
-// Called every time the results of a person detection run are available. The
-// `person_score` has the numerical confidence that the captured image contains
-// a person, and `no_person_score` has the numerical confidence that the image
-// does not contain a person. Typically if person_score > no_person_score, the
-// image is considered to contain a person.  This threshold may be adjusted for
-// particular applications.
-void RespondToDetection(tflite::ErrorReporter* error_reporter,
-                        int8_t person_score, int8_t no_person_score);
-
-#endif  // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_DETECTION_RESPONDER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/examples/person_detection_experimental/image_provider.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/examples/person_detection_experimental/image_provider.h
@@ -1,40 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
-#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/micro/micro_error_reporter.h"
-
-// This is an abstraction around an image source like a camera, and is
-// expected to return 8-bit sample data.  The assumption is that this will be
-// called in a low duty-cycle fashion in a low-power application.  In these
-// cases, the imaging sensor need not be run in a streaming mode, but rather can
-// be idled in a relatively low-power mode between calls to GetImage().  The
-// assumption is that the overhead and time of bringing the low-power sensor out
-// of this standby mode is commensurate with the expected duty cycle of the
-// application.  The underlying sensor may actually be put into a streaming
-// configuration, but the image buffer provided to GetImage should not be
-// overwritten by the driver code until the next call to GetImage().
-//
-// The reference implementation can have no platform-specific dependencies, so
-// it just returns a static image. For real applications, you should
-// ensure there's a specialized implementation that accesses hardware APIs.
-TfLiteStatus GetImage(tflite::ErrorReporter* error_reporter, int image_width,
-                      int image_height, int channels, int8_t* image_data, 
-					  uint8_t * hardware_input);
-
-#endif  // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_IMAGE_PROVIDER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.h
@@ -1,30 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
-#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
-
-#include "tensorflow/lite/c/common.h"
-
-// Initializes all data needed for the example. Exported with C linkage as
-// person_detect_init() rather than under the Arduino-style name setup().
-extern "C" void person_detect_init();
-
-// Runs one iteration of data gathering and inference. This should be called
-// repeatedly from the application code. Exported with C linkage as
-// person_detect() rather than under the Arduino-style name loop().
-extern "C" int person_detect(uint8_t * hardware_input);
-
-#endif  // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MAIN_FUNCTIONS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/examples/person_detection_experimental/model_settings.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/examples/person_detection_experimental/model_settings.h
@@ -1,35 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
-#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
-
-// Keeping these as constant expressions allow us to allocate fixed-sized arrays
-// on the stack for our working memory.
-
-// All of these values are derived from the values used during model training,
-// if you change your model you'll need to update these constants.
-constexpr int kNumCols = 96;
-constexpr int kNumRows = 96;
-constexpr int kNumChannels = 1;
-
-constexpr int kMaxImageSize = kNumCols * kNumRows * kNumChannels;
-
-constexpr int kCategoryCount = 2;
-constexpr int kPersonIndex = 1;
-constexpr int kNotAPersonIndex = 0;
-extern const char* kCategoryLabels[kCategoryCount];
-
-#endif  // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_MODEL_SETTINGS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/examples/person_detection_experimental/person_detect_model_data.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/examples/person_detection_experimental/person_detect_model_data.h
@@ -1,27 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// This is a standard TensorFlow Lite model file that has been converted into a
-// C data array, so it can be easily compiled into a binary for devices that
-// don't have a file system. It was created using the command:
-// xxd -i person_detect.tflite > person_detect_model_data.cc
-
-#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_
-#define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_
-
-extern const unsigned char g_person_detect_model_data[];
-extern const int g_person_detect_model_data_len;
-
-#endif  // TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_EXPERIMENTAL_PERSON_DETECT_MODEL_DATA_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/kernels/activation_utils.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/kernels/activation_utils.h
@@ -1,57 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
-
-#include <algorithm>
-#include <cmath>
-
-#include "tensorflow/lite/c/builtin_op_data.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/max.h"
-#include "tensorflow/lite/kernels/internal/min.h"
-
-namespace tflite {
-namespace ops {
-namespace micro {
-
-// Returns the floating point value for a fused activation:
-inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
-  switch (act) {
-    case kTfLiteActNone:
-      return a;
-    case kTfLiteActRelu:
-      return TfLiteMax(0.0f, a);
-    case kTfLiteActReluN1To1:
-      return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
-    case kTfLiteActRelu6:
-      return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
-    case kTfLiteActTanh:
-      return std::tanh(a);
-    case kTfLiteActSignBit:
-      return std::signbit(a);
-    case kTfLiteActSigmoid:
-      return 1.0f / (1.0f + std::exp(-a));
-  }
-  return 0.0f;  // To indicate an unsupported activation (i.e. when a new fused
-                // activation is added to the enum and not handled here).
-}
-
-}  // namespace micro
-}  // namespace ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/kernels/kernel_runner.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/kernels/kernel_runner.h
@@ -1,83 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/micro/simple_memory_allocator.h"
-
-namespace tflite {
-namespace micro {
-
-// Helper class to perform a simulated kernel (i.e. TfLiteRegistration)
-// lifecycle (init, prepare, invoke). All internal allocations are handled by
-// this class. Simply pass in the registration, list of required tensors, inputs
-// array, outputs array, and any pre-builtin data. Calling Invoke() will
-// automatically walk the kernel and outputs will be ready on the TfLiteTensor
-// output provided during construction.
-class KernelRunner {
- public:
-  KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
-               int tensors_size, TfLiteIntArray* inputs,
-               TfLiteIntArray* outputs, void* builtin_data,
-               ErrorReporter* error_reporter);
-
-  // Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any
-  // exceptions will be reported through the error_reporter and returned as a
-  // status code here.
-  TfLiteStatus InitAndPrepare(const char* init_data = nullptr);
-
-  // Calls init, prepare, and invoke on a given TfLiteRegistration pointer.
-  // After successful invoke, results will be available in the output tensor as
-  // passed into the constructor of this class.
-  TfLiteStatus Invoke();
-
- protected:
-  static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
-                                 int tensor_index);
-  static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
-                                         int tensor_index);
-  static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
-  static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
-                                                  size_t bytes,
-                                                  int* buffer_index);
-  static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
-  static void ReportOpError(struct TfLiteContext* context, const char* format,
-                            ...);
-
- private:
-  static constexpr int kNumScratchBuffers_ = 5;
-
-  static constexpr int kKernelRunnerBufferSize_ = 10000;
-  static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];
-
-  SimpleMemoryAllocator* allocator_ = nullptr;
-  const TfLiteRegistration& registration_;
-  TfLiteTensor* tensors_ = nullptr;
-  ErrorReporter* error_reporter_ = nullptr;
-
-  TfLiteContext context_ = {};
-  TfLiteNode node_ = {};
-
-  int scratch_buffer_count_ = 0;
-  uint8_t* scratch_buffers_[kNumScratchBuffers_];
-};
-
-}  // namespace micro
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/kernels/kernel_util.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/kernels/kernel_util.h
@@ -1,83 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
-
-#include <cstdint>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-namespace micro {
-
-// Returns a mutable tensor for a given input index. is_variable must be checked
-// during prepare when the full TfLiteTensor is available.
-inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
-                                             const TfLiteNode* node,
-                                             int index) {
-  TFLITE_DCHECK(context != nullptr);
-  TFLITE_DCHECK(node != nullptr);
-  return context->GetEvalTensor(context, node->inputs->data[index]);
-}
-
-// Returns the TfLiteEvalTensor struct for a given input index in a node.
-inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
-                                            const TfLiteNode* node, int index) {
-  return GetMutableEvalInput(context, node, index);
-}
-
-// Returns the TfLiteEvalTensor struct for a given output index in a node.
-inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
-                                       const TfLiteNode* node, int index) {
-  TFLITE_DCHECK(context != nullptr);
-  TFLITE_DCHECK(node != nullptr);
-  return context->GetEvalTensor(context, node->outputs->data[index]);
-}
-
-// Returns data for a TfLiteEvalTensor struct.
-template <typename T>
-T* GetTensorData(TfLiteEvalTensor* tensor) {
-  return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
-}
-
-// Returns const data for a TfLiteEvalTensor struct.
-template <typename T>
-const T* GetTensorData(const TfLiteEvalTensor* tensor) {
-  TFLITE_DCHECK(tensor != nullptr);
-  return reinterpret_cast<const T*>(tensor->data.raw);
-}
-
-// Returns the shape of a TfLiteEvalTensor struct.
-inline const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) {
-  if (tensor == nullptr) {
-    return RuntimeShape();
-  }
-  TfLiteIntArray* dims = tensor->dims;
-  const int dims_size = dims->size;
-  const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
-  return RuntimeShape(dims_size, dims_data);
-}
-
-// Return true if the given tensors have the same shape.
-bool HaveSameShapes(const TfLiteEvalTensor* input1,
-                    const TfLiteEvalTensor* input2);
-
-}  // namespace micro
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/kernels/micro_ops.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/kernels/micro_ops.h
@@ -1,92 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
-
-#include "tensorflow/lite/c/common.h"
-
-namespace tflite {
-namespace ops {
-namespace micro {
-
-// Forward declaration of all micro op kernel registration methods. These
-// registrations are included with the standard `BuiltinOpResolver`.
-//
-// This header is particularly useful in cases where only a subset of ops are
-// needed. In such cases, the client can selectively add only the registrations
-// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
-// registration in turn allows the linker to strip unused kernels.
-
-TfLiteRegistration Register_ABS();
-TfLiteRegistration Register_ADD();
-TfLiteRegistration Register_ARG_MAX();
-TfLiteRegistration Register_ARG_MIN();
-TfLiteRegistration Register_AVERAGE_POOL_2D();
-TfLiteRegistration Register_CEIL();
-// TODO(b/160234179): Change custom OPs to also return by value.
-TfLiteRegistration* Register_CIRCULAR_BUFFER();
-TfLiteRegistration Register_CONV_2D();
-TfLiteRegistration Register_CONCATENATION();
-TfLiteRegistration Register_COS();
-TfLiteRegistration Register_DEPTHWISE_CONV_2D();
-TfLiteRegistration Register_DEQUANTIZE();
-TfLiteRegistration Register_EQUAL();
-TfLiteRegistration Register_FLOOR();
-TfLiteRegistration Register_FULLY_CONNECTED();
-TfLiteRegistration Register_GREATER();
-TfLiteRegistration Register_GREATER_EQUAL();
-TfLiteRegistration Register_HARD_SWISH();
-TfLiteRegistration Register_LESS();
-TfLiteRegistration Register_LESS_EQUAL();
-TfLiteRegistration Register_LOG();
-TfLiteRegistration Register_LOGICAL_AND();
-TfLiteRegistration Register_LOGICAL_NOT();
-TfLiteRegistration Register_LOGICAL_OR();
-TfLiteRegistration Register_LOGISTIC();
-TfLiteRegistration Register_MAXIMUM();
-TfLiteRegistration Register_MAX_POOL_2D();
-TfLiteRegistration Register_MEAN();
-TfLiteRegistration Register_MINIMUM();
-TfLiteRegistration Register_MUL();
-TfLiteRegistration Register_NEG();
-TfLiteRegistration Register_NOT_EQUAL();
-TfLiteRegistration Register_PACK();
-TfLiteRegistration Register_PAD();
-TfLiteRegistration Register_PADV2();
-TfLiteRegistration Register_PRELU();
-TfLiteRegistration Register_QUANTIZE();
-TfLiteRegistration Register_RELU();
-TfLiteRegistration Register_RELU6();
-TfLiteRegistration Register_RESHAPE();
-TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
-TfLiteRegistration Register_ROUND();
-TfLiteRegistration Register_RSQRT();
-TfLiteRegistration Register_SIN();
-TfLiteRegistration Register_SOFTMAX();
-TfLiteRegistration Register_SPLIT();
-TfLiteRegistration Register_SQRT();
-TfLiteRegistration Register_SQUARE();
-TfLiteRegistration Register_STRIDED_SLICE();
-TfLiteRegistration Register_SUB();
-TfLiteRegistration Register_SVDF();
-TfLiteRegistration Register_UNPACK();
-TfLiteRegistration Register_L2_NORMALIZATION();
-TfLiteRegistration Register_TANH();
-
-}  // namespace micro
-}  // namespace ops
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/kernels/micro_utils.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/kernels/micro_utils.h
@@ -1,37 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
-namespace tflite {
-namespace ops {
-namespace micro {
-
-// Same as gtl::Greater but defined here to reduce dependencies and
-// binary size for micro environment.
-struct Greater {
-  template <typename T>
-  bool operator()(const T& x, const T& y) const {
-    return x > y;
-  }
-};
-
-struct Less {
-  template <typename T>
-  bool operator()(const T& x, const T& y) const {
-    return x < y;
-  }
-};
-
-}  // namespace micro
-}  // namespace ops
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/memory_helpers.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/memory_helpers.h
@@ -1,59 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
-#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
-
-#include <cstddef>
-#include <cstdint>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-
-namespace tflite {
-
-// Returns the next pointer address aligned to the given alignment.
-uint8_t* AlignPointerUp(uint8_t* data, size_t alignment);
-
-// Returns the previous pointer address aligned to the given alignment.
-uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
-
-// Returns an increased size that's a multiple of alignment.
-size_t AlignSizeUp(size_t size, size_t alignment);
-
-// Returns size in bytes for a given TfLiteType.
-TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);
-
-// How many bytes are needed to hold a tensor's contents.
-TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
-                                    size_t* bytes, size_t* type_size,
-                                    ErrorReporter* error_reporter);
-
-// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
-// returned in out_bytes.
-TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
-                                        size_t* out_bytes);
-
-// Deduce output dimensions from input and allocate given size.
-// Useful for operators with two inputs where the largest input should equal the
-// output dimension.
-TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
-                                               const TfLiteTensor* input1,
-                                               const TfLiteTensor* input2,
-                                               TfLiteTensor* output);
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h
@@ -1,163 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
-#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
-
-#include "tensorflow/lite/micro/compatibility.h"
-#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
-
-namespace tflite {
-
-constexpr int kOnlinePlannedBuffer = -1;
-
-// A memory planner that uses a greedy algorithm to arrange buffers in memory
-// to minimize the overall arena size needed.
-//
-// The algorithm works like this:
-//  - The client enters the buffer information through AddBuffer().
-//  - When a function like GetOffsetForBuffer() is called, the
-//    CalculateOffsetsIfNeeded() method is invoked.
-//  - If an up to date plan is not already present, one will be calculated.
-//  - The buffers are sorted in descending order of size.
-//  - The largest buffer is placed at offset zero.
-//  - The rest of the buffers are looped through in descending size order.
-//  - The other buffers that need to be in memory at the same time are found.
-//  - The first gap between simultaneously active buffers that the current
-//    buffer fits into will be used.
-//  - If no large-enough gap is found, the current buffer is placed after the
-//    last buffer that's simultaneously active.
-//  - This continues until all buffers are placed, and the offsets stored.
-//
-// This is not guaranteed to produce the best placement, since that's an
-// NP-Complete problem, but in practice it should produce one that's decent.
-class GreedyMemoryPlanner : public MemoryPlanner {
- public:
-  // You need to pass in an area of memory to be used for planning. This memory
-  // needs to have a lifetime as long as the planner, but isn't owned by this
-  // object, so management should be handled by the client. This is so it can be
-  // stack or globally allocated if necessary on devices without dynamic memory
-  // allocation. How many buffers can be planned for will depend on the size of
-  // this scratch memory, so you should enlarge it if you see an error when
-  // calling AddBuffer(). The memory can be reused once you're done with the
-  // planner, as long as you copy the calculated offsets to another location.
-  // Each buffer requires about 36 bytes of scratch.
-  GreedyMemoryPlanner(unsigned char* scratch_buffer, int scratch_buffer_size);
-  ~GreedyMemoryPlanner() override;
-
-  // Record details of a buffer we want to place.
-  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
-                         int first_time_used, int last_time_used) override;
-
-  // Record details of an offline planned buffer offset we want to place.
-  // offline_offset is the buffer offset from the start of the arena.
-  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
-                         int first_time_used, int last_time_used,
-                         int offline_offset);
-
-  // Returns the high-water mark of used memory. This is the minimum size of a
-  // memory arena you'd need to allocate to hold these buffers.
-  size_t GetMaximumMemorySize() override;
-
-  // How many buffers have been recorded.
-  int GetBufferCount() override;
-
-  // Where a given buffer should be placed in the memory arena.
-  // This information is stored in the memory arena itself, so once the arena
-  // is used for inference, it will be overwritten.
-  TfLiteStatus GetOffsetForBuffer(ErrorReporter* error_reporter,
-                                  int buffer_index, int* offset) override;
-
-  // Prints an ascii-art diagram of the buffer layout plan.
-  void PrintMemoryPlan(ErrorReporter* error_reporter);
-
-  // Debug method to check whether any buffer allocations are overlapping. This
-  // is an O(N^2) complexity operation, so only use for testing.
-  bool DoAnyBuffersOverlap(ErrorReporter* error_reporter);
-
-  // Used to store a list of buffers ordered by their offset.
-  struct ListEntry {
-    int offset;
-    int requirements_index;
-    int next_entry_index;
-  };
-
-  // Number of bytes required in order to plan a buffer.
-  static size_t per_buffer_size() {
-    const int per_buffer_size =
-        sizeof(BufferRequirements) +  // requirements_
-        sizeof(int) +                 // buffer_sizes_sorted_
-        sizeof(int) +                 // buffer_ids_sorted_
-        sizeof(ListEntry) +           // buffers_sorted_by_offset_
-        sizeof(int);                  // buffer_offsets_;
-    return per_buffer_size;
-  }
-
- private:
-  // Whether a buffer is active in a given time range.
-  bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
-                              const int last_time_used) const;
-
-  // Walks the list to return the next buffer that is active in a given time
-  // range, or a null pointer if there are none.
-  ListEntry* NextSimultaneouslyActiveBuffer(const ListEntry* start,
-                                            const int first_time_used,
-                                            const int last_time_used);
-
-  // If there isn't an up to date plan, calculate a new one.
-  void CalculateOffsetsIfNeeded();
-
-  // How many buffers we can plan for, based on the arena size we're given in
-  // the constructor.
-  int max_buffer_count_;
-
-  // The number of buffers added so far.
-  int buffer_count_;
-
-  // Records the client-provided information about each buffer.
-  struct BufferRequirements {
-    int size;
-    int offline_offset;
-    int first_time_used;
-    int last_time_used;
-  };
-
-  // Working arrays used during the layout algorithm.
-  BufferRequirements* requirements_;
-  // buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
-  //   {
-  //     offline planned buffers,
-  //     online planned buffers sorted by size
-  //   }
-  int* buffer_sizes_sorted_;
-  int* buffer_ids_sorted_;
-  ListEntry* buffers_sorted_by_offset_;
-  int next_free_entry_;    // Index of the next free entry of
-                           // buffers_sorted_by_offset_
-  int first_entry_index_;  // Index of the first entry (smallest offset) of
-                           // buffers_sorted_by_offset_
-
-  // Stores the outcome of the plan, the location of each buffer in the arena.
-  int* buffer_offsets_;
-
-  // Whether buffers have been added since the last plan was calculated.
-  bool need_to_calculate_offsets_;
-
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/memory_planner/linear_memory_planner.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/memory_planner/linear_memory_planner.h
@@ -1,50 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
-#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
-
-#include "tensorflow/lite/micro/compatibility.h"
-#include "tensorflow/lite/micro/memory_planner/memory_planner.h"
-
-namespace tflite {
-
-// The simplest possible memory planner that just lays out all buffers at
-// increasing offsets without trying to reuse memory.
-class LinearMemoryPlanner : public MemoryPlanner {
- public:
-  LinearMemoryPlanner();
-  ~LinearMemoryPlanner() override;
-
-  TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter, int size,
-                         int first_time_used, int last_time_used) override;
-
-  size_t GetMaximumMemorySize() override;
-  int GetBufferCount() override;
-  TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
-                                  int buffer_index, int* offset) override;
-
- private:
-  static constexpr int kMaxBufferCount = 1024;
-  size_t buffer_offsets_[kMaxBufferCount];
-  int current_buffer_count_;
-  size_t next_free_offset_;
-
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/memory_planner/memory_planner.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/memory_planner/memory_planner.h
@@ -1,71 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
-#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-
-namespace tflite {
-
-// Interface class for planning the layout of memory buffers during the
-// execution of a graph.
-// It's designed to be used by a client that iterates in any order through the
-// buffers it wants to lay out, and then calls the getter functions for
-// information about the calculated layout. For example:
-//
-// SomeMemoryPlanner planner;
-// planner.AddBuffer(reporter, 100, 0, 1);  // Buffer 0
-// planner.AddBuffer(reporter, 50, 2, 3);   // Buffer 1
-// planner.AddBuffer(reporter, 50, 2, 3);   // Buffer 2
-//
-// int offset0;
-// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 0, &offset0));
-// int offset1;
-// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 1, &offset1));
-// int offset2;
-// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 2, &offset2));
-// const int arena_size_needed = planner.GetMaximumMemorySize();
-//
-// The goal is for applications to be able to experiment with different layout
-// strategies without changing their client code, by swapping out classes that
-// implement this interface.
-class MemoryPlanner {
- public:
-  MemoryPlanner() {}
-  virtual ~MemoryPlanner() {}
-
-  // Pass information about a buffer's size and lifetime to the layout
-  // algorithm. The order this is called implicitly assigns an index to the
-  // result, so the buffer information that's passed into the N-th call of
-  // this method will be used as the buffer_index argument to
-  // GetOffsetForBuffer().
-  virtual TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter,
-                                 int size, int first_time_used,
-                                 int last_time_used) = 0;
-
-  // The largest contiguous block of memory that's needed to hold the layout.
-  virtual size_t GetMaximumMemorySize() = 0;
-  // How many buffers have been added to the planner.
-  virtual int GetBufferCount() = 0;
-  // Calculated layout offset for the N-th buffer added to the planner.
-  virtual TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
-                                          int buffer_index, int* offset) = 0;
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_allocator.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_allocator.h
@@ -1,250 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
-
-#include <cstddef>
-#include <cstdint>
-
-#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/micro/compatibility.h"
-#include "tensorflow/lite/micro/micro_op_resolver.h"
-#include "tensorflow/lite/micro/simple_memory_allocator.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-
-namespace tflite {
-
-// Namespace used for unittests.
-namespace internal {
-
-// Sets up all of the data structure members for a TfLiteTensor based on the
-// contents of a serialized tensor in the flatbuffer.
-// TODO(b/160894903): Once all kernels have been updated to the new
-// TfLiteEvalTensor API - drop the allocate_temp flag. This enables internal
-// flatbuffer quantization or dimension allocations to take place in either the
-// temp or tail section of the arena.
-TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
-    SimpleMemoryAllocator* allocator, bool allocate_temp,
-    const tflite::Tensor& flatbuffer_tensor,
-    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
-    ErrorReporter* error_reporter, TfLiteTensor* result);
-
-// A handle tracking scratch buffer allocation. This handle is created by
-// `RequestScratchBufferInArena`. `data` field is populated in
-// `FinishModelAllocation` after static memory planning.
-// TODO(b/150257460) As a future optimization, this struct could be replaced by
-// a union, since once `data` is populated, `bytes` and `node_idx` are not
-// needed.
-typedef struct {
-  // Pointer to the scratch buffer.
-  uint8_t* data;
-  // Number of bytes required by the buffer. The actual allocated size might be
-  // greater than `bytes` due to buffer alignment.
-  size_t bytes;
-  // Node for which the buffer is allocated. This provides useful information to
-  // determine the lifetime of the buffer. In AllocationInfo, this buffer will
-  // have `before` = node_idx and `after` = node_idx.
-  int node_idx;
-} ScratchBufferHandle;
-}  // namespace internal
-
-typedef struct {
-  TfLiteNode node;
-  const TfLiteRegistration* registration;
-} NodeAndRegistration;
-
-// Allocator responsible for allocating memory for all intermediate tensors
-// necessary to invoke a model.
-//
-// The lifetime of the model, tensor arena and error reporter must be at
-// least as long as that of the allocator object, since the allocator needs
-// them to be accessible during its entire lifetime.
-//
-// The MicroAllocator simply plans out additional allocations that are required
-// to standup a model for inference in TF Micro. This class currently relies on
-// an additional allocator - SimpleMemoryAllocator - for all allocations from an
-// arena. These allocations are divided into head (non-persistent) and tail
-// (persistent) regions:
-//
-// Memory layout to help understand how it works
-// This information could change in the future version.
-// ************** .memory_allocator->GetBuffer()
-// Tensors/Scratch buffers (head)
-// ************** .head_watermark
-// unused memory
-// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
-//                                               - ->GetDataSize()
-// persistent area (tail)
-// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
-class MicroAllocator {
- public:
-  // Creates a MicroAllocator instance from a given tensor arena. This arena
-  // will be managed by the created instance.
-  // Note: Please use __declspec(align(16)) to make sure tensor_arena is 16
-  // bytes aligned, otherwise some head room will be wasted.
-  // TODO(b/157615197): Cleanup constructor + factory usage.
-  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
-                                ErrorReporter* error_reporter);
-
-  // Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
-  // instance. This allocator instance will use the SimpleMemoryAllocator
-  // instance to manage allocations internally.
-  static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
-                                ErrorReporter* error_reporter);
-
-  // Begin allocating internal resources required for model inference.
-  // This method will run through the flatbuffer data supplied in the model to
-  // properly allocate tensor, node, and op registration data. This method is
-  // expected to be followed with a call to FinishModelAllocation() before
-  // resuming allocation with another model. All persistent tensor buffers are
-  // stored in the out-param eval_tensors. This value is allocated from the
-  // persistent memory arena and will be used to host runtime tensor buffers.
-  TfLiteStatus StartModelAllocation(
-      const Model* model, const MicroOpResolver& op_resolver,
-      NodeAndRegistration** node_and_registrations,
-      TfLiteEvalTensor** eval_tensors);
-
-  // Finish allocating internal resources required for model inference.
-  // This method will plan non-persistent buffers and commit a memory plan to
-  // the 'head' section of the memory arena. All variable tensor data will also
-  // be allocated. This method should be called after assigning model resources
-  // in StartModelAllocation(). The eval_tensors pointer should be the value
-  // passed into this class during StartModelAllocation().
-  TfLiteStatus FinishModelAllocation(const Model* model,
-                                     TfLiteEvalTensor* eval_tensors);
-
-  // Allocates a TfLiteTensor struct and populates the returned value with
-  // properties from the model flatbuffer. This struct is allocated from
-  // persistent arena memory and is only guaranteed for the lifetime of the
-  // application. The eval_tensors pointer should be the value passed into this
-  // class during StartModelAllocation() and contains the source-of-truth for
-  // buffers.
-  virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
-      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
-
-  // Allocates a TfLiteTensor struct and populates the returned value with
-  // properties from the model flatbuffer. This struct is allocated from
-  // temporary arena memory and is only guaranteed until a call is made to
-  // ResetTempAllocations(). The eval_tensors pointer should be the value passed
-  // into this class during StartModelAllocation() and contains the
-  // source-of-truth for buffers.
-  virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
-                                                 TfLiteEvalTensor* eval_tensors,
-                                                 int tensor_index);
-
-  // Resets all temporary allocations. This method should be called after a
-  // chain of temp allocations (e.g. chain of TfLiteTensor objects via
-  // AllocateTempTfLiteTensor()).
-  virtual void ResetTempAllocations();
-
-  // Allocates a persistent buffer which has the same lifetime as the allocator.
-  // The memory is immediately available and is allocated from the tail of the
-  // arena.
-  void* AllocatePersistentBuffer(size_t bytes);
-
-  // Register a scratch buffer of size `bytes` for Node with `node_id`.
-  // This method only allocates a BufferHandle holding information for memory
-  // planning. The buffer ptr is ready after `FinishModelAllocation` and can
-  // be retrieved by `GetScratchBuffer` method using the returned buffer_idx.
-  // Note that there should be no tail allocation between two consecutive
-  // `RequestScratchBufferInArena` calls.
-  TfLiteStatus RequestScratchBufferInArena(int node_id, size_t bytes,
-                                           int* buffer_idx);
-  // Returns the pointer to the planned scratch buffer.
-  void* GetScratchBuffer(int buffer_idx) const;
-
-  // Returns the arena usage in bytes, only available after
-  // `FinishModelAllocation`. Otherwise, it will return 0.
-  size_t used_bytes() const;
-
- protected:
-  MicroAllocator(SimpleMemoryAllocator* memory_allocator,
-                 ErrorReporter* error_reporter);
-  virtual ~MicroAllocator();
-
-  // Allocates an array in the arena to hold pointers to the node and
-  // registration pointers required to represent the inference graph of the
-  // model.
-  virtual TfLiteStatus AllocateNodeAndRegistrations(
-      const Model* model, NodeAndRegistration** node_and_registrations);
-
-  // Populates node and registration pointers representing the inference graph
-  // of the model from values inside the flatbuffer (loaded from the TfLiteModel
-  // instance). Persistent data (e.g. operator data) is allocated from the
-  // arena.
-  virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
-      const Model* model, const MicroOpResolver& op_resolver,
-      NodeAndRegistration* node_and_registrations);
-
-  // Allocates the list of persistent TfLiteEvalTensors that are used for the
-  // "eval" phase of model inference. These structs will be the source of truth
-  // for all tensor buffers. Allocation results are stored in the out-param
-  // eval_tensors.
-  virtual TfLiteStatus AllocateTfLiteEvalTensors(
-      const Model* model, TfLiteEvalTensor** eval_tensors);
-
-  // Allocates persistent tensor buffers for variable tensors in the subgraph.
-  virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
-                                         TfLiteEvalTensor* eval_tensors);
-
-  // TODO(b/160894903): Once all kernels have been updated to the new API drop
-  // this method. It is only used to record TfLiteTensor persistent allocations.
-  virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
-      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
-
-  // Populates a TfLiteTensor struct with data from the model flatbuffer. Any
-  // quantization data is allocated from either the tail (persistent) or temp
-  // sections of the arena based on the allocation flag.
-  // TODO(b/160894903): Once all kernels have been updated to the new API drop
-  // this function since all allocations for quantized data will take place in
-  // the temp section.
-  virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
-      const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
-      int tensor_index, bool allocate_temp);
-
-  ErrorReporter* error_reporter() const;
-
-  // Returns the first subgraph from the model.
-  const SubGraph* GetSubGraphFromModel(const Model* model);
-
- private:
-  // Commits a memory plan for all non-persistent buffer allocations in the
-  // 'head' section of the memory arena. The eval_tensors pointer is the list of
-  // pre-allocated TfLiteEvalTensor structs that will point to the buffers that
-  // will be allocated into the head section in this function call.
-  virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
-                                              const SubGraph* subgraph,
-                                              TfLiteEvalTensor* eval_tensors);
-
-  // A simple memory allocator that always allocates from the arena tail or head.
-  SimpleMemoryAllocator* memory_allocator_;
-
-  ErrorReporter* error_reporter_;
-  bool model_is_allocating_;
-
-  // In reverse order for efficiency.
-  // i.e. scratch_buffer_handles_[0] is the handle for the last buffer,
-  // corresponding to the last RequestScratchBufferInArena call.
-  internal::ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
-  // How many scratch buffers have been allocated.
-  size_t scratch_buffer_count_ = 0;
-
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-}  // namespace tflite
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_error_reporter.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_error_reporter.h
@@ -1,36 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
-
-#include <cstdarg>
-
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/micro/compatibility.h"
-
-namespace tflite {
-
-class MicroErrorReporter : public ErrorReporter {
- public:
-  ~MicroErrorReporter() override {}
-  int Report(const char* format, va_list args) override;
-
- private:
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_interpreter.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_interpreter.h
@@ -1,208 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
-
-#include <cstddef>
-#include <cstdint>
-
-#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/core/api/profiler.h"
-#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
-#include "tensorflow/lite/micro/micro_allocator.h"
-#include "tensorflow/lite/micro/micro_op_resolver.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-#include "tensorflow/lite/type_to_tflitetype.h"
-
-namespace tflite {
-
-namespace internal {
-
-// A helper class to encapsulate the implementation of APIs in Context.
-// context->impl_ points to an instance of this class.
-// Check tensorflow/lite/c/common.h for detailed descriptions.
-// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
-class ContextHelper {
- public:
-  explicit ContextHelper(ErrorReporter* error_reporter,
-                         MicroAllocator* allocator, const Model* model);
-
-  // Functions that will be assigned to function pointers on TfLiteContext:
-  static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
-  static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
-                                                  size_t bytes,
-                                                  int* buffer_idx);
-  static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
-  static void ReportOpError(struct TfLiteContext* context, const char* format,
-                            ...);
-  static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
-                                 int tensor_idx);
-  static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
-                                         int tensor_idx);
-
-  // Sets the current node index to assist with scratch buffer allocations:
-  void SetNodeIndex(int idx);
-
-  // Sets the pointer to a list of TfLiteEvalTensor instances.
-  void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);
-
- private:
-  MicroAllocator* allocator_;
-  ErrorReporter* error_reporter_;
-  const Model* model_;
-  TfLiteEvalTensor* eval_tensors_;
-  int current_node_idx_ = -1;
-};
-
-}  // namespace internal
-
-class MicroInterpreter {
- public:
-  // The lifetime of the model, op resolver, tensor arena, error reporter and
-  // profiler must be at least as long as that of the interpreter object, since
-  // the interpreter may need to access them at any time. This means that you
-  // should usually create them with the same scope as each other, for example
-  // having them all allocated on the stack as local variables through a
-  // top-level function. The interpreter doesn't do any deallocation of any of
-  // the pointed-to objects, ownership remains with the caller.
-  MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
-                   uint8_t* tensor_arena, size_t tensor_arena_size,
-                   ErrorReporter* error_reporter,
-                   tflite::Profiler* profiler = nullptr);
-
-  // Create an interpreter instance using an existing MicroAllocator instance.
-  // This constructor should be used when creating an allocator that needs to
-  // have allocation handled in more than one interpreter or for recording
-  // allocations inside the interpreter. The lifetime of the allocator must be
-  // as long as that of the interpreter object.
-  MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
-                   MicroAllocator* allocator, ErrorReporter* error_reporter,
-                   tflite::Profiler* profiler = nullptr);
-
-  ~MicroInterpreter();
-
-  // Runs through the model and allocates all necessary input, output and
-  // intermediate tensors.
-  TfLiteStatus AllocateTensors();
-
-  // In order to support partial graph runs for strided models, this can return
-  // values other than kTfLiteOk and kTfLiteError.
-  // TODO(b/149795762): Add this to the TfLiteStatus enum.
-  TfLiteStatus Invoke();
-
-  size_t tensors_size() const { return context_.tensors_size; }
-  TfLiteTensor* tensor(size_t tensor_index);
-  template <class T>
-  T* typed_tensor(int tensor_index) {
-    if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) {
-      if (tensor_ptr->type == typeToTfLiteType<T>()) {
-        return GetTensorData<T>(tensor_ptr);
-      }
-    }
-    return nullptr;
-  }
-
-  TfLiteTensor* input(size_t index);
-  size_t inputs_size() const { return subgraph_->inputs()->Length(); }
-  const flatbuffers::Vector<int32_t>& inputs() const {
-    return *subgraph_->inputs();
-  }
-  TfLiteTensor* input_tensor(size_t index) { return input(index); }
-  template <class T>
-  T* typed_input_tensor(int tensor_index) {
-    if (TfLiteTensor* tensor_ptr = input_tensor(tensor_index)) {
-      if (tensor_ptr->type == typeToTfLiteType<T>()) {
-        return GetTensorData<T>(tensor_ptr);
-      }
-    }
-    return nullptr;
-  }
-
-  TfLiteTensor* output(size_t index);
-  size_t outputs_size() const { return subgraph_->outputs()->Length(); }
-  const flatbuffers::Vector<int32_t>& outputs() const {
-    return *subgraph_->outputs();
-  }
-  TfLiteTensor* output_tensor(size_t index) { return output(index); }
-  template <class T>
-  T* typed_output_tensor(int tensor_index) {
-    if (TfLiteTensor* tensor_ptr = output_tensor(tensor_index)) {
-      if (tensor_ptr->type == typeToTfLiteType<T>()) {
-        return GetTensorData<T>(tensor_ptr);
-      }
-    }
-    return nullptr;
-  }
-
-  // Reset all variable tensors to the default value.
-  TfLiteStatus ResetVariableTensors();
-
-  TfLiteStatus initialization_status() const { return initialization_status_; }
-
-  size_t operators_size() const { return subgraph_->operators()->size(); }
-
-  // For debugging only.
-  const NodeAndRegistration node_and_registration(int node_index) const {
-    return node_and_registrations_[node_index];
-  }
-
-  // For debugging only.
-  // Returns the actual used arena in bytes. This method gives the optimal arena
-  // size. It's only available after `AllocateTensors` has been called.
-  // Note that normally `tensor_arena` requires 16 bytes alignment to fully
-  // utilize the space. If it's not the case, the optimial arena size would be
-  // arena_used_bytes() + 16.
-  size_t arena_used_bytes() const { return allocator_.used_bytes(); }
-
- protected:
-  const MicroAllocator& allocator() const { return allocator_; }
-  const TfLiteContext& context() const { return context_; }
-
- private:
-  // TODO(b/158263161): Consider switching to Create() function to enable better
-  // error reporting during initialization.
-  void Init(tflite::Profiler* profiler);
-
-  void CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr);
-
-  template <class T>
-  void CorrectTensorDataEndianness(T* data, int32_t size);
-
-  NodeAndRegistration* node_and_registrations_ = nullptr;
-
-  const Model* model_;
-  const MicroOpResolver& op_resolver_;
-  ErrorReporter* error_reporter_;
-  TfLiteContext context_ = {};
-  MicroAllocator& allocator_;
-  bool tensors_allocated_;
-
-  TfLiteStatus initialization_status_;
-
-  const SubGraph* subgraph_;
-  TfLiteEvalTensor* eval_tensors_;
-  internal::ContextHelper context_helper_;
-
-  // TODO(b/160894903): Clean these pointers up when all APIs are updated to new
-  // TfLiteEvalTensor buffers.
-  TfLiteTensor* input_tensor_;
-  TfLiteTensor* output_tensor_;
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_mutable_op_resolver.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_mutable_op_resolver.h
@@ -1,458 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
-
-#include <stdio.h>
-#include <cstring>
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/op_macros.h"
-#include "tensorflow/lite/micro/compatibility.h"
-#include "tensorflow/lite/micro/kernels/micro_ops.h"
-#include "tensorflow/lite/micro/micro_op_resolver.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-
-namespace tflite {
-
-template <unsigned int tOpCount>
-class MicroMutableOpResolver : public MicroOpResolver {
- public:
-  explicit MicroMutableOpResolver(ErrorReporter* error_reporter = nullptr)
-      : error_reporter_(error_reporter) {}
-
-  const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override {
-    if (op == BuiltinOperator_CUSTOM) return nullptr;
-
-    for (unsigned int i = 0; i < registrations_len_; ++i) {
-      const TfLiteRegistration& registration = registrations_[i];
-      if (registration.builtin_code == op) {
-        return &registration;
-      }
-    }
-    return nullptr;
-  }
-
-  const TfLiteRegistration* FindOp(const char* op) const override {
-    for (unsigned int i = 0; i < registrations_len_; ++i) {
-      const TfLiteRegistration& registration = registrations_[i];
-      if ((registration.builtin_code == BuiltinOperator_CUSTOM) &&
-          (strcmp(registration.custom_name, op) == 0)) {
-        return &registration;
-      }
-    }
-    return nullptr;
-  }
-
-  MicroOpResolver::BuiltinParseFunction GetOpDataParser(
-      BuiltinOperator op) const override {
-    TFLITE_DCHECK(num_buitin_ops_ <= tOpCount);
-    for (unsigned int i = 0; i < num_buitin_ops_; ++i) {
-      if (builtin_codes_[i] == op) return builtin_parsers_[i];
-    }
-    return nullptr;
-  }
-
-  // Registers a Custom Operator with the MicroOpResolver.
-  //
-  // Only the first call for a given name will be successful. i.e. if this
-  // function is called again for a previously added Custom Operator, the
-  // MicroOpResolver will be unchanged and this function will return
-  // kTfLiteError.
-  TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) {
-    if (registrations_len_ >= tOpCount) {
-      if (error_reporter_) {
-        TF_LITE_REPORT_ERROR(
-            error_reporter_,
-            "Couldn't register custom op '%s', resolver size is too small (%d)",
-            name, tOpCount);
-      }
-      return kTfLiteError;
-    }
-
-    if (FindOp(name) != nullptr) {
-      if (error_reporter_ != nullptr) {
-        TF_LITE_REPORT_ERROR(error_reporter_,
-                             "Calling AddCustom for the same op more than once "
-                             "is not supported (Op: %s).",
-                             name);
-      }
-      return kTfLiteError;
-    }
-
-    TfLiteRegistration* new_registration = &registrations_[registrations_len_];
-    registrations_len_ += 1;
-
-    *new_registration = *registration;
-    new_registration->builtin_code = BuiltinOperator_CUSTOM;
-    new_registration->custom_name = name;
-    return kTfLiteOk;
-  }
-
-  // The Add* functions below add the various Builtin operators to the
-  // MicroMutableOpResolver object.
-
-  TfLiteStatus AddAbs() {
-    return AddBuiltin(BuiltinOperator_ABS, tflite::ops::micro::Register_ABS(),
-                      ParseAbs);
-  }
-
-  TfLiteStatus AddAdd() {
-    return AddBuiltin(BuiltinOperator_ADD, tflite::ops::micro::Register_ADD(),
-                      ParseAdd);
-  }
-
-  TfLiteStatus AddArgMax() {
-    return AddBuiltin(BuiltinOperator_ARG_MAX,
-                      tflite::ops::micro::Register_ARG_MAX(), ParseArgMax);
-  }
-
-  TfLiteStatus AddArgMin() {
-    return AddBuiltin(BuiltinOperator_ARG_MIN,
-                      tflite::ops::micro::Register_ARG_MIN(), ParseArgMin);
-  }
-
-  TfLiteStatus AddAveragePool2D() {
-    return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
-                      tflite::ops::micro::Register_AVERAGE_POOL_2D(),
-                      ParsePool);
-  }
-
-  TfLiteStatus AddCeil() {
-    return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(),
-                      ParseCeil);
-  }
-
-  TfLiteStatus AddCircularBuffer() {
-    return AddCustom("CIRCULAR_BUFFER",
-                     tflite::ops::micro::Register_CIRCULAR_BUFFER());
-  }
-
-  TfLiteStatus AddConcatenation() {
-    return AddBuiltin(BuiltinOperator_CONCATENATION,
-                      tflite::ops::micro::Register_CONCATENATION(),
-                      ParseConcatenation);
-  }
-
-  TfLiteStatus AddConv2D() {
-    return AddBuiltin(BuiltinOperator_CONV_2D,
-                      tflite::ops::micro::Register_CONV_2D(), ParseConv2D);
-  }
-
-  TfLiteStatus AddCos() {
-    return AddBuiltin(BuiltinOperator_COS, tflite::ops::micro::Register_COS(),
-                      ParseCos);
-  }
-
-  TfLiteStatus AddDepthwiseConv2D() {
-    return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
-                      tflite::ops::micro::Register_DEPTHWISE_CONV_2D(),
-                      ParseDepthwiseConv2D);
-  }
-
-  TfLiteStatus AddDequantize() {
-    return AddBuiltin(BuiltinOperator_DEQUANTIZE,
-                      tflite::ops::micro::Register_DEQUANTIZE(),
-                      ParseDequantize);
-  }
-
-  TfLiteStatus AddEqual() {
-    return AddBuiltin(BuiltinOperator_EQUAL,
-                      tflite::ops::micro::Register_EQUAL(), ParseEqual);
-  }
-
-  TfLiteStatus AddFloor() {
-    return AddBuiltin(BuiltinOperator_FLOOR,
-                      tflite::ops::micro::Register_FLOOR(), ParseFloor);
-  }
-
-  TfLiteStatus AddFullyConnected() {
-    return AddBuiltin(BuiltinOperator_FULLY_CONNECTED,
-                      tflite::ops::micro::Register_FULLY_CONNECTED(),
-                      ParseFullyConnected);
-  }
-
-  TfLiteStatus AddGreater() {
-    return AddBuiltin(BuiltinOperator_GREATER,
-                      tflite::ops::micro::Register_GREATER(), ParseGreater);
-  }
-
-  TfLiteStatus AddGreaterEqual() {
-    return AddBuiltin(BuiltinOperator_GREATER_EQUAL,
-                      tflite::ops::micro::Register_GREATER_EQUAL(),
-                      ParseGreaterEqual);
-  }
-
-  TfLiteStatus AddHardSwish() {
-    return AddBuiltin(BuiltinOperator_HARD_SWISH,
-                      tflite::ops::micro::Register_HARD_SWISH(),
-                      ParseHardSwish);
-  }
-
-  TfLiteStatus AddL2Normalization() {
-    return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
-                      tflite::ops::micro::Register_L2_NORMALIZATION(),
-                      ParseL2Normalization);
-  }
-
-  TfLiteStatus AddLess() {
-    return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(),
-                      ParseLess);
-  }
-
-  TfLiteStatus AddLessEqual() {
-    return AddBuiltin(BuiltinOperator_LESS_EQUAL,
-                      tflite::ops::micro::Register_LESS_EQUAL(),
-                      ParseLessEqual);
-  }
-
-  TfLiteStatus AddLog() {
-    return AddBuiltin(BuiltinOperator_LOG, tflite::ops::micro::Register_LOG(),
-                      ParseLog);
-  }
-
-  TfLiteStatus AddLogicalAnd() {
-    return AddBuiltin(BuiltinOperator_LOGICAL_AND,
-                      tflite::ops::micro::Register_LOGICAL_AND(),
-                      ParseLogicalAnd);
-  }
-
-  TfLiteStatus AddLogicalNot() {
-    return AddBuiltin(BuiltinOperator_LOGICAL_NOT,
-                      tflite::ops::micro::Register_LOGICAL_NOT(),
-                      ParseLogicalNot);
-  }
-
-  TfLiteStatus AddLogicalOr() {
-    return AddBuiltin(BuiltinOperator_LOGICAL_OR,
-                      tflite::ops::micro::Register_LOGICAL_OR(),
-                      ParseLogicalOr);
-  }
-
-  TfLiteStatus AddLogistic() {
-    return AddBuiltin(BuiltinOperator_LOGISTIC,
-                      tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
-  }
-
-  TfLiteStatus AddMaximum() {
-    return AddBuiltin(BuiltinOperator_MAXIMUM,
-                      tflite::ops::micro::Register_MAXIMUM(), ParseMaximum);
-  }
-
-  TfLiteStatus AddMaxPool2D() {
-    return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
-                      tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
-  }
-
-  TfLiteStatus AddMean() {
-    return AddBuiltin(BuiltinOperator_MEAN, tflite::ops::micro::Register_MEAN(),
-                      ParseReducer);
-  }
-
-  TfLiteStatus AddMinimum() {
-    return AddBuiltin(BuiltinOperator_MINIMUM,
-                      tflite::ops::micro::Register_MINIMUM(), ParseMinimum);
-  }
-
-  TfLiteStatus AddMul() {
-    return AddBuiltin(BuiltinOperator_MUL, tflite::ops::micro::Register_MUL(),
-                      ParseMul);
-  }
-
-  TfLiteStatus AddNeg() {
-    return AddBuiltin(BuiltinOperator_NEG, tflite::ops::micro::Register_NEG(),
-                      ParseNeg);
-  }
-
-  TfLiteStatus AddNotEqual() {
-    return AddBuiltin(BuiltinOperator_NOT_EQUAL,
-                      tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual);
-  }
-
-  TfLiteStatus AddPack() {
-    return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(),
-                      ParsePack);
-  }
-
-  TfLiteStatus AddPad() {
-    return AddBuiltin(BuiltinOperator_PAD, tflite::ops::micro::Register_PAD(),
-                      ParsePad);
-  }
-
-  TfLiteStatus AddPadV2() {
-    return AddBuiltin(BuiltinOperator_PADV2,
-                      tflite::ops::micro::Register_PADV2(), ParsePadV2);
-  }
-
-  TfLiteStatus AddPrelu() {
-    return AddBuiltin(BuiltinOperator_PRELU,
-                      tflite::ops::micro::Register_PRELU(), ParsePrelu);
-  }
-
-  TfLiteStatus AddQuantize() {
-    return AddBuiltin(BuiltinOperator_QUANTIZE,
-                      tflite::ops::micro::Register_QUANTIZE(), ParseQuantize);
-  }
-
-  TfLiteStatus AddRelu() {
-    return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
-                      ParseRelu);
-  }
-
-  TfLiteStatus AddRelu6() {
-    return AddBuiltin(BuiltinOperator_RELU6,
-                      tflite::ops::micro::Register_RELU6(), ParseRelu6);
-  }
-
-  TfLiteStatus AddReshape() {
-    return AddBuiltin(BuiltinOperator_RESHAPE,
-                      tflite::ops::micro::Register_RESHAPE(), ParseReshape);
-  }
-
-  TfLiteStatus AddResizeNearestNeighbor() {
-    return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
-                      tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
-                      ParseResizeNearestNeighbor);
-  }
-
-  TfLiteStatus AddRound() {
-    return AddBuiltin(BuiltinOperator_ROUND,
-                      tflite::ops::micro::Register_ROUND(), ParseRound);
-  }
-
-  TfLiteStatus AddRsqrt() {
-    return AddBuiltin(BuiltinOperator_RSQRT,
-                      tflite::ops::micro::Register_RSQRT(), ParseRsqrt);
-  }
-
-  TfLiteStatus AddSin() {
-    return AddBuiltin(BuiltinOperator_SIN, tflite::ops::micro::Register_SIN(),
-                      ParseSin);
-  }
-
-  TfLiteStatus AddSoftmax() {
-    return AddBuiltin(BuiltinOperator_SOFTMAX,
-                      tflite::ops::micro::Register_SOFTMAX(), ParseSoftmax);
-  }
-
-  TfLiteStatus AddSplit() {
-    return AddBuiltin(BuiltinOperator_SPLIT,
-                      tflite::ops::micro::Register_SPLIT(), ParseSplit);
-  }
-
-  TfLiteStatus AddSqrt() {
-    return AddBuiltin(BuiltinOperator_SQRT, tflite::ops::micro::Register_SQRT(),
-                      ParseSqrt);
-  }
-
-  TfLiteStatus AddSquare() {
-    return AddBuiltin(BuiltinOperator_SQUARE,
-                      tflite::ops::micro::Register_SQUARE(), ParseSquare);
-  }
-
-  TfLiteStatus AddStridedSlice() {
-    return AddBuiltin(BuiltinOperator_STRIDED_SLICE,
-                      tflite::ops::micro::Register_STRIDED_SLICE(),
-                      ParseStridedSlice);
-  }
-
-  TfLiteStatus AddSub() {
-    return AddBuiltin(BuiltinOperator_SUB, tflite::ops::micro::Register_SUB(),
-                      ParseSub);
-  }
-
-  TfLiteStatus AddSvdf() {
-    return AddBuiltin(BuiltinOperator_SVDF, tflite::ops::micro::Register_SVDF(),
-                      ParseSvdf);
-  }
-
-  TfLiteStatus AddTanh() {
-    return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(),
-                      ParseTanh);
-  }
-
-  TfLiteStatus AddUnpack() {
-    return AddBuiltin(BuiltinOperator_UNPACK,
-                      tflite::ops::micro::Register_UNPACK(), ParseUnpack);
-  }
-
-  unsigned int GetRegistrationLength() { return registrations_len_; }
-
- private:
-  TfLiteStatus AddBuiltin(tflite::BuiltinOperator op,
-                          const TfLiteRegistration& registration,
-                          MicroOpResolver::BuiltinParseFunction parser) {
-    if (op == BuiltinOperator_CUSTOM) {
-      if (error_reporter_ != nullptr) {
-        TF_LITE_REPORT_ERROR(error_reporter_,
-                             "Invalid parameter BuiltinOperator_CUSTOM to the "
-                             "AddBuiltin function.");
-      }
-      return kTfLiteError;
-    }
-
-    if (FindOp(op) != nullptr) {
-      if (error_reporter_ != nullptr) {
-        TF_LITE_REPORT_ERROR(error_reporter_,
-                             "Calling AddBuiltin with the same op more than "
-                             "once is not supported (Op: #%d).",
-                             op);
-      }
-      return kTfLiteError;
-    }
-
-    if (registrations_len_ >= tOpCount) {
-      if (error_reporter_) {
-        TF_LITE_REPORT_ERROR(error_reporter_,
-                             "Couldn't register builtin op #%d, resolver size "
-                             "is too small (%d).",
-                             op, tOpCount);
-      }
-      return kTfLiteError;
-    }
-
-    registrations_[registrations_len_] = registration;
-    // Strictly speaking, the builtin_code is not necessary for TFLM but filling
-    // it in regardless.
-    registrations_[registrations_len_].builtin_code = op;
-    registrations_len_++;
-
-    builtin_codes_[num_buitin_ops_] = op;
-    builtin_parsers_[num_buitin_ops_] = parser;
-    num_buitin_ops_++;
-
-    return kTfLiteOk;
-  }
-
-  TfLiteRegistration registrations_[tOpCount];
-  unsigned int registrations_len_ = 0;
-
-  // Arrays (and counter) to store the builtin codes and their corresponding
-  // parse functions as these are registered with the Op Resolver.
-  BuiltinOperator builtin_codes_[tOpCount];
-  MicroOpResolver::BuiltinParseFunction builtin_parsers_[tOpCount];
-  unsigned int num_buitin_ops_ = 0;
-
-  ErrorReporter* error_reporter_;
-
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-};  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_op_resolver.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_op_resolver.h
@@ -1,73 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
-
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
-#include "tensorflow/lite/core/api/op_resolver.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-
-namespace tflite {
-
-// This is an interface for the OpResolver for TFLiteMicro. The differences from
-// the TFLite OpResolver base class are to:
-//  * explicitly remove support for Op versions
-//  * allow for finer grained registration of the Builtin Ops to reduce code
-//    size for TFLiteMicro.
-//
-// We need an interface class instead of directly using MicroMutableOpResolver
-// because MicroMutableOpResolver is a class template with the number of
-// registered Ops as the template parameter.
-class MicroOpResolver : public OpResolver {
- public:
-  typedef TfLiteStatus (*BuiltinParseFunction)(const Operator* op,
-                                               ErrorReporter* error_reporter,
-                                               BuiltinDataAllocator* allocator,
-                                               void** builtin_data);
-
-  // Returns the Op registration struct corresponding to the enum code from the
-  // flatbuffer schema. Returns nullptr if the op is not found or if op ==
-  // BuiltinOperator_CUSTOM.
-  virtual const TfLiteRegistration* FindOp(BuiltinOperator op) const = 0;
-
-  // Returns the Op registration struct corresponding to the custom operator by
-  // name.
-  virtual const TfLiteRegistration* FindOp(const char* op) const = 0;
-
-  // This implementation exists for compatibility with the OpResolver base class
-  // and disregards the version parameter.
-  const TfLiteRegistration* FindOp(BuiltinOperator op,
-                                   int version) const final {
-    return FindOp(op);
-  }
-
-  // This implementation exists for compatibility with the OpResolver base class
-  // and disregards the version parameter.
-  const TfLiteRegistration* FindOp(const char* op, int version) const final {
-    return FindOp(op);
-  }
-
-  // Returns the operator specific parsing function for the OpData for a
-  // BuiltinOperator (if registered), else nullptr.
-  virtual BuiltinParseFunction GetOpDataParser(BuiltinOperator op) const = 0;
-
-  ~MicroOpResolver() override {}
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_optional_debug_tools.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_optional_debug_tools.h
@@ -1,30 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// Optional debugging functionality. For small sized binaries, these are not
-// needed.
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
-
-#include "tensorflow/lite/micro/micro_interpreter.h"
-
-namespace tflite {
-// Helper function to print model flatbuffer data. This function is not called
-// by default. Hence it's not linked in to the final binary code.
-void PrintModelData(const Model* model, ErrorReporter* error_reporter);
-// Prints a dump of what tensors and what nodes are in the interpreter.
-void PrintInterpreterState(MicroInterpreter* interpreter);
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_profiler.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_profiler.h
@@ -1,71 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
-
-#include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/core/api/profiler.h"
-#include "tensorflow/lite/micro/compatibility.h"
-
-namespace tflite {
-
-// MicroProfiler creates a common way to gain fine-grained insight into runtime
-// performance. Bottleck operators can be identified along with slow code
-// sections. This can be used in conjunction with running the relevant micro
-// benchmark to evaluate end-to-end performance.
-//
-// Usage example:
-// MicroProfiler profiler(error_reporter);
-// {
-//   ScopedProfile scoped_profile(profiler, tag);
-//   work_to_profile();
-// }
-//
-// This will call the following methods in order:
-// int event_handle = profiler->BeginEvent(op_name, EventType::DEFAULT, 0)
-// work_to_profile();
-// profiler->EndEvent(event_handle)
-class MicroProfiler : public tflite::Profiler {
- public:
-  explicit MicroProfiler(tflite::ErrorReporter* reporter);
-  ~MicroProfiler() override = default;
-
-  // AddEvent is unused for Tf Micro.
-  void AddEvent(const char* tag, EventType event_type, uint64_t start,
-                uint64_t end, int64_t event_metadata1,
-                int64_t event_metadata2) override{};
-
-  // BeginEvent followed by code followed by EndEvent will profile the code
-  // enclosed. Multiple concurrent events are unsupported, so the return value
-  // is always 0. Event_metadata1 and event_metadata2 are unused. The tag
-  // pointer must be valid until EndEvent is called.
-  uint32_t BeginEvent(const char* tag, EventType event_type,
-                      int64_t event_metadata1,
-                      int64_t event_metadata2) override;
-
-  // Event_handle is ignored since TF Micro does not support concurrent events.
-  void EndEvent(uint32_t event_handle) override;
-
- private:
-  tflite::ErrorReporter* reporter_;
-  int32_t start_time_;
-  const char* event_tag_;
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_string.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_string.h
@@ -1,33 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
-
-#include <cstdarg>
-
-// Implements simple string formatting for numeric types.  Returns the number of
-// bytes written to output.
-extern "C" {
-// Functionally equivalent to vsnprintf, trimmed down for TFLite Micro.
-// MicroSnprintf() is implemented using MicroVsnprintf().
-int MicroVsnprintf(char* output, int len, const char* format, va_list args);
-// Functionally equivalent to snprintf, trimmed down for TFLite Micro.
-// For example, MicroSnprintf(buffer, 10, "int %d", 10) will put the string
-// "int 10" in the buffer.
-// Floating point values are logged in exponent notation (1.XXX*2^N).
-int MicroSnprintf(char* output, int len, const char* format, ...);
-}
-
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_STRING_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_time.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_time.h
@@ -1,31 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
-
-#include <stdint.h>
-
-namespace tflite {
-
-// These functions should be implemented by each target platform, and provide an
-// accurate tick count along with how many ticks there are per second.
-int32_t ticks_per_second();
-
-// Return time in ticks.  The meaning of a tick varies per platform.
-int32_t GetCurrentTimeTicks();
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_utils.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/micro_utils.h
@@ -1,110 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
-
-#include <stdint.h>
-
-#include "tensorflow/lite/c/common.h"
-
-namespace tflite {
-
-// Returns number of elements in the shape array.
-
-int ElementCount(const TfLiteIntArray& dims);
-
-uint8_t FloatToAsymmetricQuantizedUInt8(const float value, const float scale,
-                                        const int zero_point);
-
-uint8_t FloatToSymmetricQuantizedUInt8(const float value, const float scale);
-
-int8_t FloatToAsymmetricQuantizedInt8(const float value, const float scale,
-                                      const int zero_point);
-
-int16_t FloatToAsymmetricQuantizedInt16(const float value, const float scale,
-                                        const int zero_point);
-
-int8_t FloatToSymmetricQuantizedInt8(const float value, const float scale);
-
-// Converts a float value into a signed thirty-two-bit quantized value.  Note
-// that values close to max int and min int may see significant error due to
-// a lack of floating point granularity for large values.
-int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale);
-
-// Helper methods to quantize arrays of floats to the desired format.
-//
-// There are several key flavors of quantization in TfLite:
-//         |asymmetric|symmetric| per channel|
-// int8_t  |     X    |    X    |     X      |
-// uint8_t |     X    |    X    |            |
-// int16_t |     X    |         |            |
-// int32_t |          |    X    |     X      |
-//
-// The per-op quantization spec can be found here:
-// https://www.tensorflow.org/lite/performance/quantization_spec
-
-void AsymmetricQuantize(const float* input, int8_t* output, int num_elements,
-                        float scale, int zero_point = 0);
-
-void AsymmetricQuantize(const float* input, uint8_t* output, int num_elements,
-                        float scale, int zero_point = 128);
-
-void AsymmetricQuantize(const float* input, int16_t* output, int num_elements,
-                        float scale, int zero_point = 0);
-
-void SymmetricQuantize(const float* input, int32_t* output, int num_elements,
-                       float scale);
-
-void SymmetricPerChannelQuantize(const float* input, int32_t* output,
-                                 int num_elements, int num_channels,
-                                 float* scales);
-
-void SignedSymmetricPerChannelQuantize(const float* values,
-                                       TfLiteIntArray* dims,
-                                       int quantized_dimension,
-                                       int8_t* quantized_values,
-                                       float* scaling_factor);
-
-void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
-                             int8_t* quantized_values, float* scaling_factor);
-
-void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
-                             int16_t* quantized_values, float* scaling_factor);
-
-void SignedSymmetricQuantize(const float* values, TfLiteIntArray* dims,
-                             int32_t* quantized_values, float* scaling_factor);
-
-void SymmetricQuantize(const float* values, TfLiteIntArray* dims,
-                       uint8_t* quantized_values, float* scaling_factor);
-
-void SymmetricDequantize(const int8_t* values, const int size,
-                         const float dequantization_scale,
-                         float* dequantized_values);
-
-template <typename T>
-void AsymmetricDequantize(const T* values, const int size,
-                          const float dequantization_scale,
-                          int dequantization_zero_point,
-                          float* dequantized_values) {
-  for (int i = 0; i < size; ++i) {
-    dequantized_values[i] =
-        (values[i] - dequantization_zero_point) * dequantization_scale;
-  }
-}
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/recording_micro_allocator.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/recording_micro_allocator.h
@@ -1,120 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
-#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
-
-#include "tensorflow/lite/micro/compatibility.h"
-#include "tensorflow/lite/micro/micro_allocator.h"
-#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
-
-namespace tflite {
-
-// List of buckets currently recorded by this class. Each type keeps a list of
-// allocated information during model initialization.
-enum class RecordedAllocationType {
-  kTfLiteEvalTensorData,
-  kPersistentTfLiteTensorData,
-  kPersistentTfLiteTensorQuantizationData,
-  kTfLiteTensorVariableBufferData,
-  kNodeAndRegistrationArray,
-  kOpData,
-};
-
-// Container for holding information about allocation recordings by a given
-// type. Each recording contains the number of bytes requested, the actual bytes
-// allocated (can differ from requested due to alignment), and the number of
-// items allocated.
-struct RecordedAllocation {
-  size_t requested_bytes;
-  size_t used_bytes;
-  size_t count;
-};
-
-// Utility subclass of MicroAllocator that records all allocations
-// inside the arena. A summary of allocations can be logged through the
-// ErrorReporter by invoking PrintAllocations(). This special allocator
-// requires an instance of RecordingSimpleMemoryAllocator to capture
-// allocations in the head and tail. Arena allocation recordings can be
-// retrieved by type through the GetRecordedAllocation() function. This class
-// should only be used for auditing memory usage or integration testing.
-class RecordingMicroAllocator : public MicroAllocator {
- public:
-  static RecordingMicroAllocator* Create(uint8_t* tensor_arena,
-                                         size_t arena_size,
-                                         ErrorReporter* error_reporter);
-
-  // Returns the recorded allocations information for a given allocation type.
-  RecordedAllocation GetRecordedAllocation(
-      RecordedAllocationType allocation_type) const;
-
-  const RecordingSimpleMemoryAllocator* GetSimpleMemoryAllocator() const;
-
-  // Logs out through the ErrorReporter all allocation recordings by type
-  // defined in RecordedAllocationType.
-  void PrintAllocations() const;
-
- protected:
-  TfLiteStatus AllocateNodeAndRegistrations(
-      const Model* model,
-      NodeAndRegistration** node_and_registrations) override;
-  TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
-      const Model* model, const MicroOpResolver& op_resolver,
-      NodeAndRegistration* node_and_registrations) override;
-  TfLiteStatus AllocateTfLiteEvalTensors(
-      const Model* model, TfLiteEvalTensor** eval_tensors) override;
-  TfLiteStatus AllocateVariables(const SubGraph* subgraph,
-                                 TfLiteEvalTensor* eval_tensors) override;
-  // TODO(b/160894903): Once all kernels have been updated to the new API drop
-  // this method. It is only used to record TfLiteTensor persistent allocations.
-  TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
-      const Model* model, TfLiteEvalTensor* eval_tensors,
-      int tensor_index) override;
-  // TODO(b/160894903): Once all kernels have been updated to the new API drop
-  // this function since all allocations for quantized data will take place in
-  // the temp section.
-  TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
-                                                  const SubGraph* subgraph,
-                                                  TfLiteTensor* tensor,
-                                                  int tensor_index,
-                                                  bool allocate_temp) override;
-
- private:
-  RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator,
-                          ErrorReporter* error_reporter);
-
-  void PrintRecordedAllocation(RecordedAllocationType allocation_type,
-                               const char* allocation_name,
-                               const char* allocation_description) const;
-
-  RecordedAllocation SnapshotAllocationUsage() const;
-  void RecordAllocationUsage(const RecordedAllocation& snapshotted_allocation,
-                             RecordedAllocation& recorded_allocation);
-
-  const RecordingSimpleMemoryAllocator* recording_memory_allocator_;
-
-  RecordedAllocation recorded_tflite_eval_tensor_data_ = {};
-  RecordedAllocation recorded_persistent_tflite_tensor_data_ = {};
-  RecordedAllocation recorded_persistent_tflite_tensor_quantization_data_ = {};
-  RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {};
-  RecordedAllocation recorded_node_and_registration_array_data_ = {};
-  RecordedAllocation recorded_op_data_ = {};
-
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/recording_micro_interpreter.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/recording_micro_interpreter.h
@@ -1,65 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
-#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
-
-#include "tensorflow/lite/micro/micro_interpreter.h"
-#include "tensorflow/lite/micro/recording_micro_allocator.h"
-
-namespace tflite {
-
-// Utility subclass that enables internal recordings of the MicroInterpreter.
-// This class should be used to audit and analyze memory arena usage for a given
-// model and interpreter.
-//
-// After construction and the first Invoke() or AllocateTensors() call - the
-// memory usage is recorded and available through the GetMicroAllocator()
-// function. See RecordingMicroAllocator for more details on what is currently
-// recorded from arena allocations.
-//
-// It is recommended for users to increase the tensor arena size by at least 1kb
-// to ensure enough additional memory is available for internal recordings.
-class RecordingMicroInterpreter : public MicroInterpreter {
- public:
-  RecordingMicroInterpreter(const Model* model,
-                            const MicroOpResolver& op_resolver,
-                            uint8_t* tensor_arena, size_t tensor_arena_size,
-                            ErrorReporter* error_reporter)
-      : MicroInterpreter(model, op_resolver,
-                         RecordingMicroAllocator::Create(
-                             tensor_arena, tensor_arena_size, error_reporter),
-                         error_reporter),
-        recording_micro_allocator_(
-            static_cast<const RecordingMicroAllocator&>(allocator())) {}
-
-  RecordingMicroInterpreter(const Model* model,
-                            const MicroOpResolver& op_resolver,
-                            RecordingMicroAllocator* allocator,
-                            ErrorReporter* error_reporter)
-      : MicroInterpreter(model, op_resolver, allocator, error_reporter),
-        recording_micro_allocator_(*allocator) {}
-
-  const RecordingMicroAllocator& GetMicroAllocator() const {
-    return recording_micro_allocator_;
-  }
-
- private:
-  const RecordingMicroAllocator& recording_micro_allocator_;
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
--- a/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/recording_simple_memory_allocator.h
+++ b/components/ai/tflite_micro/ARM_CortexM55_lib/tensorflow/lite/micro/recording_simple_memory_allocator.h
@@ -1,64 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
-#define TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
-
-#include "tensorflow/lite/micro/compatibility.h"
-#include "tensorflow/lite/micro/simple_memory_allocator.h"
-
-namespace tflite {
-
-// Utility class used to log allocations of a SimpleMemoryAllocator. Should only
-// be used in debug/evaluation settings or unit tests to evaluate allocation
-// usage.
-class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
- public:
-  RecordingSimpleMemoryAllocator(ErrorReporter* error_reporter,
-                                 uint8_t* buffer_head, size_t buffer_size);
-  // TODO(b/157615197): Cleanup constructors/destructor and use factory
-  // functions.
-  ~RecordingSimpleMemoryAllocator() override;
-
-  static RecordingSimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
-                                                uint8_t* buffer_head,
-                                                size_t buffer_size);
-
-  // Returns the number of bytes requested from the head or tail.
-  size_t GetRequestedBytes() const;
-
-  // Returns the number of bytes actually allocated from the head or tail. This
-  // value will be >= the number of requested bytes due to padding and
-  // alignment.
-  size_t GetUsedBytes() const;
-
-  // Returns the number of alloc calls from the head or tail.
-  size_t GetAllocatedCount() const;
-
-  TfLiteStatus EnsureHeadSize(size_t size, size_t alignment) override;
-  uint8_t* AllocateFromTail(size_t size, size_t alignment) override;
-
- private:
-  size_t requested_head_bytes_;
-  size_t requested_tail_bytes_;
-  size_t used_bytes_;
-  size_t alloc_count_;
-
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
--- a/Show More
+++ b/Show More