tflite_micro_person_detection_init

This commit is contained in:
yangqingsheng
2020-12-08 17:16:20 +08:00
parent 55168d954d
commit 200c0ff460
310 changed files with 121982 additions and 208 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,23 @@
#ifndef _DELAY_H
#define _DELAY_H
#include <sys.h>
//////////////////////////////////////////////////////////////////////////////////
//This program is for learning purposes only; it may not be used for any other purpose without the author's permission.
//ALIENTEK STM32F429 development board
//Uses the SysTick plain counting mode to manage delays (ucosii supported)
//Provides delay_us and delay_ms
//ALIENTEK (Zhengdian Atom)
//Technical forum: www.openedv.com
//Created: 2015/6/10
//Version: V1.0
//All rights reserved; piracy will be prosecuted.
//Copyright(C) Guangzhou Xingyi Electronic Technology Co., Ltd. 2014-2024
//All rights reserved
//********************************************************************************
//Revision notes
//////////////////////////////////////////////////////////////////////////////////
void delay_us(uint32_t nus);
void delay_ms(uint32_t time_ms);
#endif

View File

@@ -0,0 +1,70 @@
/*****************************************************************************
* | File : LCD_2IN4_Driver.h
* | Author : Waveshare team
* | Function : LCD driver
* | Info :
*----------------
* | This version: V1.0
* | Date : 2020-07-29
* | Info :
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
******************************************************************************/
#ifndef __LCD_2IN4_DRIVER_H
#define __LCD_2IN4_DRIVER_H
#include "lcd_Config.h"
#ifdef __cplusplus
extern "C" {
#endif
#define LCD_2IN4_WIDTH 240 //LCD width
#define LCD_2IN4_HEIGHT 320 //LCD height
#define LCD_2IN4_CS_0 DEV_Digital_Write(DEV_CS_PIN, 0)
#define LCD_2IN4_CS_1 DEV_Digital_Write(DEV_CS_PIN, 1)
#define LCD_2IN4_RST_0 DEV_Digital_Write(DEV_RST_PIN,0)
#define LCD_2IN4_RST_1 DEV_Digital_Write(DEV_RST_PIN,1)
#define LCD_2IN4_DC_0 DEV_Digital_Write(DEV_DC_PIN, 0)
#define LCD_2IN4_DC_1 DEV_Digital_Write(DEV_DC_PIN, 1)
void LCD_2IN4_Init(void);
void LCD_2IN4_Clear(UWORD Color);
void LCD_2IN4_Display(UWORD *image,int width, int height);
void LCD_2IN4_DrawPaint(UWORD x, UWORD y, UWORD Color);
void LCD_2IN4_SetBackLight(UWORD Value);
void LCD_2IN4_WriteData_Word(UWORD da);
void LCD_2IN4_SetCursor(UWORD X, UWORD Y);
void LCD_2IN4_SetWindow(UWORD Xstart, UWORD Ystart, UWORD Xend, UWORD Yend);
void LCD_2IN4_ClearWindow(UWORD Xstart, UWORD Ystart, UWORD Xend, UWORD Yend,UWORD color);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,87 @@
/*****************************************************************************
* | File : DEV_Config.h
* | Author : Waveshare team
* | Function : Hardware underlying interface
* | Info :
* Used to abstract the low-level layer of each host MCU
* and improve portability
*----------------
* | This version: V1.0
* | Date : 2018-11-22
* | Info :
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
******************************************************************************/
#ifndef _DEV_CONFIG_H_
#define _DEV_CONFIG_H_
#include "stm32l4xx_hal.h"
#include <stdint.h>
#include <stdio.h>
#include "spi.h"
#include "tim.h"
#include "main.h"
#define UBYTE uint8_t
#define UWORD uint16_t
#define UDOUBLE uint32_t
/**
* GPIO config
**/
#define DC_Pin GPIO_PIN_12
#define DC_GPIO_Port GPIOB
#define RST_Pin GPIO_PIN_11
#define RST_GPIO_Port GPIOA
#define CS_Pin GPIO_PIN_12
#define CS_GPIO_Port GPIOA
#define DEV_RST_PIN RST_GPIO_Port,RST_Pin //PA11
#define DEV_DC_PIN DC_GPIO_Port,DC_Pin //PB12
#define DEV_CS_PIN CS_GPIO_Port,CS_Pin //PA12
#define DEV_BL_PIN TIM4->CCR1 //PB6
/**
* GPIO read and write
**/
#define DEV_Digital_Write(_pin, _value) HAL_GPIO_WritePin(_pin, _value == 0? GPIO_PIN_RESET:GPIO_PIN_SET)
#define DEV_Digital_Read(_pin) HAL_GPIO_ReadPin(_pin)
/**
* SPI
**/
#define DEV_SPI_WRITE(_dat) DEV_SPI_WRite(_dat);
/**
* delay x ms
**/
#define DEV_Delay_ms(__xms) HAL_Delay(__xms)
/**
* PWM_BL
**/
#define DEV_Set_PWM(_Value) DEV_BL_PIN= _Value
/*-----------------------------------------------------------------------------*/
void DEV_SPI_WRite(UBYTE _dat);
int DEV_Module_Init(void);
void DEV_Module_Exit(void);
#endif
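
Note (not part of the commit): DEV_RST_PIN, DEV_DC_PIN and DEV_CS_PIN above each expand to a "port, pin" pair, so DEV_Digital_Write passes both GPIO arguments through a single macro parameter. A minimal sketch of how the macros expand, using the pin definitions from this header:

    DEV_Digital_Write(DEV_CS_PIN, 1);
    /* expands to */
    HAL_GPIO_WritePin(GPIOA, GPIO_PIN_12, 1 == 0 ? GPIO_PIN_RESET : GPIO_PIN_SET);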

View File

@@ -0,0 +1,144 @@
#ifndef _OV2640_H
#define _OV2640_H
#include "sys.h"
#include "sccb.h"
/*
* picture size
*/
#define OV2640_PIXEL_WIDTH ((uint16_t)96)
#define OV2640_PIXEL_HEIGHT ((uint16_t)96)
//#define OV2640_PWDN  PGout(9)   //POWER DOWN control signal
//#define OV2640_RST   PGout(15)  //Reset control signal
void OV2640_PWDN(uint8_t signal);
void OV2640_RST(uint8_t signal);
//////////////////////////////////////////////////////////////////////////////////
#define OV2640_MID 0X7FA2
#define OV2640_PID 0X2642
//DSP register address map, used when the DSP register bank is selected (0xFF = 0x00)
#define OV2640_DSP_R_BYPASS 0x05
#define OV2640_DSP_Qs 0x44
#define OV2640_DSP_CTRL 0x50
#define OV2640_DSP_HSIZE1 0x51
#define OV2640_DSP_VSIZE1 0x52
#define OV2640_DSP_XOFFL 0x53
#define OV2640_DSP_YOFFL 0x54
#define OV2640_DSP_VHYX 0x55
#define OV2640_DSP_DPRP 0x56
#define OV2640_DSP_TEST 0x57
#define OV2640_DSP_ZMOW 0x5A
#define OV2640_DSP_ZMOH 0x5B
#define OV2640_DSP_ZMHH 0x5C
#define OV2640_DSP_BPADDR 0x7C
#define OV2640_DSP_BPDATA 0x7D
#define OV2640_DSP_CTRL2 0x86
#define OV2640_DSP_CTRL3 0x87
#define OV2640_DSP_SIZEL 0x8C
#define OV2640_DSP_HSIZE2 0xC0
#define OV2640_DSP_VSIZE2 0xC1
#define OV2640_DSP_CTRL0 0xC2
#define OV2640_DSP_CTRL1 0xC3
#define OV2640_DSP_R_DVP_SP 0xD3
#define OV2640_DSP_IMAGE_MODE 0xDA
#define OV2640_DSP_RESET 0xE0
#define OV2640_DSP_MS_SP 0xF0
#define OV2640_DSP_SS_ID 0x7F
#define OV2640_DSP_SS_CTRL 0xF8
#define OV2640_DSP_MC_BIST 0xF9
#define OV2640_DSP_MC_AL 0xFA
#define OV2640_DSP_MC_AH 0xFB
#define OV2640_DSP_MC_D 0xFC
#define OV2640_DSP_P_STATUS 0xFE
#define OV2640_DSP_RA_DLMT 0xFF
//Sensor register address map, used when the sensor register bank is selected (0xFF = 0x01)
#define OV2640_SENSOR_GAIN 0x00
#define OV2640_SENSOR_COM1 0x03
#define OV2640_SENSOR_REG04 0x04
#define OV2640_SENSOR_REG08 0x08
#define OV2640_SENSOR_COM2 0x09
#define OV2640_SENSOR_PIDH 0x0A
#define OV2640_SENSOR_PIDL 0x0B
#define OV2640_SENSOR_COM3 0x0C
#define OV2640_SENSOR_COM4 0x0D
#define OV2640_SENSOR_AEC 0x10
#define OV2640_SENSOR_CLKRC 0x11
#define OV2640_SENSOR_COM7 0x12
#define OV2640_SENSOR_COM8 0x13
#define OV2640_SENSOR_COM9 0x14
#define OV2640_SENSOR_COM10 0x15
#define OV2640_SENSOR_HREFST 0x17
#define OV2640_SENSOR_HREFEND 0x18
#define OV2640_SENSOR_VSTART 0x19
#define OV2640_SENSOR_VEND 0x1A
#define OV2640_SENSOR_MIDH 0x1C
#define OV2640_SENSOR_MIDL 0x1D
#define OV2640_SENSOR_AEW 0x24
#define OV2640_SENSOR_AEB 0x25
#define OV2640_SENSOR_W 0x26
#define OV2640_SENSOR_REG2A 0x2A
#define OV2640_SENSOR_FRARL 0x2B
#define OV2640_SENSOR_ADDVSL 0x2D
#define OV2640_SENSOR_ADDVHS 0x2E
#define OV2640_SENSOR_YAVG 0x2F
#define OV2640_SENSOR_REG32 0x32
#define OV2640_SENSOR_ARCOM2 0x34
#define OV2640_SENSOR_REG45 0x45
#define OV2640_SENSOR_FLL 0x46
#define OV2640_SENSOR_FLH 0x47
#define OV2640_SENSOR_COM19 0x48
#define OV2640_SENSOR_ZOOMS 0x49
#define OV2640_SENSOR_COM22 0x4B
#define OV2640_SENSOR_COM25 0x4E
#define OV2640_SENSOR_BD50 0x4F
#define OV2640_SENSOR_BD60 0x50
#define OV2640_SENSOR_REG5D 0x5D
#define OV2640_SENSOR_REG5E 0x5E
#define OV2640_SENSOR_REG5F 0x5F
#define OV2640_SENSOR_REG60 0x60
#define OV2640_SENSOR_HISTO_LOW 0x61
#define OV2640_SENSOR_HISTO_HIGH 0x62
uint8_t OV2640_Init(void);
void OV2640_JPEG_Mode(void);
void OV2640_RGB565_Mode(void);
void OV2640_Auto_Exposure(uint8_t level);
void OV2640_Light_Mode(uint8_t mode);
void OV2640_Color_Saturation(uint8_t sat);
void OV2640_Brightness(uint8_t bright);
void OV2640_Contrast(uint8_t contrast);
void OV2640_Special_Effects(uint8_t eft);
void OV2640_Color_Bar(uint8_t sw);
void OV2640_Window_Set(uint16_t sx,uint16_t sy,uint16_t width,uint16_t height);
uint8_t OV2640_OutSize_Set(uint16_t width,uint16_t height);
uint8_t OV2640_ImageWin_Set(uint16_t offx,uint16_t offy,uint16_t width,uint16_t height);
uint8_t OV2640_ImageSize_Set(uint16_t width,uint16_t height);
#endif

View File

@@ -0,0 +1,492 @@
#ifndef _OV2640CFG_H
#define _OV2640CFG_H
#include "ov2640.h"
//////////////////////////////////////////////////////////////////////////////////
//This program is for learning purposes only; it may not be used for any other purpose without the author's permission.
//ALIENTEK STM32F407 development board
//OV2640 driver code
//ALIENTEK (Zhengdian Atom)
//Technical forum: www.openedv.com
//Created: 2014/5/14
//Version: V1.0
//All rights reserved; piracy will be prosecuted.
//Copyright(C) Guangzhou Xingyi Electronic Technology Co., Ltd. 2014-2024
//All rights reserved
//////////////////////////////////////////////////////////////////////////////////
//OV2640 SXGA initialization register table
//Frame rate in this mode is 15 fps
//SXGA(1600*1200)
const uint8_t ov2640_sxga_init_reg_tbl[][2]=
{
0xff, 0x00,
0x2c, 0xff,
0x2e, 0xdf,
0xff, 0x01,
0x3c, 0x32,
//
0x11, 0x00,
0x09, 0x02,
0x04, 0xD8,//horizontal mirror, vertical flip
0x13, 0xe5,
0x14, 0x48,
0x2c, 0x0c,
0x33, 0x78,
0x3a, 0x33,
0x3b, 0xfB,
//
0x3e, 0x00,
0x43, 0x11,
0x16, 0x10,
//
0x39, 0x92,
//
0x35, 0xda,
0x22, 0x1a,
0x37, 0xc3,
0x23, 0x00,
0x34, 0xc0,
0x36, 0x1a,
0x06, 0x88,
0x07, 0xc0,
0x0d, 0x87,
0x0e, 0x41,
0x4c, 0x00,
0x48, 0x00,
0x5B, 0x00,
0x42, 0x03,
//
0x4a, 0x81,
0x21, 0x99,
//
0x24, 0x40,
0x25, 0x38,
0x26, 0x82,
0x5c, 0x00,
0x63, 0x00,
0x46, 0x00,
0x0c, 0x3c,
//
0x61, 0x70,
0x62, 0x80,
0x7c, 0x05,
//
0x20, 0x80,
0x28, 0x30,
0x6c, 0x00,
0x6d, 0x80,
0x6e, 0x00,
0x70, 0x02,
0x71, 0x94,
0x73, 0xc1,
0x3d, 0x34,
0x5a, 0x57,
//
0x12, 0x00,//UXGA 1600*1200
0x17, 0x11,
0x18, 0x75,
0x19, 0x01,
0x1a, 0x97,
0x32, 0x36,
0x03, 0x0f,
0x37, 0x40,
//
0x4f, 0xca,
0x50, 0xa8,
0x5a, 0x23,
0x6d, 0x00,
0x6d, 0x38,
//
0xff, 0x00,
0xe5, 0x7f,
0xf9, 0xc0,
0x41, 0x24,
0xe0, 0x14,
0x76, 0xff,
0x33, 0xa0,
0x42, 0x20,
0x43, 0x18,
0x4c, 0x00,
0x87, 0xd5,
0x88, 0x3f,
0xd7, 0x03,
0xd9, 0x10,
0xd3, 0x82,
//
0xc8, 0x08,
0xc9, 0x80,
//
0x7c, 0x00,
0x7d, 0x00,
0x7c, 0x03,
0x7d, 0x48,
0x7d, 0x48,
0x7c, 0x08,
0x7d, 0x20,
0x7d, 0x10,
0x7d, 0x0e,
//
0x90, 0x00,
0x91, 0x0e,
0x91, 0x1a,
0x91, 0x31,
0x91, 0x5a,
0x91, 0x69,
0x91, 0x75,
0x91, 0x7e,
0x91, 0x88,
0x91, 0x8f,
0x91, 0x96,
0x91, 0xa3,
0x91, 0xaf,
0x91, 0xc4,
0x91, 0xd7,
0x91, 0xe8,
0x91, 0x20,
//
0x92, 0x00,
0x93, 0x06,
0x93, 0xe3,
0x93, 0x05,
0x93, 0x05,
0x93, 0x00,
0x93, 0x04,
0x93, 0x00,
0x93, 0x00,
0x93, 0x00,
0x93, 0x00,
0x93, 0x00,
0x93, 0x00,
0x93, 0x00,
//
0x96, 0x00,
0x97, 0x08,
0x97, 0x19,
0x97, 0x02,
0x97, 0x0c,
0x97, 0x24,
0x97, 0x30,
0x97, 0x28,
0x97, 0x26,
0x97, 0x02,
0x97, 0x98,
0x97, 0x80,
0x97, 0x00,
0x97, 0x00,
//
0xc3, 0xef,
0xa4, 0x00,
0xa8, 0x00,
0xc5, 0x11,
0xc6, 0x51,
0xbf, 0x80,
0xc7, 0x10,
0xb6, 0x66,
0xb8, 0xA5,
0xb7, 0x64,
0xb9, 0x7C,
0xb3, 0xaf,
0xb4, 0x97,
0xb5, 0xFF,
0xb0, 0xC5,
0xb1, 0x94,
0xb2, 0x0f,
0xc4, 0x5c,
//
0xc0, 0xc8,
0xc1, 0x96,
0x8c, 0x00,
0x86, 0x3d,
0x50, 0x00,
0x51, 0x90,
0x52, 0x2c,
0x53, 0x00,
0x54, 0x00,
0x55, 0x88,
0x5a, 0x90,
0x5b, 0x2C,
0x5c, 0x05,
0xd3, 0x02,//be careful with this value in auto mode
//
0xc3, 0xed,
0x7f, 0x00,
0xda, 0x09,
0xe5, 0x1f,
0xe1, 0x67,
0xe0, 0x00,
0xdd, 0x7f,
0x05, 0x00,
};
//OV2640 SVGA initialization register table
//Frame rate in this mode can reach 30 fps
//SVGA 800*600
const uint8_t ov2640_svga_init_reg_tbl[][2]=
{
0xff, 0x00,
0x2c, 0xff,
0x2e, 0xdf,
0xff, 0x01,
0x3c, 0x32,
//
0x11, 0x00,
0x09, 0x02,
0x04, 0xD8,//horizontal mirror, vertical flip
0x13, 0xe5,
0x14, 0x48,
0x2c, 0x0c,
0x33, 0x78,
0x3a, 0x33,
0x3b, 0xfB,
//
0x3e, 0x00,
0x43, 0x11,
0x16, 0x10,
//
0x39, 0x92,
//
0x35, 0xda,
0x22, 0x1a,
0x37, 0xc3,
0x23, 0x00,
0x34, 0xc0,
0x36, 0x1a,
0x06, 0x88,
0x07, 0xc0,
0x0d, 0x87,
0x0e, 0x41,
0x4c, 0x00,
0x48, 0x00,
0x5B, 0x00,
0x42, 0x03,
//
0x4a, 0x81,
0x21, 0x99,
//
0x24, 0x40,
0x25, 0x38,
0x26, 0x82,
0x5c, 0x00,
0x63, 0x00,
0x46, 0x22,
0x0c, 0x3c,
//
0x61, 0x70,
0x62, 0x80,
0x7c, 0x05,
//
0x20, 0x80,
0x28, 0x30,
0x6c, 0x00,
0x6d, 0x80,
0x6e, 0x00,
0x70, 0x02,
0x71, 0x94,
0x73, 0xc1,
0x3d, 0x34,
0x5a, 0x57,
//settings that depend on the selected resolution
0x12, 0x40,//SVGA 800*600
0x17, 0x11,
0x18, 0x43,
0x19, 0x00,
0x1a, 0x4b,
0x32, 0x09,
0x37, 0xc0,
//
0x4f, 0xca,
0x50, 0xa8,
0x5a, 0x23,
0x6d, 0x00,
0x3d, 0x38,
//
0xff, 0x00,
0xe5, 0x7f,
0xf9, 0xc0,
0x41, 0x24,
0xe0, 0x14,
0x76, 0xff,
0x33, 0xa0,
0x42, 0x20,
0x43, 0x18,
0x4c, 0x00,
0x87, 0xd5,
0x88, 0x3f,
0xd7, 0x03,
0xd9, 0x10,
0xd3, 0x82,
//
0xc8, 0x08,
0xc9, 0x80,
//
0x7c, 0x00,
0x7d, 0x00,
0x7c, 0x03,
0x7d, 0x48,
0x7d, 0x48,
0x7c, 0x08,
0x7d, 0x20,
0x7d, 0x10,
0x7d, 0x0e,
//
0x90, 0x00,
0x91, 0x0e,
0x91, 0x1a,
0x91, 0x31,
0x91, 0x5a,
0x91, 0x69,
0x91, 0x75,
0x91, 0x7e,
0x91, 0x88,
0x91, 0x8f,
0x91, 0x96,
0x91, 0xa3,
0x91, 0xaf,
0x91, 0xc4,
0x91, 0xd7,
0x91, 0xe8,
0x91, 0x20,
//
0x92, 0x00,
0x93, 0x06,
0x93, 0xe3,
0x93, 0x05,
0x93, 0x05,
0x93, 0x00,
0x93, 0x04,
0x93, 0x00,
0x93, 0x00,
0x93, 0x00,
0x93, 0x00,
0x93, 0x00,
0x93, 0x00,
0x93, 0x00,
//
0x96, 0x00,
0x97, 0x08,
0x97, 0x19,
0x97, 0x02,
0x97, 0x0c,
0x97, 0x24,
0x97, 0x30,
0x97, 0x28,
0x97, 0x26,
0x97, 0x02,
0x97, 0x98,
0x97, 0x80,
0x97, 0x00,
0x97, 0x00,
//
0xc3, 0xed,
0xa4, 0x00,
0xa8, 0x00,
0xc5, 0x11,
0xc6, 0x51,
0xbf, 0x80,
0xc7, 0x10,
0xb6, 0x66,
0xb8, 0xA5,
0xb7, 0x64,
0xb9, 0x7C,
0xb3, 0xaf,
0xb4, 0x97,
0xb5, 0xFF,
0xb0, 0xC5,
0xb1, 0x94,
0xb2, 0x0f,
0xc4, 0x5c,
//settings that depend on the selected resolution
0xc0, 0x64,
0xc1, 0x4B,
0x8c, 0x00,
0x86, 0x3D,
0x50, 0x00,
0x51, 0xC8,
0x52, 0x96,
0x53, 0x00,
0x54, 0x00,
0x55, 0x00,
0x5a, 0xC8,
0x5b, 0x96,
0x5c, 0x00,
0xd3, 0x02,//be careful with this value in auto mode
//
0xc3, 0xed,
0x7f, 0x00,
0xda, 0x09,
0xe5, 0x1f,
0xe1, 0x67,
0xe0, 0x00,
0xdd, 0x7f,
0x05, 0x00,
};
const uint8_t ov2640_jpeg_reg_tbl[][2]=
{
0xff, 0x01,
0xe0, 0x14,
0xe1, 0x77,
0xe5, 0x1f,
0xd7, 0x03,
0xda, 0x10,
0xe0, 0x00,
};
const uint8_t ov2640_rgb565_reg_tbl[][2]=
{
0xFF, 0x00,
0xDA, 0x09,
0xD7, 0x03,
0xDF, 0x02,
0x33, 0xa0,
0x3C, 0x00,
0xe1, 0x67,
0xff, 0x01,
0xe0, 0x00,
0xe1, 0x00,
0xe5, 0x00,
0xd7, 0x00,
0xda, 0x00,
0xe0, 0x00,
};
const uint8_t ov2640_yuv422_reg_tbl[][2]=
{
0xFF, 0x00,
0xDA, 0x10,
0xD7, 0x03,
0xDF, 0x00,
0x33, 0x80,
0x3C, 0x40,
0xe1, 0x77,
0x00, 0x00,
};
#endif

View File

@@ -0,0 +1,38 @@
#ifndef __SCCB_H
#define __SCCB_H
#include "sys.h"
#include "gpio.h"
//I/O direction configuration
#define SCCB_SDA_IN()  {GPIOB->MODER&=~(3<<(5*2));GPIOB->MODER|=0<<5*2;}  //PB5 input
#define SCCB_SDA_OUT() {GPIOB->MODER&=~(3<<(5*2));GPIOB->MODER|=1<<5*2;}  //PB5 output
#define SCCB_ID 0X60  //OV2640 device ID
void SCCB_Init(void);
void SCCB_Start(void);
void SCCB_Stop(void);
void SCCB_No_Ack(void);
uint8_t SCCB_WR_Byte(uint8_t dat);
uint8_t SCCB_RD_Byte(void);
uint8_t SCCB_WR_Reg(uint8_t reg,uint8_t data);
uint8_t SCCB_RD_Reg(uint8_t reg);
void SCCB_SCL(uint8_t sccb_scl);
void SCCB_SDA(uint8_t sccb_sda);
uint8_t SCCB_READ_SDA(void);
#endif

View File

@@ -0,0 +1,82 @@
#include "delay.h"
#include "sys.h"
#ifdef __cplusplus
extern "C" {
#endif
void delay_us(uint32_t time_us) {
uint32_t clk = 80; // CPU 80MHz
uint32_t ticks = time_us * clk; // time is us
uint32_t told = SysTick->VAL;
uint32_t tnow = told;
uint32_t tcnt = 0;
for(; tcnt<ticks; tnow=SysTick->VAL)
{
if(tnow != told) {
if(tnow < told) {
tcnt += told - tnow;
} else {
tcnt += SysTick->LOAD-tnow + told;
}
told = tnow;
}
}
}
void delay_ms(uint32_t time_ms) {
uint32_t clk = 80; // CPU 80MHz
uint32_t ticks = time_ms * clk * 1000; // time is ms
uint32_t told = SysTick->VAL;
uint32_t tnow = told;
uint32_t tcnt = 0;
for(; tcnt<ticks; tnow=SysTick->VAL)
{
if(tnow != told) {
if(tnow < told) {
tcnt += told - tnow;
} else {
tcnt += SysTick->LOAD-tnow + told;
}
told = tnow;
}
}
}
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,278 @@
#include "lcd_2inch4.h"
#include <string.h>
/*******************************************************************************
function:
Hardware reset
*******************************************************************************/
static void LCD_2IN4_Reset(void)
{
LCD_2IN4_RST_1;
DEV_Delay_ms(100);
LCD_2IN4_RST_0;
DEV_Delay_ms(100);
LCD_2IN4_RST_1;
DEV_Delay_ms(100);
}
/*******************************************************************************
function:
Write data and commands
*******************************************************************************/
static void LCD_2IN4_Write_Command(UBYTE data)
{
LCD_2IN4_CS_0;
LCD_2IN4_DC_0;
DEV_SPI_WRITE(data);
}
static void LCD_2IN4_WriteData_Byte(UBYTE data)
{
LCD_2IN4_CS_0;
LCD_2IN4_DC_1;
DEV_SPI_WRITE(data);
LCD_2IN4_CS_1;
}
void LCD_2IN4_WriteData_Word(UWORD data)
{
LCD_2IN4_CS_0;
LCD_2IN4_DC_1;
DEV_SPI_WRITE((data>>8) & 0xff);
DEV_SPI_WRITE(data);
LCD_2IN4_CS_1;
}
/******************************************************************************
function:
Common register initialization
******************************************************************************/
void LCD_2IN4_Init(void)
{
LCD_2IN4_Reset();
LCD_2IN4_SetBackLight(500);//turn on the backlight
HAL_Delay(100);
//************* Start Initial Sequence **********//
LCD_2IN4_Write_Command(0x11); //Sleep out
HAL_Delay(120); //Delay 120ms
//************* Start Initial Sequence **********//
LCD_2IN4_Write_Command(0xCF);
LCD_2IN4_WriteData_Byte(0x00);
LCD_2IN4_WriteData_Byte(0xC1);
LCD_2IN4_WriteData_Byte(0X30);
LCD_2IN4_Write_Command(0xED);
LCD_2IN4_WriteData_Byte(0x64);
LCD_2IN4_WriteData_Byte(0x03);
LCD_2IN4_WriteData_Byte(0X12);
LCD_2IN4_WriteData_Byte(0X81);
LCD_2IN4_Write_Command(0xE8);
LCD_2IN4_WriteData_Byte(0x85);
LCD_2IN4_WriteData_Byte(0x00);
LCD_2IN4_WriteData_Byte(0x79);
LCD_2IN4_Write_Command(0xCB);
LCD_2IN4_WriteData_Byte(0x39);
LCD_2IN4_WriteData_Byte(0x2C);
LCD_2IN4_WriteData_Byte(0x00);
LCD_2IN4_WriteData_Byte(0x34);
LCD_2IN4_WriteData_Byte(0x02);
LCD_2IN4_Write_Command(0xF7);
LCD_2IN4_WriteData_Byte(0x20);
LCD_2IN4_Write_Command(0xEA);
LCD_2IN4_WriteData_Byte(0x00);
LCD_2IN4_WriteData_Byte(0x00);
LCD_2IN4_Write_Command(0xC0); //Power control
LCD_2IN4_WriteData_Byte(0x1D); //VRH[5:0]
LCD_2IN4_Write_Command(0xC1); //Power control
LCD_2IN4_WriteData_Byte(0x12); //SAP[2:0];BT[3:0]
LCD_2IN4_Write_Command(0xC5); //VCM control
LCD_2IN4_WriteData_Byte(0x33);
LCD_2IN4_WriteData_Byte(0x3F);
LCD_2IN4_Write_Command(0xC7); //VCM control
LCD_2IN4_WriteData_Byte(0x92);
LCD_2IN4_Write_Command(0x3A); // Memory Access Control
LCD_2IN4_WriteData_Byte(0x55);
LCD_2IN4_Write_Command(0x36); // Memory Access Control
LCD_2IN4_WriteData_Byte(0x08);
LCD_2IN4_Write_Command(0xB1);
LCD_2IN4_WriteData_Byte(0x00);
LCD_2IN4_WriteData_Byte(0x12);
LCD_2IN4_Write_Command(0xB6); // Display Function Control
LCD_2IN4_WriteData_Byte(0x0A);
LCD_2IN4_WriteData_Byte(0xA2);
LCD_2IN4_Write_Command(0x44);
LCD_2IN4_WriteData_Byte(0x02);
LCD_2IN4_Write_Command(0xF2); // 3Gamma Function Disable
LCD_2IN4_WriteData_Byte(0x00);
LCD_2IN4_Write_Command(0x26); //Gamma curve selected
LCD_2IN4_WriteData_Byte(0x01);
LCD_2IN4_Write_Command(0xE0); //Set Gamma
LCD_2IN4_WriteData_Byte(0x0F);
LCD_2IN4_WriteData_Byte(0x22);
LCD_2IN4_WriteData_Byte(0x1C);
LCD_2IN4_WriteData_Byte(0x1B);
LCD_2IN4_WriteData_Byte(0x08);
LCD_2IN4_WriteData_Byte(0x0F);
LCD_2IN4_WriteData_Byte(0x48);
LCD_2IN4_WriteData_Byte(0xB8);
LCD_2IN4_WriteData_Byte(0x34);
LCD_2IN4_WriteData_Byte(0x05);
LCD_2IN4_WriteData_Byte(0x0C);
LCD_2IN4_WriteData_Byte(0x09);
LCD_2IN4_WriteData_Byte(0x0F);
LCD_2IN4_WriteData_Byte(0x07);
LCD_2IN4_WriteData_Byte(0x00);
LCD_2IN4_Write_Command(0XE1); //Set Gamma
LCD_2IN4_WriteData_Byte(0x00);
LCD_2IN4_WriteData_Byte(0x23);
LCD_2IN4_WriteData_Byte(0x24);
LCD_2IN4_WriteData_Byte(0x07);
LCD_2IN4_WriteData_Byte(0x10);
LCD_2IN4_WriteData_Byte(0x07);
LCD_2IN4_WriteData_Byte(0x38);
LCD_2IN4_WriteData_Byte(0x47);
LCD_2IN4_WriteData_Byte(0x4B);
LCD_2IN4_WriteData_Byte(0x0A);
LCD_2IN4_WriteData_Byte(0x13);
LCD_2IN4_WriteData_Byte(0x06);
LCD_2IN4_WriteData_Byte(0x30);
LCD_2IN4_WriteData_Byte(0x38);
LCD_2IN4_WriteData_Byte(0x0F);
LCD_2IN4_Write_Command(0x29); //Display on
}
/******************************************************************************
function: Set the display window
parameter :
Xstart: Start UWORD x coordinate
Ystart: Start UWORD y coordinate
Xend : End UWORD x coordinate
Yend : End UWORD y coordinate
******************************************************************************/
void LCD_2IN4_SetWindow(UWORD Xstart, UWORD Ystart, UWORD Xend, UWORD Yend)
{
LCD_2IN4_Write_Command(0x2a);
LCD_2IN4_WriteData_Byte(Xstart >>8);
LCD_2IN4_WriteData_Byte(Xstart & 0xff);
LCD_2IN4_WriteData_Byte((Xend - 1) >> 8);
LCD_2IN4_WriteData_Byte((Xend - 1) & 0xff);
LCD_2IN4_Write_Command(0x2b);
LCD_2IN4_WriteData_Byte(Ystart >>8);
LCD_2IN4_WriteData_Byte(Ystart & 0xff);
LCD_2IN4_WriteData_Byte((Yend - 1) >> 8);
LCD_2IN4_WriteData_Byte((Yend - 1) & 0xff);
LCD_2IN4_Write_Command(0x2C);
}
/******************************************************************************
function: Set the cursor position
parameter :
X: Cursor UWORD x coordinate
Y: Cursor UWORD y coordinate
******************************************************************************/
void LCD_2IN4_SetCursor(UWORD X, UWORD Y)
{
LCD_2IN4_Write_Command(0x2a);
LCD_2IN4_WriteData_Byte(X >> 8);
LCD_2IN4_WriteData_Byte(X);
LCD_2IN4_WriteData_Byte(X >> 8);
LCD_2IN4_WriteData_Byte(X);
LCD_2IN4_Write_Command(0x2b);
LCD_2IN4_WriteData_Byte(Y >> 8);
LCD_2IN4_WriteData_Byte(Y);
LCD_2IN4_WriteData_Byte(Y >> 8);
LCD_2IN4_WriteData_Byte(Y);
LCD_2IN4_Write_Command(0x2C);
}
/******************************************************************************
function: Clear screen function, refresh the screen to a certain color
parameter :
Color : The color you want to clear all the screen
******************************************************************************/
void LCD_2IN4_Clear(UWORD Color)
{
UWORD i,j;
LCD_2IN4_SetWindow(0, 0, LCD_2IN4_WIDTH, LCD_2IN4_HEIGHT);
DEV_Digital_Write(DEV_DC_PIN, 1);
for(i = 0; i < LCD_2IN4_WIDTH; i++){
for(j = 0; j < LCD_2IN4_HEIGHT; j++){
LCD_2IN4_WriteData_Word(Color);
}
}
}
/******************************************************************************
function: Refresh a certain area to the same color
parameter :
Xstart: Start UWORD x coordinate
Ystart: Start UWORD y coordinate
Xend : End UWORD coordinates
Yend : End UWORD coordinates
color : Set the color
******************************************************************************/
void LCD_2IN4_ClearWindow(UWORD Xstart, UWORD Ystart, UWORD Xend, UWORD Yend,UWORD color)
{
UWORD i,j;
LCD_2IN4_SetWindow(Xstart, Ystart, Xend,Yend);
for(i = Ystart; i <= Yend; i++){
for(j = Xstart; j <= Xend; j++){
LCD_2IN4_WriteData_Word(color);
}
}
}
/******************************************************************************
function: Show a picture
parameter :
image: Picture buffer
******************************************************************************/
void LCD_2IN4_Display(UWORD *image,int width, int height)
{
UWORD i,j;
if(width > LCD_2IN4_WIDTH || height > LCD_2IN4_HEIGHT){
printf("Picture size out of range!\r\n");
return;
}
LCD_2IN4_SetWindow(0, 0, width, height);
DEV_Digital_Write(DEV_DC_PIN, 1);
for(i = 0; i < width; i++){
for(j = 0; j < height; j++){
LCD_2IN4_WriteData_Word(*(image+i*height+j));
}
}
}
/******************************************************************************
function: Draw a point
parameter :
X : Set the X coordinate
Y : Set the Y coordinate
Color : Set the color
******************************************************************************/
void LCD_2IN4_DrawPaint(UWORD x, UWORD y, UWORD Color)
{
LCD_2IN4_SetCursor(x, y);
LCD_2IN4_WriteData_Word(Color);
}
/*******************************************************************************
function:
Setting backlight
parameter :
value : Range 0~1000 Duty cycle is value/1000
*******************************************************************************/
void LCD_2IN4_SetBackLight(UWORD Value)
{
DEV_Set_PWM(Value);
}
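
A minimal usage sketch of this driver (illustrative only; it assumes the HAL, SPI1 and TIM4 peripherals have already been initialized by the generated MX_* code):

    DEV_Module_Init();                    // raise DC/CS/RST and start the backlight PWM
    LCD_2IN4_Init();                      // run the panel init sequence above
    LCD_2IN4_Clear(0xFFFF);               // fill the 240*320 screen with white (RGB565)
    LCD_2IN4_DrawPaint(120, 160, 0xF800); // draw a single red pixel near the center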

View File

@@ -0,0 +1,66 @@
/*****************************************************************************
* | File : DEV_Config.c
* | Author : Waveshare team
* | Function : Hardware underlying interface
* | Info :
* Used to abstract the low-level layer of each host MCU
* and improve portability
*----------------
* | This version: V1.0
* | Date : 2018-11-22
* | Info :
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
******************************************************************************/
#include "lcd_config.h"
/********************************************************************************
function: Delay function
note:
Driver_Delay_ms(xms) : Delay x ms
********************************************************************************/
void DEV_delay_ms(uint16_t xms )
{
HAL_Delay(xms);
}
void DEV_SPI_WRite(UBYTE _dat)
{
HAL_SPI_Transmit(&hspi1, (uint8_t *)&_dat, 1, 500);
}
int DEV_Module_Init(void)
{
DEV_Digital_Write(DEV_DC_PIN, 1);
DEV_Digital_Write(DEV_CS_PIN, 1);
DEV_Digital_Write(DEV_RST_PIN, 1);
HAL_TIM_PWM_Start(&htim4, TIM_CHANNEL_1);
return 0;
}
void DEV_Module_Exit(void)
{
DEV_Digital_Write(DEV_DC_PIN, 0);
DEV_Digital_Write(DEV_CS_PIN, 0);
//close
DEV_Digital_Write(DEV_RST_PIN, 0);
HAL_TIM_PWM_Stop(&htim4,TIM_CHANNEL_1);
}

View File

@@ -0,0 +1,431 @@
#include "sys.h"
#include "ov2640.h"
#include "ov2640cfg.h"
//#include "timer.h"
#include "delay.h"
#include "usart.h"
#include "sccb.h"
#include "stdio.h"
//Initialize the OV2640
//After configuration, the default output is a 1600*1200 image!!
//Return value: 0 on success
//              any other value is an error code
void OV2640_PWDN(uint8_t signal)
{
HAL_GPIO_WritePin(GPIOB, GPIO_PIN_15, (GPIO_PinState)signal);
}
void OV2640_RST(uint8_t signal)
{
HAL_GPIO_WritePin(GPIOB, GPIO_PIN_13, (GPIO_PinState)signal);
}
uint8_t OV2640_Init(void)
{
uint16_t i=0;
uint16_t reg;
//Configure the GPIOs
GPIO_InitTypeDef GPIO_InitStructure;
__HAL_RCC_GPIOB_CLK_ENABLE(); //enable the GPIOB clock
HAL_GPIO_WritePin(GPIOB, GPIO_PIN_13|GPIO_PIN_15, GPIO_PIN_RESET);
//Initialize PB13 and PB15 as outputs
GPIO_InitStructure.Pin = GPIO_PIN_13|GPIO_PIN_15;
GPIO_InitStructure.Mode = GPIO_MODE_OUTPUT_PP;
GPIO_InitStructure.Speed = GPIO_SPEED_FAST;
GPIO_InitStructure.Pull = GPIO_PULLUP;//pull-up
HAL_GPIO_Init(GPIOB, &GPIO_InitStructure);//initialize
OV2640_PWDN(0); //POWER ON
delay_ms(10);
OV2640_RST(0); //reset the OV2640
delay_ms(10);
OV2640_RST(1); //release reset
SCCB_Init(); //initialize the SCCB I/O pins
SCCB_WR_Reg(OV2640_DSP_RA_DLMT, 0x01); //select the sensor register bank
SCCB_WR_Reg(OV2640_SENSOR_COM7, 0x80); //soft-reset the OV2640
delay_ms(50);
reg=SCCB_RD_Reg(OV2640_SENSOR_MIDH); //read the manufacturer ID, high byte
reg<<=8;
reg|=SCCB_RD_Reg(OV2640_SENSOR_MIDL); //read the manufacturer ID, low byte
printf("OV2640_MID = %#X\n" , reg);
if(reg!=OV2640_MID)
{
printf("MID:%d\r\n",reg);
return 1;
}
reg=SCCB_RD_Reg(OV2640_SENSOR_PIDH); //read the product ID, high byte
reg<<=8;
reg|=SCCB_RD_Reg(OV2640_SENSOR_PIDL); //read the product ID, low byte
if(reg!=OV2640_PID)
{
printf("HID:%d\r\n",reg);
return 2;
}
for(i=0;i<sizeof(ov2640_svga_init_reg_tbl)/2;i++)
{
SCCB_WR_Reg(ov2640_svga_init_reg_tbl[i][0],ov2640_svga_init_reg_tbl[i][1]);
}
printf("OV2640_init SUCCESS\n");
return 0x00; //ok
}
//Switch the OV2640 to JPEG mode
void OV2640_JPEG_Mode(void)
{
uint16_t i=0;
//Setting: YUV422 format
for(i=0;i<(sizeof(ov2640_yuv422_reg_tbl)/2);i++)
{
SCCB_WR_Reg(ov2640_yuv422_reg_tbl[i][0],ov2640_yuv422_reg_tbl[i][1]);
}
//Setting: output JPEG data
for(i=0;i<(sizeof(ov2640_jpeg_reg_tbl)/2);i++)
{
SCCB_WR_Reg(ov2640_jpeg_reg_tbl[i][0],ov2640_jpeg_reg_tbl[i][1]);
}
}
//Switch the OV2640 to RGB565 mode
void OV2640_RGB565_Mode(void)
{
uint16_t i=0;
//Setting: RGB565 output
for(i=0;i<(sizeof(ov2640_rgb565_reg_tbl)/2);i++)
{
SCCB_WR_Reg(ov2640_rgb565_reg_tbl[i][0],ov2640_rgb565_reg_tbl[i][1]);
}
printf("OV2640_RGB565 SET!\n");
}
//Auto-exposure parameter tables, 5 levels supported
const static uint8_t OV2640_AUTOEXPOSURE_LEVEL[5][8]=
{
{
0xFF,0x01,
0x24,0x20,
0x25,0x18,
0x26,0x60,
},
{
0xFF,0x01,
0x24,0x34,
0x25,0x1c,
0x26,0x00,
},
{
0xFF,0x01,
0x24,0x3e,
0x25,0x38,
0x26,0x81,
},
{
0xFF,0x01,
0x24,0x48,
0x25,0x40,
0x26,0x81,
},
{
0xFF,0x01,
0x24,0x58,
0x25,0x50,
0x26,0x92,
},
};
//Set the OV2640 auto-exposure level
//level: 0~4
void OV2640_Auto_Exposure(uint8_t level)
{
uint8_t i;
uint8_t *p=(uint8_t*)OV2640_AUTOEXPOSURE_LEVEL[level];
for(i=0;i<4;i++)
{
SCCB_WR_Reg(p[i*2],p[i*2+1]);
}
}
//White balance setting
//0: auto
//1: sunny
//2: cloudy
//3: office
//4: home
void OV2640_Light_Mode(uint8_t mode)
{
uint8_t regccval=0X5E;//Sunny
uint8_t regcdval=0X41;
uint8_t regceval=0X54;
switch(mode)
{
case 0://auto
SCCB_WR_Reg(0XFF,0X00);
SCCB_WR_Reg(0XC7,0X10);//AWB ON
return;
case 2://cloudy
regccval=0X65;
regcdval=0X41;
regceval=0X4F;
break;
case 3://office
regccval=0X52;
regcdval=0X41;
regceval=0X66;
break;
case 4://home
regccval=0X42;
regcdval=0X3F;
regceval=0X71;
break;
}
SCCB_WR_Reg(0XFF,0X00);
SCCB_WR_Reg(0XC7,0X40); //AWB OFF
SCCB_WR_Reg(0XCC,regccval);
SCCB_WR_Reg(0XCD,regcdval);
SCCB_WR_Reg(0XCE,regceval);
}
//Color saturation setting
//0: -2
//1: -1
//2:  0
//3: +1
//4: +2
void OV2640_Color_Saturation(uint8_t sat)
{
uint8_t reg7dval=((sat+2)<<4)|0X08;
SCCB_WR_Reg(0XFF,0X00);
SCCB_WR_Reg(0X7C,0X00);
SCCB_WR_Reg(0X7D,0X02);
SCCB_WR_Reg(0X7C,0X03);
SCCB_WR_Reg(0X7D,reg7dval);
SCCB_WR_Reg(0X7D,reg7dval);
}
//Brightness setting
//0: (0X00) -2
//1: (0X10) -1
//2: (0X20)  0
//3: (0X30) +1
//4: (0X40) +2
void OV2640_Brightness(uint8_t bright)
{
SCCB_WR_Reg(0xff, 0x00);
SCCB_WR_Reg(0x7c, 0x00);
SCCB_WR_Reg(0x7d, 0x04);
SCCB_WR_Reg(0x7c, 0x09);
SCCB_WR_Reg(0x7d, bright<<4);
SCCB_WR_Reg(0x7d, 0x00);
}
//Contrast setting
//0: -2
//1: -1
//2:  0
//3: +1
//4: +2
void OV2640_Contrast(uint8_t contrast)
{
uint8_t reg7d0val=0X20;//default: normal mode
uint8_t reg7d1val=0X20;
switch(contrast)
{
case 0://-2
reg7d0val=0X18;
reg7d1val=0X34;
break;
case 1://-1
reg7d0val=0X1C;
reg7d1val=0X2A;
break;
case 3://1
reg7d0val=0X24;
reg7d1val=0X16;
break;
case 4://2
reg7d0val=0X28;
reg7d1val=0X0C;
break;
}
SCCB_WR_Reg(0xff,0x00);
SCCB_WR_Reg(0x7c,0x00);
SCCB_WR_Reg(0x7d,0x04);
SCCB_WR_Reg(0x7c,0x07);
SCCB_WR_Reg(0x7d,0x20);
SCCB_WR_Reg(0x7d,reg7d0val);
SCCB_WR_Reg(0x7d,reg7d1val);
SCCB_WR_Reg(0x7d,0x06);
}
//Special effects setting
//0: normal mode
//1: negative
//2: black and white
//3: reddish
//4: greenish
//5: bluish
//6: vintage
void OV2640_Special_Effects(uint8_t eft)
{
uint8_t reg7d0val=0X00;//default: normal mode
uint8_t reg7d1val=0X80;
uint8_t reg7d2val=0X80;
switch(eft)
{
case 1://negative
reg7d0val=0X40;
break;
case 2://black and white
reg7d0val=0X18;
break;
case 3://reddish
reg7d0val=0X18;
reg7d1val=0X40;
reg7d2val=0XC0;
break;
case 4://greenish
reg7d0val=0X18;
reg7d1val=0X40;
reg7d2val=0X40;
break;
case 5://bluish
reg7d0val=0X18;
reg7d1val=0XA0;
reg7d2val=0X40;
break;
case 6://vintage
reg7d0val=0X18;
reg7d1val=0X40;
reg7d2val=0XA6;
break;
}
SCCB_WR_Reg(0xff,0x00);
SCCB_WR_Reg(0x7c,0x00);
SCCB_WR_Reg(0x7d,reg7d0val);
SCCB_WR_Reg(0x7c,0x05);
SCCB_WR_Reg(0x7d,reg7d1val);
SCCB_WR_Reg(0x7d,reg7d2val);
}
//Color bar test pattern
//sw: 0, disable the color bar
//    1, enable the color bar (note: the OV2640 overlays the color bar on top of the image)
void OV2640_Color_Bar(uint8_t sw)
{
uint8_t reg;
SCCB_WR_Reg(0XFF,0X01);
reg=SCCB_RD_Reg(0X12);
reg&=~(1<<1);
if(sw)reg|=1<<1;
SCCB_WR_Reg(0X12,reg);
}
//Set the sensor output window
//sx,sy: start address
//width,height: width (horizontal) and height (vertical)
void OV2640_Window_Set(uint16_t sx,uint16_t sy,uint16_t width,uint16_t height)
{
uint16_t endx;
uint16_t endy;
uint8_t temp;
endx=sx+width/2; //V*2
endy=sy+height/2;
SCCB_WR_Reg(0XFF,0X01);
temp=SCCB_RD_Reg(0X03); //read the previous Vref value
temp&=0XF0;
temp|=((endy&0X03)<<2)|(sy&0X03);
SCCB_WR_Reg(0X03,temp); //set the low bits of Vref start and end
SCCB_WR_Reg(0X19,sy>>2); //set the high bits of Vref start
SCCB_WR_Reg(0X1A,endy>>2); //set the high bits of Vref end
temp=SCCB_RD_Reg(0X32); //read the previous Href value
temp&=0XC0;
temp|=((endx&0X07)<<3)|(sx&0X07);
SCCB_WR_Reg(0X32,temp); //set the low bits of Href start and end
SCCB_WR_Reg(0X17,sx>>3); //set the high bits of Href start
SCCB_WR_Reg(0X18,endx>>3); //set the high bits of Href end
}
//Set the image output size
//The output resolution of the OV2640 is entirely determined by this function
//width,height: width (horizontal) and height (vertical); both must be multiples of 4
//Return value: 0 on success
//              any other value means the setting failed
uint8_t OV2640_OutSize_Set(uint16_t width,uint16_t height)
{
uint16_t outh;
uint16_t outw;
uint8_t temp;
if(width%4)return 1;
if(height%4)return 2;
outw=width/4;
outh=height/4;
SCCB_WR_Reg(0XFF,0X00);
SCCB_WR_Reg(0XE0,0X04);
SCCB_WR_Reg(0X5A,outw&0XFF); //set the low 8 bits of OUTW
SCCB_WR_Reg(0X5B,outh&0XFF); //set the low 8 bits of OUTH
temp=(outw>>8)&0X03;
temp|=(outh>>6)&0X04;
SCCB_WR_Reg(0X5C,temp); //set the high bits of OUTH/OUTW
SCCB_WR_Reg(0XE0,0X00);
return 0;
}
//Set the image window size
//OV2640_ImageSize_Set determines the sensor output resolution;
//this function opens a window within that range, which serves as the input of OV2640_OutSize_Set.
//Note: the width and height given here must be greater than or equal to those given to OV2640_OutSize_Set.
//      Based on the width and height set here and those set by OV2640_OutSize_Set, the DSP
//      automatically computes the scaling ratio for the data output to the external device.
//width,height: width (horizontal) and height (vertical); both must be multiples of 4
//Return value: 0 on success
//              any other value means the setting failed
uint8_t OV2640_ImageWin_Set(uint16_t offx,uint16_t offy,uint16_t width,uint16_t height)
{
uint16_t hsize;
uint16_t vsize;
uint8_t temp;
if(width%4)return 1;
if(height%4)return 2;
hsize=width/4;
vsize=height/4;
SCCB_WR_Reg(0XFF,0X00);
SCCB_WR_Reg(0XE0,0X04);
SCCB_WR_Reg(0X51,hsize&0XFF); //set the low 8 bits of H_SIZE
SCCB_WR_Reg(0X52,vsize&0XFF); //set the low 8 bits of V_SIZE
SCCB_WR_Reg(0X53,offx&0XFF); //set the low 8 bits of offx
SCCB_WR_Reg(0X54,offy&0XFF); //set the low 8 bits of offy
temp=(vsize>>1)&0X80;
temp|=(offy>>4)&0X70;
temp|=(hsize>>5)&0X08;
temp|=(offx>>8)&0X07;
SCCB_WR_Reg(0X55,temp); //set the high bits of H_SIZE/V_SIZE/OFFX/OFFY
SCCB_WR_Reg(0X57,(hsize>>2)&0X80); //set the remaining high bit of H_SIZE
SCCB_WR_Reg(0XE0,0X00);
return 0;
}
//Set the image (sensor) size, i.e. the output resolution of the selected format
//UXGA:1600*1200, SVGA:800*600, CIF:352*288
//width,height: image width and image height
//Return value: 0 on success
//              any other value means the setting failed
uint8_t OV2640_ImageSize_Set(uint16_t width,uint16_t height)
{
uint8_t temp;
SCCB_WR_Reg(0XFF,0X00);
SCCB_WR_Reg(0XE0,0X04);
SCCB_WR_Reg(0XC0,(width)>>3&0XFF); //set bits [10:3] of HSIZE
SCCB_WR_Reg(0XC1,(height)>>3&0XFF); //set bits [10:3] of VSIZE
temp=(width&0X07)<<3;
temp|=height&0X07;
temp|=(width>>4)&0X80;
SCCB_WR_Reg(0X8C,temp);
SCCB_WR_Reg(0XE0,0X00);
return 0;
}
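/* Note (not part of the original file): a typical bring-up sketch for the person-detection
   use case, using only the functions defined above; DCMI/DMA capture is configured
   elsewhere in this commit, and error handling is kept minimal for illustration. */
/*
    if (OV2640_Init() != 0) {                  // probe and configure the sensor (SVGA table)
        printf("OV2640 init failed\r\n");
    }
    OV2640_RGB565_Mode();                      // output RGB565 pixels
    OV2640_OutSize_Set(OV2640_PIXEL_WIDTH,     // scale the output down to the 96*96
                       OV2640_PIXEL_HEIGHT);   // size defined in ov2640.h
*/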

View File

@@ -0,0 +1,167 @@
#include "sys.h"
#include "sccb.h"
#include "stdio.h"
#include "delay.h"
void SCCB_SCL(uint8_t sccb_scl)
{
HAL_GPIO_WritePin(GPIOB, GPIO_PIN_4, (GPIO_PinState)sccb_scl);
}
void SCCB_SDA(uint8_t sccb_sda)
{
HAL_GPIO_WritePin(GPIOB, GPIO_PIN_5, (GPIO_PinState)sccb_sda);
}
uint8_t SCCB_READ_SDA()
{
uint8_t sccb_sda = HAL_GPIO_ReadPin(GPIOB, GPIO_PIN_5);
return sccb_sda;
}
//Initialize the SCCB interface
void SCCB_Init(void)
{
GPIO_InitTypeDef GPIO_InitStructure;
__HAL_RCC_GPIOB_CLK_ENABLE(); //enable the GPIOB clock
//Initialize PB4 and PB5 as outputs
GPIO_InitStructure.Pin = GPIO_PIN_4|GPIO_PIN_5;//PB4, PB5
GPIO_InitStructure.Mode = GPIO_MODE_OUTPUT_PP; //push-pull output
GPIO_InitStructure.Speed = GPIO_SPEED_FAST;//100MHz
GPIO_InitStructure.Pull = GPIO_PULLUP;//pull-up
HAL_GPIO_Init(GPIOB, &GPIO_InitStructure);//initialize
SCCB_SDA_OUT();
}
//SCCB start signal
//While SCL is high, a high-to-low transition on SDA is the SCCB start signal
//In the active state, both SDA and SCL are low
void SCCB_Start(void)
{
SCCB_SDA(1); //drive SDA high
SCCB_SCL(1); //SDA goes from high to low while SCL is high
delay_us(50);
SCCB_SDA(0);
delay_us(50);
SCCB_SCL(0); //pull SCL low again; required for single-operation functions
}
//SCCB stop signal
//While SCL is high, a low-to-high transition on SDA is the SCCB stop signal
//In the idle state, both SDA and SCL are high
void SCCB_Stop(void)
{
SCCB_SDA(0);
delay_us(50);
SCCB_SCL(1);
delay_us(50);
SCCB_SDA(1);
delay_us(50);
}
//Generate a no-acknowledge (NA) signal
void SCCB_No_Ack(void)
{
delay_us(50);
SCCB_SDA(1);
SCCB_SCL(1);
delay_us(50);
SCCB_SCL(0);
delay_us(50);
SCCB_SDA(0);
delay_us(50);
}
//SCCB: write one byte
//Return value: 0 on success; 1 on failure.
uint8_t SCCB_WR_Byte(uint8_t dat)
{
uint8_t j,res;
for(j=0;j<8;j++) //loop 8 times to send the data bits
{
if(dat&0x80)SCCB_SDA(1);
else SCCB_SDA(0);
dat<<=1;
delay_us(50);
SCCB_SCL(1);
delay_us(50);
SCCB_SCL(0);
}
SCCB_SDA_IN(); //set SDA as input
delay_us(50);
SCCB_SCL(1); //receive the ninth bit to check whether the transfer succeeded
delay_us(50);
if(SCCB_READ_SDA())res=1; //SDA=1: transfer failed, return 1
else res=0; //SDA=0: transfer succeeded, return 0
SCCB_SCL(0);
SCCB_SDA_OUT(); //set SDA as output
return res;
}
//SCCB: read one byte
//Data is sampled on the rising edge of SCL
//Return value: the byte read
uint8_t SCCB_RD_Byte(void)
{
uint8_t temp=0,j;
SCCB_SDA_IN(); //set SDA as input
for(j=8;j>0;j--) //loop 8 times to receive the data bits
{
delay_us(50);
SCCB_SCL(1);
temp=temp<<1;
if(SCCB_READ_SDA())temp++;
delay_us(50);
SCCB_SCL(0);
}
SCCB_SDA_OUT(); //set SDA as output
return temp;
}
//Write a register
//Return value: 0 on success; 1 on failure.
uint8_t SCCB_WR_Reg(uint8_t reg,uint8_t data)
{
uint8_t res=0;
SCCB_Start(); //start the SCCB transfer
if(SCCB_WR_Byte(SCCB_ID)){
res=1; //write the device ID
}
delay_us(100);
if(SCCB_WR_Byte(reg))res=1; //write the register address
delay_us(100);
if(SCCB_WR_Byte(data))res=1; //write the data
SCCB_Stop();
return res;
}
//Read a register
//Return value: the register value read
uint8_t SCCB_RD_Reg(uint8_t reg)
{
uint8_t val=0;
SCCB_Start(); //start the SCCB transfer
SCCB_WR_Byte(SCCB_ID); //write the device ID
delay_us(100);
SCCB_WR_Byte(reg); //write the register address
delay_us(100);
SCCB_Stop();
delay_us(100);
//after setting the register address, perform the read
SCCB_Start();
SCCB_WR_Byte(SCCB_ID|0X01); //send the read command
delay_us(100);
val=SCCB_RD_Byte(); //read the data
SCCB_No_Ack();
SCCB_Stop();
return val;
}

View File

@@ -0,0 +1,58 @@
/**
******************************************************************************
* File Name : DCMI.h
* Description : This file provides code for the configuration
* of the DCMI instances.
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
* opensource.org/licenses/BSD-3-Clause
*
******************************************************************************
*/
/* Define to prevent recursive inclusion -------------------------------------*/
#ifndef __dcmi_H
#define __dcmi_H
#ifdef __cplusplus
extern "C" {
#endif
/* Includes ------------------------------------------------------------------*/
#include "main.h"
/* USER CODE BEGIN Includes */
/* USER CODE END Includes */
extern DCMI_HandleTypeDef hdcmi;
/* USER CODE BEGIN Private defines */
/* USER CODE END Private defines */
void MX_DCMI_Init(void);
/* USER CODE BEGIN Prototypes */
/* USER CODE END Prototypes */
#ifdef __cplusplus
}
#endif
#endif /*__ dcmi_H */
/**
* @}
*/
/**
* @}
*/
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/

View File

@@ -0,0 +1,56 @@
/**
******************************************************************************
* File Name : dma.h
* Description : This file contains all the function prototypes for
* the dma.c file
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
* opensource.org/licenses/BSD-3-Clause
*
******************************************************************************
*/
/* Define to prevent recursive inclusion -------------------------------------*/
#ifndef __dma_H
#define __dma_H
#ifdef __cplusplus
extern "C" {
#endif
/* Includes ------------------------------------------------------------------*/
#include "main.h"
/* DMA memory to memory transfer handles -------------------------------------*/
/* USER CODE BEGIN Includes */
/* USER CODE END Includes */
/* USER CODE BEGIN Private defines */
/* USER CODE END Private defines */
void MX_DMA_Init(void);
/* USER CODE BEGIN Prototypes */
/* USER CODE END Prototypes */
#ifdef __cplusplus
}
#endif
#endif /* __dma_H */
/**
* @}
*/
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/

View File

@@ -1,12 +1,12 @@
/**
******************************************************************************
* File Name : gpio.h
* Description : This file contains all the functions prototypes for
* the gpio
* Description : This file contains all the functions prototypes for
* the gpio
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2019 STMicroelectronics.
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,

View File

@@ -0,0 +1,58 @@
/**
******************************************************************************
* File Name : I2C.h
* Description : This file provides code for the configuration
* of the I2C instances.
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
* opensource.org/licenses/BSD-3-Clause
*
******************************************************************************
*/
/* Define to prevent recursive inclusion -------------------------------------*/
#ifndef __i2c_H
#define __i2c_H
#ifdef __cplusplus
extern "C" {
#endif
/* Includes ------------------------------------------------------------------*/
#include "main.h"
/* USER CODE BEGIN Includes */
/* USER CODE END Includes */
extern I2C_HandleTypeDef hi2c1;
/* USER CODE BEGIN Private defines */
/* USER CODE END Private defines */
void MX_I2C1_Init(void);
/* USER CODE BEGIN Prototypes */
/* USER CODE END Prototypes */
#ifdef __cplusplus
}
#endif
#endif /*__ i2c_H */
/**
* @}
*/
/**
* @}
*/
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/

View File

@@ -60,20 +60,34 @@ void Error_Handler(void);
/* Private defines -----------------------------------------------------------*/
#define B1_Pin GPIO_PIN_13
#define B1_GPIO_Port GPIOC
#define LD3_Pin GPIO_PIN_14
#define LD3_GPIO_Port GPIOB
#define LCD_CLK_Pin GPIO_PIN_5
#define LCD_CLK_GPIO_Port GPIOA
#define LCD_DIN_Pin GPIO_PIN_7
#define LCD_DIN_GPIO_Port GPIOA
#define LCD_DC_Pin GPIO_PIN_12
#define LCD_DC_GPIO_Port GPIOB
#define LED_Pin GPIO_PIN_14
#define LED_GPIO_Port GPIOB
#define USB_OverCurrent_Pin GPIO_PIN_5
#define USB_OverCurrent_GPIO_Port GPIOG
#define USB_PowerSwitchOn_Pin GPIO_PIN_6
#define USB_PowerSwitchOn_GPIO_Port GPIOG
#define STLK_RX_Pin GPIO_PIN_7
#define STLK_RX_GPIO_Port GPIOG
#define STLK_TX_Pin GPIO_PIN_8
#define STLK_TX_GPIO_Port GPIOG
#define LCD_RST_Pin GPIO_PIN_11
#define LCD_RST_GPIO_Port GPIOA
#define LCD_CS_Pin GPIO_PIN_12
#define LCD_CS_GPIO_Port GPIOA
#define TMS_Pin GPIO_PIN_13
#define TMS_GPIO_Port GPIOA
#define TCK_Pin GPIO_PIN_14
#define TCK_GPIO_Port GPIOA
#define SWO_Pin GPIO_PIN_3
#define SWO_GPIO_Port GPIOB
#define LD2_Pin GPIO_PIN_7
#define LD2_GPIO_Port GPIOB
#define LCD_BL_Pin GPIO_PIN_6
#define LCD_BL_GPIO_Port GPIOB
/* USER CODE BEGIN Private defines */
/* USER CODE END Private defines */

View File

@@ -8,8 +8,14 @@
#include "stm32l4xx_hal.h"
#include "usart.h"
#include "gpio.h"
#include "dcmi.h"
#include "dma.h"
#include "i2c.h"
#include "spi.h"
#include "tim.h"
#include "ov2640.h"
#include "lcd_2inch4.h"
#include "tos_k.h"
void board_init(void);
void SystemClock_Config(void);

View File

@@ -0,0 +1,58 @@
/**
******************************************************************************
* File Name : SPI.h
* Description : This file provides code for the configuration
* of the SPI instances.
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
* opensource.org/licenses/BSD-3-Clause
*
******************************************************************************
*/
/* Define to prevent recursive inclusion -------------------------------------*/
#ifndef __spi_H
#define __spi_H
#ifdef __cplusplus
extern "C" {
#endif
/* Includes ------------------------------------------------------------------*/
#include "main.h"
/* USER CODE BEGIN Includes */
/* USER CODE END Includes */
extern SPI_HandleTypeDef hspi1;
/* USER CODE BEGIN Private defines */
/* USER CODE END Private defines */
void MX_SPI1_Init(void);
/* USER CODE BEGIN Prototypes */
/* USER CODE END Prototypes */
#ifdef __cplusplus
}
#endif
#endif /*__ spi_H */
/**
* @}
*/
/**
* @}
*/
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/

View File

@@ -1,11 +1,11 @@
/**
******************************************************************************
* @file stm32l4xx_hal_conf.h
* @brief HAL configuration file.
* @brief HAL configuration file.
******************************************************************************
* @attention
*
* <h2><center>&copy; COPYRIGHT(c) 2019 STMicroelectronics</center></h2>
* <h2><center>&copy; COPYRIGHT(c) 2020 STMicroelectronics</center></h2>
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
@@ -30,7 +30,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************
*/
*/
/* Define to prevent recursive inclusion -------------------------------------*/
#ifndef __STM32L4xx_HAL_CONF_H
@@ -45,10 +45,10 @@
/* ########################## Module Selection ############################## */
/**
* @brief This is the list of modules to be used in the HAL driver
* @brief This is the list of modules to be used in the HAL driver
*/
#define HAL_MODULE_ENABLED
#define HAL_MODULE_ENABLED
/*#define HAL_ADC_MODULE_ENABLED */
/*#define HAL_CRYP_MODULE_ENABLED */
/*#define HAL_CAN_MODULE_ENABLED */
@@ -56,7 +56,7 @@
/*#define HAL_CRC_MODULE_ENABLED */
/*#define HAL_CRYP_MODULE_ENABLED */
/*#define HAL_DAC_MODULE_ENABLED */
/*#define HAL_DCMI_MODULE_ENABLED */
#define HAL_DCMI_MODULE_ENABLED
/*#define HAL_DMA2D_MODULE_ENABLED */
/*#define HAL_DFSDM_MODULE_ENABLED */
/*#define HAL_DSI_MODULE_ENABLED */
@@ -85,17 +85,17 @@
/*#define HAL_SD_MODULE_ENABLED */
/*#define HAL_SMBUS_MODULE_ENABLED */
/*#define HAL_SMARTCARD_MODULE_ENABLED */
/*#define HAL_SPI_MODULE_ENABLED */
#define HAL_SPI_MODULE_ENABLED
/*#define HAL_SRAM_MODULE_ENABLED */
/*#define HAL_SWPMI_MODULE_ENABLED */
/*#define HAL_TIM_MODULE_ENABLED */
#define HAL_TIM_MODULE_ENABLED
/*#define HAL_TSC_MODULE_ENABLED */
#define HAL_UART_MODULE_ENABLED
/*#define HAL_USART_MODULE_ENABLED */
/*#define HAL_WWDG_MODULE_ENABLED */
/*#define HAL_EXTI_MODULE_ENABLED */
#define HAL_GPIO_MODULE_ENABLED
#define HAL_EXTI_MODULE_ENABLED
#define HAL_EXTI_MODULE_ENABLED
#define HAL_I2C_MODULE_ENABLED
#define HAL_DMA_MODULE_ENABLED
#define HAL_RCC_MODULE_ENABLED
@@ -107,9 +107,9 @@
/**
* @brief Adjust the value of External High Speed oscillator (HSE) used in your application.
* This value is used by the RCC HAL module to compute the system frequency
* (when HSE is used as system clock source, directly or through the PLL).
* (when HSE is used as system clock source, directly or through the PLL).
*/
#if !defined (HSE_VALUE)
#if !defined (HSE_VALUE)
#define HSE_VALUE ((uint32_t)8000000U) /*!< Value of the External oscillator in Hz */
#endif /* HSE_VALUE */
@@ -127,7 +127,7 @@
/**
* @brief Internal High Speed oscillator (HSI) value.
* This value is used by the RCC HAL module to compute the system frequency
* (when HSI is used as system clock source, directly or through the PLL).
* (when HSI is used as system clock source, directly or through the PLL).
*/
#if !defined (HSI_VALUE)
#define HSI_VALUE ((uint32_t)16000000U) /*!< Value of the Internal oscillator in Hz*/
@@ -140,7 +140,7 @@
* When the CRS is not used, the HSI48 RC oscillator runs on its default frequency
* which is subject to manufacturing process variations.
*/
#if !defined (HSI48_VALUE)
#if !defined (HSI48_VALUE)
#define HSI48_VALUE ((uint32_t)48000000U) /*!< Value of the Internal High Speed oscillator for USB FS/SDMMC/RNG in Hz.
The real value may vary depending on manufacturing process variations.*/
#endif /* HSI48_VALUE */
@@ -148,7 +148,7 @@
/**
* @brief Internal Low Speed oscillator (LSI) value.
*/
#if !defined (LSI_VALUE)
#if !defined (LSI_VALUE)
#define LSI_VALUE ((uint32_t)32000U) /*!< LSI Typical Value in Hz*/
#endif /* LSI_VALUE */ /*!< Value of the Internal Low Speed oscillator in Hz
The real value may vary depending on the variations
@@ -168,7 +168,7 @@
/**
* @brief External clock source for SAI1 peripheral
* This value is used by the RCC HAL module to compute the SAI1 & SAI2 clock source
* This value is used by the RCC HAL module to compute the SAI1 & SAI2 clock source
* frequency.
*/
#if !defined (EXTERNAL_SAI1_CLOCK_VALUE)
@@ -177,7 +177,7 @@
/**
* @brief External clock source for SAI2 peripheral
* This value is used by the RCC HAL module to compute the SAI1 & SAI2 clock source
* This value is used by the RCC HAL module to compute the SAI1 & SAI2 clock source
* frequency.
*/
#if !defined (EXTERNAL_SAI2_CLOCK_VALUE)
@@ -190,18 +190,18 @@
/* ########################### System Configuration ######################### */
/**
* @brief This is the HAL system configuration section
*/
#define VDD_VALUE ((uint32_t)3300U) /*!< Value of VDD in mv */
#define TICK_INT_PRIORITY ((uint32_t)0U) /*!< tick interrupt priority */
#define USE_RTOS 0U
*/
#define VDD_VALUE ((uint32_t)3300U) /*!< Value of VDD in mv */
#define TICK_INT_PRIORITY ((uint32_t)0U) /*!< tick interrupt priority */
#define USE_RTOS 0U
#define PREFETCH_ENABLE 0U
#define INSTRUCTION_CACHE_ENABLE 1U
#define DATA_CACHE_ENABLE 1U
/* ########################## Assert Selection ############################## */
/**
* @brief Uncomment the line below to expanse the "assert_param" macro in the
* @brief Uncomment the line below to expanse the "assert_param" macro in the
* HAL drivers code
*/
/* #define USE_FULL_ASSERT 1U */

View File

@@ -24,7 +24,7 @@
#ifdef __cplusplus
extern "C" {
#endif
#endif
/* Private includes ----------------------------------------------------------*/
/* USER CODE BEGIN Includes */
@@ -56,6 +56,8 @@ void SVC_Handler(void);
void DebugMon_Handler(void);
void PendSV_Handler(void);
void SysTick_Handler(void);
void DMA2_Channel6_IRQHandler(void);
void DCMI_IRQHandler(void);
/* USER CODE BEGIN EFP */
/* USER CODE END EFP */

View File

@@ -0,0 +1,60 @@
/**
******************************************************************************
* File Name : TIM.h
* Description : This file provides code for the configuration
* of the TIM instances.
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
* opensource.org/licenses/BSD-3-Clause
*
******************************************************************************
*/
/* Define to prevent recursive inclusion -------------------------------------*/
#ifndef __tim_H
#define __tim_H
#ifdef __cplusplus
extern "C" {
#endif
/* Includes ------------------------------------------------------------------*/
#include "main.h"
/* USER CODE BEGIN Includes */
/* USER CODE END Includes */
extern TIM_HandleTypeDef htim4;
/* USER CODE BEGIN Private defines */
/* USER CODE END Private defines */
void MX_TIM4_Init(void);
void HAL_TIM_MspPostInit(TIM_HandleTypeDef *htim);
/* USER CODE BEGIN Prototypes */
/* USER CODE END Prototypes */
#ifdef __cplusplus
}
#endif
#endif /*__ tim_H */
/**
* @}
*/
/**
* @}
*/
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/

View File

@@ -6,7 +6,7 @@
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2019 STMicroelectronics.
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
@@ -44,6 +44,7 @@ void MX_USART1_UART_Init(void);
void MX_USART2_UART_Init(void);
void MX_USART3_UART_Init(void);
/* USER CODE BEGIN Prototypes */
/* USER CODE END Prototypes */

View File

@@ -0,0 +1,208 @@
/**
******************************************************************************
* File Name : DCMI.c
* Description : This file provides code for the configuration
* of the DCMI instances.
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
* opensource.org/licenses/BSD-3-Clause
*
******************************************************************************
*/
/* Includes ------------------------------------------------------------------*/
#include "dcmi.h"
/* USER CODE BEGIN 0 */
/* USER CODE END 0 */
DCMI_HandleTypeDef hdcmi;
DMA_HandleTypeDef hdma_dcmi;
/* DCMI init function */
void MX_DCMI_Init(void)
{
hdcmi.Instance = DCMI;
hdcmi.Init.SynchroMode = DCMI_SYNCHRO_HARDWARE;
hdcmi.Init.PCKPolarity = DCMI_PCKPOLARITY_RISING;
hdcmi.Init.VSPolarity = DCMI_VSPOLARITY_LOW;
hdcmi.Init.HSPolarity = DCMI_HSPOLARITY_LOW;
hdcmi.Init.CaptureRate = DCMI_CR_ALL_FRAME;
hdcmi.Init.ExtendedDataMode = DCMI_EXTEND_DATA_8B;
hdcmi.Init.JPEGMode = DCMI_JPEG_ENABLE;
hdcmi.Init.ByteSelectMode = DCMI_BSM_ALL;
hdcmi.Init.ByteSelectStart = DCMI_OEBS_ODD;
hdcmi.Init.LineSelectMode = DCMI_LSM_ALL;
hdcmi.Init.LineSelectStart = DCMI_OELS_ODD;
if (HAL_DCMI_Init(&hdcmi) != HAL_OK)
{
Error_Handler();
}
}
void HAL_DCMI_MspInit(DCMI_HandleTypeDef* dcmiHandle)
{
GPIO_InitTypeDef GPIO_InitStruct = {0};
if(dcmiHandle->Instance==DCMI)
{
/* USER CODE BEGIN DCMI_MspInit 0 */
/* USER CODE END DCMI_MspInit 0 */
/* DCMI clock enable */
__HAL_RCC_DCMI_CLK_ENABLE();
__HAL_RCC_GPIOE_CLK_ENABLE();
__HAL_RCC_GPIOA_CLK_ENABLE();
__HAL_RCC_GPIOD_CLK_ENABLE();
__HAL_RCC_GPIOC_CLK_ENABLE();
__HAL_RCC_GPIOB_CLK_ENABLE();
/**DCMI GPIO Configuration
PE4 ------> DCMI_D4
PE5 ------> DCMI_D6
PE6 ------> DCMI_D7
PA4 ------> DCMI_HSYNC
PD9 ------> DCMI_PIXCLK
PC6 ------> DCMI_D0
PC7 ------> DCMI_D1
PC8 ------> DCMI_D2
PC9 ------> DCMI_D3
PD3 ------> DCMI_D5
PB7 ------> DCMI_VSYNC
*/
GPIO_InitStruct.Pin = GPIO_PIN_4|GPIO_PIN_5|GPIO_PIN_6;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
GPIO_InitStruct.Alternate = GPIO_AF10_DCMI;
HAL_GPIO_Init(GPIOE, &GPIO_InitStruct);
GPIO_InitStruct.Pin = GPIO_PIN_4;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
GPIO_InitStruct.Alternate = GPIO_AF10_DCMI;
HAL_GPIO_Init(GPIOA, &GPIO_InitStruct);
GPIO_InitStruct.Pin = GPIO_PIN_9;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
GPIO_InitStruct.Alternate = GPIO_AF10_DCMI;
HAL_GPIO_Init(GPIOD, &GPIO_InitStruct);
GPIO_InitStruct.Pin = GPIO_PIN_6|GPIO_PIN_7|GPIO_PIN_8;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
GPIO_InitStruct.Alternate = GPIO_AF10_DCMI;
HAL_GPIO_Init(GPIOC, &GPIO_InitStruct);
GPIO_InitStruct.Pin = GPIO_PIN_9;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
GPIO_InitStruct.Alternate = GPIO_AF4_DCMI;
HAL_GPIO_Init(GPIOC, &GPIO_InitStruct);
GPIO_InitStruct.Pin = GPIO_PIN_3;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
GPIO_InitStruct.Alternate = GPIO_AF4_DCMI;
HAL_GPIO_Init(GPIOD, &GPIO_InitStruct);
GPIO_InitStruct.Pin = GPIO_PIN_7;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
GPIO_InitStruct.Alternate = GPIO_AF10_DCMI;
HAL_GPIO_Init(GPIOB, &GPIO_InitStruct);
/* DCMI DMA Init */
/* DCMI Init */
hdma_dcmi.Instance = DMA2_Channel6;
hdma_dcmi.Init.Request = DMA_REQUEST_0;
hdma_dcmi.Init.Direction = DMA_PERIPH_TO_MEMORY;
hdma_dcmi.Init.PeriphInc = DMA_PINC_DISABLE;
hdma_dcmi.Init.MemInc = DMA_MINC_ENABLE;
hdma_dcmi.Init.PeriphDataAlignment = DMA_PDATAALIGN_WORD;
hdma_dcmi.Init.MemDataAlignment = DMA_MDATAALIGN_WORD;
hdma_dcmi.Init.Mode = DMA_CIRCULAR;
hdma_dcmi.Init.Priority = DMA_PRIORITY_HIGH;
if (HAL_DMA_Init(&hdma_dcmi) != HAL_OK)
{
Error_Handler();
}
__HAL_LINKDMA(dcmiHandle,DMA_Handle,hdma_dcmi);
/* DCMI interrupt Init */
HAL_NVIC_SetPriority(DCMI_IRQn, 0, 0);
HAL_NVIC_EnableIRQ(DCMI_IRQn);
/* USER CODE BEGIN DCMI_MspInit 1 */
/* USER CODE END DCMI_MspInit 1 */
}
}
void HAL_DCMI_MspDeInit(DCMI_HandleTypeDef* dcmiHandle)
{
if(dcmiHandle->Instance==DCMI)
{
/* USER CODE BEGIN DCMI_MspDeInit 0 */
/* USER CODE END DCMI_MspDeInit 0 */
/* Peripheral clock disable */
__HAL_RCC_DCMI_CLK_DISABLE();
/**DCMI GPIO Configuration
PE4 ------> DCMI_D4
PE5 ------> DCMI_D6
PE6 ------> DCMI_D7
PA4 ------> DCMI_HSYNC
PD9 ------> DCMI_PIXCLK
PC6 ------> DCMI_D0
PC7 ------> DCMI_D1
PC8 ------> DCMI_D2
PC9 ------> DCMI_D3
PD3 ------> DCMI_D5
PB7 ------> DCMI_VSYNC
*/
HAL_GPIO_DeInit(GPIOE, GPIO_PIN_4|GPIO_PIN_5|GPIO_PIN_6);
HAL_GPIO_DeInit(GPIOA, GPIO_PIN_4);
HAL_GPIO_DeInit(GPIOD, GPIO_PIN_9|GPIO_PIN_3);
HAL_GPIO_DeInit(GPIOC, GPIO_PIN_6|GPIO_PIN_7|GPIO_PIN_8|GPIO_PIN_9);
HAL_GPIO_DeInit(GPIOB, GPIO_PIN_7);
/* DCMI DMA DeInit */
HAL_DMA_DeInit(dcmiHandle->DMA_Handle);
/* DCMI interrupt Deinit */
HAL_NVIC_DisableIRQ(DCMI_IRQn);
/* USER CODE BEGIN DCMI_MspDeInit 1 */
/* USER CODE END DCMI_MspDeInit 1 */
}
}
/* USER CODE BEGIN 1 */
/* USER CODE END 1 */
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/
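The DCMI above captures continuously through DMA2_Channel6 in circular mode, so the frame buffer is overwritten while it is being read unless capture is paused. A minimal sketch (not part of the generated file) of pausing capture around frame consumption, assuming the hdcmi handle from this file and the frame_flag set in mcu_init.c; the process callback is hypothetical:

extern DCMI_HandleTypeDef hdcmi;
extern uint8_t frame_flag;

/* Pause the circular DCMI/DMA capture while one frame is consumed. */
void camera_consume_frame(void (*process)(void))
{
  if (frame_flag) {
    HAL_DCMI_Suspend(&hdcmi);   /* stop DCMI capture; the DMA setup is kept */
    process();                  /* hypothetical callback: read/convert the RGB565 frame */
    frame_flag = 0;
    HAL_DCMI_Resume(&hdcmi);    /* re-enable capture */
  }
}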

View File

@@ -0,0 +1,63 @@
/**
******************************************************************************
* File Name : dma.c
* Description : This file provides code for the configuration
* of all the requested memory to memory DMA transfers.
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
* opensource.org/licenses/BSD-3-Clause
*
******************************************************************************
*/
/* Includes ------------------------------------------------------------------*/
#include "dma.h"
/* USER CODE BEGIN 0 */
/* USER CODE END 0 */
/*----------------------------------------------------------------------------*/
/* Configure DMA */
/*----------------------------------------------------------------------------*/
/* USER CODE BEGIN 1 */
/* USER CODE END 1 */
/**
* Enable DMA controller clock
*/
void MX_DMA_Init(void)
{
/* DMA controller clock enable */
__HAL_RCC_DMA2_CLK_ENABLE();
/* DMA interrupt init */
/* DMA2_Channel6_IRQn interrupt configuration */
HAL_NVIC_SetPriority(DMA2_Channel6_IRQn, 0, 0);
HAL_NVIC_EnableIRQ(DMA2_Channel6_IRQn);
}
/* USER CODE BEGIN 2 */
/* USER CODE END 2 */
/**
* @}
*/
/**
* @}
*/
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/

View File

@@ -6,7 +6,7 @@
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2019 STMicroelectronics.
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
@@ -30,9 +30,9 @@
/* USER CODE END 1 */
/** Configure pins as
* Analog
* Input
/** Configure pins as
* Analog
* Input
* Output
* EVENT_OUT
* EXTI
@@ -43,30 +43,33 @@ void MX_GPIO_Init(void)
GPIO_InitTypeDef GPIO_InitStruct = {0};
/* GPIO Ports Clock Enable */
__HAL_RCC_GPIOE_CLK_ENABLE();
__HAL_RCC_GPIOC_CLK_ENABLE();
__HAL_RCC_GPIOH_CLK_ENABLE();
__HAL_RCC_GPIOA_CLK_ENABLE();
__HAL_RCC_GPIOB_CLK_ENABLE();
__HAL_RCC_GPIOD_CLK_ENABLE();
__HAL_RCC_GPIOG_CLK_ENABLE();
HAL_PWREx_EnableVddIO2();
__HAL_RCC_GPIOA_CLK_ENABLE();
__HAL_RCC_GPIOD_CLK_ENABLE();
/*Configure GPIO pin Output Level */
HAL_GPIO_WritePin(GPIOB, LD3_Pin|LD2_Pin, GPIO_PIN_RESET);
HAL_GPIO_WritePin(GPIOB, GPIO_PIN_12|GPIO_PIN_14, GPIO_PIN_RESET);
/*Configure GPIO pin : PtPin */
GPIO_InitStruct.Pin = B1_Pin;
GPIO_InitStruct.Mode = GPIO_MODE_IT_RISING;
GPIO_InitStruct.Pull = GPIO_NOPULL;
HAL_GPIO_Init(B1_GPIO_Port, &GPIO_InitStruct);
/*Configure GPIO pin Output Level */
HAL_GPIO_WritePin(GPIOA, GPIO_PIN_11|GPIO_PIN_12, GPIO_PIN_RESET);
/*Configure GPIO pins : PBPin PBPin */
GPIO_InitStruct.Pin = LD3_Pin|LD2_Pin;
/*Configure GPIO pins : PB12 PB14 */
GPIO_InitStruct.Pin = GPIO_PIN_12|GPIO_PIN_14;
GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
HAL_GPIO_Init(GPIOB, &GPIO_InitStruct);
/*Configure GPIO pins : PA11 PA12 */
GPIO_InitStruct.Pin = GPIO_PIN_11|GPIO_PIN_12;
GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
HAL_GPIO_Init(GPIOA, &GPIO_InitStruct);
}

View File

@@ -0,0 +1,121 @@
/**
******************************************************************************
* File Name : I2C.c
* Description : This file provides code for the configuration
* of the I2C instances.
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
* opensource.org/licenses/BSD-3-Clause
*
******************************************************************************
*/
/* Includes ------------------------------------------------------------------*/
#include "i2c.h"
/* USER CODE BEGIN 0 */
/* USER CODE END 0 */
I2C_HandleTypeDef hi2c1;
/* I2C1 init function */
void MX_I2C1_Init(void)
{
hi2c1.Instance = I2C1;
hi2c1.Init.Timing = 0x10909CEC;
hi2c1.Init.OwnAddress1 = 0;
hi2c1.Init.AddressingMode = I2C_ADDRESSINGMODE_7BIT;
hi2c1.Init.DualAddressMode = I2C_DUALADDRESS_DISABLE;
hi2c1.Init.OwnAddress2 = 0;
hi2c1.Init.OwnAddress2Masks = I2C_OA2_NOMASK;
hi2c1.Init.GeneralCallMode = I2C_GENERALCALL_DISABLE;
hi2c1.Init.NoStretchMode = I2C_NOSTRETCH_DISABLE;
if (HAL_I2C_Init(&hi2c1) != HAL_OK)
{
Error_Handler();
}
/** Configure Analogue filter
*/
if (HAL_I2CEx_ConfigAnalogFilter(&hi2c1, I2C_ANALOGFILTER_ENABLE) != HAL_OK)
{
Error_Handler();
}
/** Configure Digital filter
*/
if (HAL_I2CEx_ConfigDigitalFilter(&hi2c1, 0) != HAL_OK)
{
Error_Handler();
}
}
void HAL_I2C_MspInit(I2C_HandleTypeDef* i2cHandle)
{
GPIO_InitTypeDef GPIO_InitStruct = {0};
if(i2cHandle->Instance==I2C1)
{
/* USER CODE BEGIN I2C1_MspInit 0 */
/* USER CODE END I2C1_MspInit 0 */
__HAL_RCC_GPIOG_CLK_ENABLE();
HAL_PWREx_EnableVddIO2();
/**I2C1 GPIO Configuration
PG13 ------> I2C1_SDA
PG14 ------> I2C1_SCL
*/
GPIO_InitStruct.Pin = GPIO_PIN_13|GPIO_PIN_14;
GPIO_InitStruct.Mode = GPIO_MODE_AF_OD;
GPIO_InitStruct.Pull = GPIO_PULLUP;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
GPIO_InitStruct.Alternate = GPIO_AF4_I2C1;
HAL_GPIO_Init(GPIOG, &GPIO_InitStruct);
/* I2C1 clock enable */
__HAL_RCC_I2C1_CLK_ENABLE();
/* USER CODE BEGIN I2C1_MspInit 1 */
/* USER CODE END I2C1_MspInit 1 */
}
}
void HAL_I2C_MspDeInit(I2C_HandleTypeDef* i2cHandle)
{
if(i2cHandle->Instance==I2C1)
{
/* USER CODE BEGIN I2C1_MspDeInit 0 */
/* USER CODE END I2C1_MspDeInit 0 */
/* Peripheral clock disable */
__HAL_RCC_I2C1_CLK_DISABLE();
/**I2C1 GPIO Configuration
PG13 ------> I2C1_SDA
PG14 ------> I2C1_SCL
*/
HAL_GPIO_DeInit(GPIOG, GPIO_PIN_13);
HAL_GPIO_DeInit(GPIOG, GPIO_PIN_14);
/* USER CODE BEGIN I2C1_MspDeInit 1 */
/* USER CODE END I2C1_MspDeInit 1 */
}
}
/* USER CODE BEGIN 1 */
/* USER CODE END 1 */
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/
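I2C1 on PG13/PG14 is the control (SCCB) interface of the OV2640. A hypothetical register-write helper, shown only to illustrate the HAL call used for camera configuration; the 0x60 write address is the usual OV2640 SCCB address and is an assumption here, since ov2640.c is not part of this section:

extern I2C_HandleTypeDef hi2c1;

/* Write one OV2640 register over I2C1 (SCCB). */
HAL_StatusTypeDef ov2640_write_reg(uint8_t reg, uint8_t val)
{
  return HAL_I2C_Mem_Write(&hi2c1, 0x60, reg, I2C_MEMADD_SIZE_8BIT, &val, 1, 100);
}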

View File

@@ -22,4 +22,3 @@ int main(void)
osThreadCreate(osThread(application_entry), NULL); // Create TOS Tiny task
osKernelStart(); // Start TOS Tiny
}

View File

@@ -1,5 +1,12 @@
#include "mcu_init.h"
uint16_t camBuffer[OV2640_PIXEL_WIDTH*OV2640_PIXEL_HEIGHT];
uint8_t frame_flag = 0;
uint8_t tensor_flag = 0;
extern DCMI_HandleTypeDef hdcmi;
int fputc(int ch, FILE *f)
{
if (ch == '\n') {
@@ -29,9 +36,24 @@ void board_init(void)
HAL_Init();
SystemClock_Config();
MX_GPIO_Init();
MX_DMA_Init();
MX_LPUART1_UART_Init();
MX_USART2_UART_Init();
MX_USART3_UART_Init();
MX_DCMI_Init();
MX_I2C1_Init();
MX_SPI1_Init();
MX_TIM4_Init();
LCD_2IN4_Init();
OV2640_Init();
OV2640_RGB565_Mode();
OV2640_OutSize_Set(OV2640_PIXEL_WIDTH,OV2640_PIXEL_HEIGHT);
__HAL_DCMI_DISABLE_IT(&hdcmi, DCMI_IT_LINE | DCMI_IT_VSYNC);
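  /* HAL_DCMI_Start_DMA takes a length in 32-bit words; camBuffer holds 16-bit
     RGB565 pixels, so the word count is half the pixel count. */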
if (HAL_DCMI_Start_DMA(&hdcmi, DCMI_MODE_CONTINUOUS, (uint32_t)camBuffer , (OV2640_PIXEL_WIDTH*OV2640_PIXEL_HEIGHT)/2))
{
Error_Handler();
}
//setup(); //tensorflow init
}
/**
@@ -44,7 +66,7 @@ void SystemClock_Config(void)
RCC_ClkInitTypeDef RCC_ClkInitStruct = {0};
RCC_PeriphCLKInitTypeDef PeriphClkInit = {0};
/** Initializes the CPU, AHB and APB busses clocks
/** Initializes the CPU, AHB and APB busses clocks
*/
RCC_OscInitStruct.OscillatorType = RCC_OSCILLATORTYPE_MSI;
RCC_OscInitStruct.MSIState = RCC_MSI_ON;
@@ -61,29 +83,29 @@ void SystemClock_Config(void)
{
Error_Handler();
}
/** Initializes the CPU, AHB and APB busses clocks
/** Initializes the CPU, AHB and APB busses clocks
*/
RCC_ClkInitStruct.ClockType = RCC_CLOCKTYPE_HCLK|RCC_CLOCKTYPE_SYSCLK
|RCC_CLOCKTYPE_PCLK1|RCC_CLOCKTYPE_PCLK2;
RCC_ClkInitStruct.SYSCLKSource = RCC_SYSCLKSOURCE_PLLCLK;
RCC_ClkInitStruct.AHBCLKDivider = RCC_SYSCLK_DIV1;
RCC_ClkInitStruct.APB1CLKDivider = RCC_HCLK_DIV2;
RCC_ClkInitStruct.APB1CLKDivider = RCC_HCLK_DIV1;
RCC_ClkInitStruct.APB2CLKDivider = RCC_HCLK_DIV1;
if (HAL_RCC_ClockConfig(&RCC_ClkInitStruct, FLASH_LATENCY_4) != HAL_OK)
{
Error_Handler();
}
PeriphClkInit.PeriphClockSelection = RCC_PERIPHCLK_USART2|RCC_PERIPHCLK_USART3
|RCC_PERIPHCLK_LPUART1;
PeriphClkInit.Usart2ClockSelection = RCC_USART2CLKSOURCE_PCLK1;
PeriphClkInit.PeriphClockSelection = RCC_PERIPHCLK_LPUART1|RCC_PERIPHCLK_USART2|RCC_PERIPHCLK_USART3|RCC_PERIPHCLK_I2C1;
PeriphClkInit.Lpuart1ClockSelection = RCC_LPUART1CLKSOURCE_PCLK1;
PeriphClkInit.Usart2ClockSelection = RCC_USART2CLKSOURCE_PCLK1;
PeriphClkInit.Usart3ClockSelection = RCC_USART3CLKSOURCE_PCLK1;
PeriphClkInit.Lpuart1ClockSelection = RCC_LPUART1CLKSOURCE_PCLK1;
PeriphClkInit.I2c1ClockSelection = RCC_I2C1CLKSOURCE_PCLK1;
if (HAL_RCCEx_PeriphCLKConfig(&PeriphClkInit) != HAL_OK)
{
Error_Handler();
}
/** Configure the main internal regulator output voltage
/** Configure the main internal regulator output voltage
*/
if (HAL_PWREx_ControlVoltageScaling(PWR_REGULATOR_VOLTAGE_SCALE1) != HAL_OK)
{
@@ -92,7 +114,12 @@ void SystemClock_Config(void)
}
/* USER CODE BEGIN 4 */
void HAL_DCMI_FrameEventCallback(DCMI_HandleTypeDef *hdcmi)
{
if(hdcmi->State == HAL_DCMI_STATE_BUSY && frame_flag != 1){
frame_flag = 1;
}
}
/* USER CODE END 4 */
/**
@@ -116,7 +143,7 @@ void Error_Handler(void)
* @retval None
*/
void assert_failed(char *file, uint32_t line)
{
{
/* USER CODE BEGIN 6 */
/* User can add his own implementation to report the file name and line number,
tex: printf("Wrong parameters value: file %s on line %d\r\n", file, line) */
@@ -125,3 +152,4 @@ void assert_failed(char *file, uint32_t line)
#endif /* USE_FULL_ASSERT */
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/

View File

@@ -0,0 +1,113 @@
/**
******************************************************************************
* File Name : SPI.c
* Description : This file provides code for the configuration
* of the SPI instances.
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
* opensource.org/licenses/BSD-3-Clause
*
******************************************************************************
*/
/* Includes ------------------------------------------------------------------*/
#include "spi.h"
/* USER CODE BEGIN 0 */
/* USER CODE END 0 */
SPI_HandleTypeDef hspi1;
/* SPI1 init function */
void MX_SPI1_Init(void)
{
hspi1.Instance = SPI1;
hspi1.Init.Mode = SPI_MODE_MASTER;
hspi1.Init.Direction = SPI_DIRECTION_2LINES;
hspi1.Init.DataSize = SPI_DATASIZE_8BIT;
hspi1.Init.CLKPolarity = SPI_POLARITY_LOW;
hspi1.Init.CLKPhase = SPI_PHASE_1EDGE;
hspi1.Init.NSS = SPI_NSS_SOFT;
hspi1.Init.BaudRatePrescaler = SPI_BAUDRATEPRESCALER_2;
hspi1.Init.FirstBit = SPI_FIRSTBIT_MSB;
hspi1.Init.TIMode = SPI_TIMODE_DISABLE;
hspi1.Init.CRCCalculation = SPI_CRCCALCULATION_DISABLE;
hspi1.Init.CRCPolynomial = 7;
hspi1.Init.CRCLength = SPI_CRC_LENGTH_DATASIZE;
hspi1.Init.NSSPMode = SPI_NSS_PULSE_ENABLE;
if (HAL_SPI_Init(&hspi1) != HAL_OK)
{
Error_Handler();
}
}
void HAL_SPI_MspInit(SPI_HandleTypeDef* spiHandle)
{
GPIO_InitTypeDef GPIO_InitStruct = {0};
if(spiHandle->Instance==SPI1)
{
/* USER CODE BEGIN SPI1_MspInit 0 */
/* USER CODE END SPI1_MspInit 0 */
/* SPI1 clock enable */
__HAL_RCC_SPI1_CLK_ENABLE();
__HAL_RCC_GPIOA_CLK_ENABLE();
/**SPI1 GPIO Configuration
PA5 ------> SPI1_SCK
PA6 ------> SPI1_MISO
PA7 ------> SPI1_MOSI
*/
GPIO_InitStruct.Pin = GPIO_PIN_5|GPIO_PIN_6|GPIO_PIN_7;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
GPIO_InitStruct.Alternate = GPIO_AF5_SPI1;
HAL_GPIO_Init(GPIOA, &GPIO_InitStruct);
/* USER CODE BEGIN SPI1_MspInit 1 */
/* USER CODE END SPI1_MspInit 1 */
}
}
void HAL_SPI_MspDeInit(SPI_HandleTypeDef* spiHandle)
{
if(spiHandle->Instance==SPI1)
{
/* USER CODE BEGIN SPI1_MspDeInit 0 */
/* USER CODE END SPI1_MspDeInit 0 */
/* Peripheral clock disable */
__HAL_RCC_SPI1_CLK_DISABLE();
/**SPI1 GPIO Configuration
PA5 ------> SPI1_SCK
PA6 ------> SPI1_MISO
PA7 ------> SPI1_MOSI
*/
HAL_GPIO_DeInit(GPIOA, GPIO_PIN_5|GPIO_PIN_6|GPIO_PIN_7);
/* USER CODE BEGIN SPI1_MspDeInit 1 */
/* USER CODE END SPI1_MspDeInit 1 */
}
}
/* USER CODE BEGIN 1 */
/* USER CODE END 1 */
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/
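SPI1 is configured as a full-duplex master with a /2 prescaler (40 Mbit/s from the 80 MHz PCLK2, as in the .ioc below) and feeds the 2.4-inch LCD. A minimal sketch of a blocking pixel-data write; the function name is hypothetical, and the chip-select and data/command pin handling done by the LCD driver is omitted:

extern SPI_HandleTypeDef hspi1;

/* Push a block of RGB565 pixel bytes to the LCD over SPI1 (blocking). */
void lcd_spi_write(uint8_t *buf, uint16_t len)
{
  HAL_SPI_Transmit(&hspi1, buf, len, HAL_MAX_DELAY);
}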

View File

@@ -22,7 +22,6 @@
#include "main.h"
#include "stm32l4xx_it.h"
#include "tos_k.h"
//#include "tos_at.h"
/* Private includes ----------------------------------------------------------*/
/* USER CODE BEGIN Includes */
/* USER CODE END Includes */
@@ -58,6 +57,8 @@
/* USER CODE END 0 */
/* External variables --------------------------------------------------------*/
extern DMA_HandleTypeDef hdma_dcmi;
extern DCMI_HandleTypeDef hdcmi;
extern UART_HandleTypeDef hlpuart1;
extern UART_HandleTypeDef huart2;
extern UART_HandleTypeDef huart3;
@@ -173,7 +174,7 @@ void DebugMon_Handler(void)
__weak void PendSV_Handler(void)
{
/* USER CODE BEGIN PendSV_IRQn 0 */
/* USER CODE END PendSV_IRQn 0 */
/* USER CODE BEGIN PendSV_IRQn 1 */
@@ -189,15 +190,13 @@ void SysTick_Handler(void)
/* USER CODE END SysTick_IRQn 0 */
HAL_IncTick();
if (tos_knl_is_running())
{
tos_knl_irq_enter();
tos_tick_handler();
tos_knl_irq_leave();
}
//HAL_SYSTICK_IRQHandler();
/* USER CODE BEGIN SysTick_IRQn 1 */
if(tos_knl_is_running())
{
tos_knl_irq_enter();
tos_tick_handler();
tos_knl_irq_leave();
}
/* USER CODE END SysTick_IRQn 1 */
}
@@ -209,8 +208,33 @@ void SysTick_Handler(void)
/******************************************************************************/
/**
* @brief This function handles USART2 global interrupt.
* @brief This function handles DMA2 channel6 global interrupt.
*/
void DMA2_Channel6_IRQHandler(void)
{
/* USER CODE BEGIN DMA2_Channel6_IRQn 0 */
/* USER CODE END DMA2_Channel6_IRQn 0 */
HAL_DMA_IRQHandler(&hdma_dcmi);
/* USER CODE BEGIN DMA2_Channel6_IRQn 1 */
/* USER CODE END DMA2_Channel6_IRQn 1 */
}
/**
* @brief This function handles DCMI global interrupt.
*/
void DCMI_IRQHandler(void)
{
/* USER CODE BEGIN DCMI_IRQn 0 */
/* USER CODE END DCMI_IRQn 0 */
HAL_DCMI_IRQHandler(&hdcmi);
/* USER CODE BEGIN DCMI_IRQn 1 */
/* USER CODE END DCMI_IRQn 1 */
}
void USART2_IRQHandler(void)
{
/* USER CODE BEGIN USART2_IRQn 0 */
@@ -242,13 +266,13 @@ void USART3_IRQHandler(void)
void LPUART1_IRQHandler(void)
{
/* USER CODE BEGIN LPUART1_IRQn 0 */
tos_knl_irq_enter();
/* USER CODE END LPUART1_IRQn 0 */
tos_knl_irq_enter();
HAL_UART_IRQHandler(&hlpuart1);
tos_knl_irq_leave();
/* USER CODE BEGIN LPUART1_IRQn 1 */
tos_knl_irq_leave();
/* USER CODE END LPUART1_IRQn 1 */
}

View File

@@ -0,0 +1,136 @@
/**
******************************************************************************
* File Name : TIM.c
* Description : This file provides code for the configuration
* of the TIM instances.
******************************************************************************
* @attention
*
* <h2><center>&copy; Copyright (c) 2020 STMicroelectronics.
* All rights reserved.</center></h2>
*
* This software component is licensed by ST under BSD 3-Clause license,
* the "License"; You may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
* opensource.org/licenses/BSD-3-Clause
*
******************************************************************************
*/
/* Includes ------------------------------------------------------------------*/
#include "tim.h"
/* USER CODE BEGIN 0 */
/* USER CODE END 0 */
TIM_HandleTypeDef htim4;
/* TIM4 init function */
void MX_TIM4_Init(void)
{
TIM_ClockConfigTypeDef sClockSourceConfig = {0};
TIM_MasterConfigTypeDef sMasterConfig = {0};
TIM_OC_InitTypeDef sConfigOC = {0};
htim4.Instance = TIM4;
htim4.Init.Prescaler = 300;
htim4.Init.CounterMode = TIM_COUNTERMODE_UP;
htim4.Init.Period = 999;
htim4.Init.ClockDivision = TIM_CLOCKDIVISION_DIV1;
htim4.Init.AutoReloadPreload = TIM_AUTORELOAD_PRELOAD_DISABLE;
if (HAL_TIM_Base_Init(&htim4) != HAL_OK)
{
Error_Handler();
}
sClockSourceConfig.ClockSource = TIM_CLOCKSOURCE_INTERNAL;
if (HAL_TIM_ConfigClockSource(&htim4, &sClockSourceConfig) != HAL_OK)
{
Error_Handler();
}
if (HAL_TIM_PWM_Init(&htim4) != HAL_OK)
{
Error_Handler();
}
sMasterConfig.MasterOutputTrigger = TIM_TRGO_RESET;
sMasterConfig.MasterSlaveMode = TIM_MASTERSLAVEMODE_DISABLE;
if (HAL_TIMEx_MasterConfigSynchronization(&htim4, &sMasterConfig) != HAL_OK)
{
Error_Handler();
}
sConfigOC.OCMode = TIM_OCMODE_PWM1;
sConfigOC.Pulse = 0;
sConfigOC.OCPolarity = TIM_OCPOLARITY_HIGH;
sConfigOC.OCFastMode = TIM_OCFAST_DISABLE;
if (HAL_TIM_PWM_ConfigChannel(&htim4, &sConfigOC, TIM_CHANNEL_1) != HAL_OK)
{
Error_Handler();
}
HAL_TIM_MspPostInit(&htim4);
}
void HAL_TIM_Base_MspInit(TIM_HandleTypeDef* tim_baseHandle)
{
if(tim_baseHandle->Instance==TIM4)
{
/* USER CODE BEGIN TIM4_MspInit 0 */
/* USER CODE END TIM4_MspInit 0 */
/* TIM4 clock enable */
__HAL_RCC_TIM4_CLK_ENABLE();
/* USER CODE BEGIN TIM4_MspInit 1 */
/* USER CODE END TIM4_MspInit 1 */
}
}
void HAL_TIM_MspPostInit(TIM_HandleTypeDef* timHandle)
{
GPIO_InitTypeDef GPIO_InitStruct = {0};
if(timHandle->Instance==TIM4)
{
/* USER CODE BEGIN TIM4_MspPostInit 0 */
/* USER CODE END TIM4_MspPostInit 0 */
__HAL_RCC_GPIOB_CLK_ENABLE();
/**TIM4 GPIO Configuration
PB6 ------> TIM4_CH1
*/
GPIO_InitStruct.Pin = GPIO_PIN_6;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
GPIO_InitStruct.Alternate = GPIO_AF2_TIM4;
HAL_GPIO_Init(GPIOB, &GPIO_InitStruct);
/* USER CODE BEGIN TIM4_MspPostInit 1 */
/* USER CODE END TIM4_MspPostInit 1 */
}
}
void HAL_TIM_Base_MspDeInit(TIM_HandleTypeDef* tim_baseHandle)
{
if(tim_baseHandle->Instance==TIM4)
{
/* USER CODE BEGIN TIM4_MspDeInit 0 */
/* USER CODE END TIM4_MspDeInit 0 */
/* Peripheral clock disable */
__HAL_RCC_TIM4_CLK_DISABLE();
/* USER CODE BEGIN TIM4_MspDeInit 1 */
/* USER CODE END TIM4_MspDeInit 1 */
}
}
/* USER CODE BEGIN 1 */
/* USER CODE END 1 */
/************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE****/
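TIM4 channel 1 on PB6 (LCD_BL) generates the backlight PWM; with the 80 MHz timer clock, prescaler 300 and period 999 this gives roughly a 266 Hz carrier with a 1000-step duty range. A minimal sketch of setting the backlight level (the function name is hypothetical):

extern TIM_HandleTypeDef htim4;

/* Set LCD backlight brightness; duty is 0..999 (0 = off, 999 = full on). */
void lcd_backlight_set(uint16_t duty)
{
  HAL_TIM_PWM_Start(&htim4, TIM_CHANNEL_1);           /* PB6 -> LCD_BL */
  __HAL_TIM_SET_COMPARE(&htim4, TIM_CHANNEL_1, duty);
}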

View File

@@ -131,7 +131,7 @@ void HAL_UART_MspInit(UART_HandleTypeDef* uartHandle)
PG7 ------> LPUART1_TX
PG8 ------> LPUART1_RX
*/
GPIO_InitStruct.Pin = STLK_RX_Pin|STLK_TX_Pin;
GPIO_InitStruct.Pin = GPIO_PIN_7|GPIO_PIN_8;
GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
GPIO_InitStruct.Pull = GPIO_NOPULL;
GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
@@ -240,7 +240,7 @@ void HAL_UART_MspDeInit(UART_HandleTypeDef* uartHandle)
PG7 ------> LPUART1_TX
PG8 ------> LPUART1_RX
*/
HAL_GPIO_DeInit(GPIOG, STLK_RX_Pin|STLK_TX_Pin);
HAL_GPIO_DeInit(GPIOG, GPIO_PIN_7|GPIO_PIN_8);
/* LPUART1 interrupt Deinit */
HAL_NVIC_DisableIRQ(LPUART1_IRQn);

View File

@@ -1,139 +1,248 @@
#MicroXplorer Configuration settings - do not modify
File.Version=6
KeepUserPlacement=false
LPUART1.BaudRate=115200
LPUART1.IPParameters=WordLength,BaudRate
LPUART1.WordLength=UART_WORDLENGTH_8B
Mcu.Family=STM32L4
Mcu.IP0=LPUART1
Mcu.IP1=NVIC
Mcu.IP2=RCC
Mcu.IP3=SYS
Mcu.IPNb=4
Mcu.Name=STM32L496Z(E-G)Tx
Mcu.Package=LQFP144
Mcu.Pin0=PC14-OSC32_IN (PC14)
Mcu.Pin1=PC15-OSC32_OUT (PC15)
Mcu.Pin2=PG7
Mcu.Pin3=PG8
Mcu.Pin4=VP_SYS_VS_Systick
Mcu.PinsNb=5
Mcu.ThirdPartyNb=0
Mcu.UserConstants=
Mcu.UserName=STM32L496ZGTx
MxCube.Version=5.4.0
MxDb.Version=DB.5.0.40
NVIC.BusFault_IRQn=true\:0\:0\:false\:false\:true\:false\:false
NVIC.DebugMonitor_IRQn=true\:0\:0\:false\:false\:true\:false\:false
NVIC.ForceEnableDMAVector=true
NVIC.HardFault_IRQn=true\:0\:0\:false\:false\:true\:false\:false
NVIC.MemoryManagement_IRQn=true\:0\:0\:false\:false\:true\:false\:false
NVIC.NonMaskableInt_IRQn=true\:0\:0\:false\:false\:true\:false\:false
NVIC.PendSV_IRQn=true\:0\:0\:false\:false\:true\:false\:false
NVIC.PriorityGroup=NVIC_PRIORITYGROUP_4
NVIC.SVCall_IRQn=true\:0\:0\:false\:false\:true\:false\:false
NVIC.SysTick_IRQn=true\:0\:0\:false\:false\:true\:false\:true
NVIC.UsageFault_IRQn=true\:0\:0\:false\:false\:true\:false\:false
PC14-OSC32_IN\ (PC14).Mode=LSE-External-Oscillator
PC14-OSC32_IN\ (PC14).Signal=RCC_OSC32_IN
PC15-OSC32_OUT\ (PC15).Mode=LSE-External-Oscillator
PC15-OSC32_OUT\ (PC15).Signal=RCC_OSC32_OUT
PCC.Checker=true
PCC.Line=STM32L4x6
PCC.MCU=STM32L496Z(E-G)Tx
PCC.PartNumber=STM32L496ZGTx
PCC.Seq0=0
PCC.Series=STM32L4
PCC.Temperature=25
PCC.Vdd=3.0
PG7.Locked=true
PG7.Mode=Asynchronous
PG7.Signal=LPUART1_TX
PG8.Locked=true
PG8.Mode=Asynchronous
PG8.Signal=LPUART1_RX
PinOutPanel.RotationAngle=0
ProjectManager.AskForMigrate=true
ProjectManager.BackupPrevious=false
ProjectManager.CompilerOptimize=6
ProjectManager.ComputerToolchain=false
ProjectManager.CoupleFile=true
ProjectManager.CustomerFirmwarePackage=
ProjectManager.DefaultFWLocation=true
ProjectManager.DeletePrevious=true
ProjectManager.DeviceId=STM32L496ZGTx
ProjectManager.FirmwarePackage=STM32Cube FW_L4 V1.14.0
ProjectManager.FreePins=false
ProjectManager.HalAssertFull=false
ProjectManager.HeapSize=0x200
ProjectManager.KeepUserCode=true
ProjectManager.LastFirmware=true
ProjectManager.LibraryCopy=0
ProjectManager.MainLocation=Src
ProjectManager.NoMain=false
ProjectManager.PreviousToolchain=
ProjectManager.ProjectBuild=false
ProjectManager.ProjectFileName=TencentOS_tiny.ioc
ProjectManager.ProjectName=TencentOS_tiny
ProjectManager.StackSize=0x400
ProjectManager.TargetToolchain=EWARM V8
ProjectManager.ToolChainLocation=
ProjectManager.UnderRoot=false
ProjectManager.functionlistsort=1-SystemClock_Config-RCC-false-HAL-false,2-MX_GPIO_Init-GPIO-false-HAL-true,3-MX_LPUART1_UART_Init-LPUART1-false-HAL-true
RCC.ADCFreq_Value=64000000
RCC.AHBFreq_Value=80000000
RCC.APB1Freq_Value=80000000
RCC.APB1TimFreq_Value=80000000
RCC.APB2Freq_Value=80000000
RCC.APB2TimFreq_Value=80000000
RCC.CortexFreq_Value=80000000
RCC.DFSDMFreq_Value=80000000
RCC.FCLKCortexFreq_Value=80000000
RCC.FamilyName=M
RCC.HCLKFreq_Value=80000000
RCC.HSE_VALUE=8000000
RCC.HSI48_VALUE=48000000
RCC.HSI_VALUE=16000000
RCC.I2C1Freq_Value=80000000
RCC.I2C2Freq_Value=80000000
RCC.I2C3Freq_Value=80000000
RCC.I2C4Freq_Value=80000000
RCC.IPParameters=ADCFreq_Value,AHBFreq_Value,APB1Freq_Value,APB1TimFreq_Value,APB2Freq_Value,APB2TimFreq_Value,CortexFreq_Value,DFSDMFreq_Value,FCLKCortexFreq_Value,FamilyName,HCLKFreq_Value,HSE_VALUE,HSI48_VALUE,HSI_VALUE,I2C1Freq_Value,I2C2Freq_Value,I2C3Freq_Value,I2C4Freq_Value,LPTIM1Freq_Value,LPTIM2Freq_Value,LPUART1Freq_Value,LSCOPinFreq_Value,LSI_VALUE,MCO1PinFreq_Value,MSI_VALUE,PLLN,PLLPoutputFreq_Value,PLLQoutputFreq_Value,PLLRCLKFreq_Value,PLLSAI1PoutputFreq_Value,PLLSAI1QoutputFreq_Value,PLLSAI1RoutputFreq_Value,PLLSAI2PoutputFreq_Value,PLLSAI2RoutputFreq_Value,PLLSourceVirtual,PWRFreq_Value,RNGFreq_Value,SAI1Freq_Value,SAI2Freq_Value,SDMMCFreq_Value,SWPMI1Freq_Value,SYSCLKFreq_VALUE,SYSCLKSource,UART4Freq_Value,UART5Freq_Value,USART1Freq_Value,USART2Freq_Value,USART3Freq_Value,USBFreq_Value,VCOInputFreq_Value,VCOOutputFreq_Value,VCOSAI1OutputFreq_Value,VCOSAI2OutputFreq_Value
RCC.LPTIM1Freq_Value=80000000
RCC.LPTIM2Freq_Value=80000000
RCC.LPUART1Freq_Value=80000000
RCC.LSCOPinFreq_Value=32000
RCC.LSI_VALUE=32000
RCC.MCO1PinFreq_Value=80000000
RCC.MSI_VALUE=4000000
RCC.PLLN=10
RCC.PLLPoutputFreq_Value=80000000
RCC.PLLQoutputFreq_Value=80000000
RCC.PLLRCLKFreq_Value=80000000
RCC.PLLSAI1PoutputFreq_Value=64000000
RCC.PLLSAI1QoutputFreq_Value=64000000
RCC.PLLSAI1RoutputFreq_Value=64000000
RCC.PLLSAI2PoutputFreq_Value=64000000
RCC.PLLSAI2RoutputFreq_Value=64000000
RCC.PLLSourceVirtual=RCC_PLLSOURCE_HSI
RCC.PWRFreq_Value=80000000
RCC.RNGFreq_Value=64000000
RCC.SAI1Freq_Value=64000000
RCC.SAI2Freq_Value=64000000
RCC.SDMMCFreq_Value=64000000
RCC.SWPMI1Freq_Value=80000000
RCC.SYSCLKFreq_VALUE=80000000
RCC.SYSCLKSource=RCC_SYSCLKSOURCE_PLLCLK
RCC.UART4Freq_Value=80000000
RCC.UART5Freq_Value=80000000
PG8.Mode=Asynchronous
PA6.Mode=Full_Duplex_Master
RCC.USART1Freq_Value=80000000
RCC.SAI1Freq_Value=64000000
RCC.CortexFreq_Value=80000000
ProjectManager.KeepUserCode=true
Mcu.UserName=STM32L496ZGTx
SPI1.VirtualType=VM_MASTER
PG8.Signal=LPUART1_RX
PG8.Locked=true
RCC.PLLSAI1RoutputFreq_Value=64000000
ProjectManager.functionlistsort=1-MX_GPIO_Init-GPIO-false-HAL-true,2-SystemClock_Config-RCC-false-HAL-false,3-MX_DMA_Init-DMA-false-HAL-true,4-MX_LPUART1_UART_Init-LPUART1-false-HAL-true,5-MX_DCMI_Init-DCMI-false-HAL-true,6-MX_I2C1_Init-I2C1-false-HAL-true,7-MX_SPI1_Init-SPI1-false-HAL-true,8-MX_TIM4_Init-TIM4-false-HAL-true
LPUART1.WordLength=UART_WORDLENGTH_8B
RCC.USART2Freq_Value=80000000
RCC.USART3Freq_Value=80000000
RCC.USBFreq_Value=64000000
RCC.VCOInputFreq_Value=16000000
RCC.VCOOutputFreq_Value=160000000
PA15\ (JTDI).Signal=SYS_JTDI
PC15-OSC32_OUT\ (PC15).Mode=LSE-External-Oscillator
PG13.Signal=I2C1_SDA
PD9.Mode=Slave_8_bits_External_Synchro
PinOutPanel.RotationAngle=0
RCC.MCO1PinFreq_Value=80000000
RCC.SYSCLKSource=RCC_SYSCLKSOURCE_PLLCLK
ProjectManager.StackSize=0x400
PC14-OSC32_IN\ (PC14).Mode=LSE-External-Oscillator
Dma.DCMI.0.MemDataAlignment=DMA_MDATAALIGN_WORD
RCC.I2C3Freq_Value=80000000
RCC.LPTIM1Freq_Value=80000000
Mcu.IP4=NVIC
Mcu.IP5=RCC
RCC.FCLKCortexFreq_Value=80000000
Mcu.IP2=I2C1
NVIC.SVCall_IRQn=true\:0\:0\:false\:false\:true\:false\:false
Mcu.IP3=LPUART1
Mcu.IP0=DCMI
PA12.Locked=true
PA14\ (JTCK/SWCLK).Signal=SYS_JTCK-SWCLK
PA15\ (JTDI).Mode=JTAG_4_pins
Mcu.IP1=DMA
Dma.DCMI.0.MemInc=DMA_MINC_ENABLE
PA12.Signal=GPIO_Output
Mcu.UserConstants=
RCC.VCOSAI1OutputFreq_Value=128000000
RCC.VCOSAI2OutputFreq_Value=128000000
VP_SYS_VS_Systick.Mode=SysTick
PA4.Mode=Slave_8_bits_External_Synchro
Dma.DCMI.0.PeriphDataAlignment=DMA_PDATAALIGN_WORD
RCC.SDMMCFreq_Value=64000000
Mcu.ThirdPartyNb=0
SPI1.Direction=SPI_DIRECTION_2LINES
RCC.HCLKFreq_Value=80000000
RCC.I2C4Freq_Value=80000000
Mcu.IPNb=9
ProjectManager.PreviousToolchain=
RCC.APB2TimFreq_Value=80000000
PB6.Signal=S_TIM4_CH1
PC7.Signal=DCMI_D1
SPI1.CalculateBaudRate=40.0 MBits/s
Mcu.Pin6=PA5
RCC.SAI2Freq_Value=64000000
Mcu.Pin7=PA6
PE5.Signal=DCMI_D6
Mcu.Pin8=PA7
Mcu.Pin9=PB12
RCC.AHBFreq_Value=80000000
Mcu.Pin0=PE4
Mcu.Pin1=PE5
GPIO.groupedBy=Group By Peripherals
Mcu.Pin2=PE6
Mcu.Pin3=PC14-OSC32_IN (PC14)
RCC.USART3Freq_Value=80000000
Mcu.Pin4=PC15-OSC32_OUT (PC15)
Mcu.Pin5=PA4
ProjectManager.ProjectBuild=false
RCC.HSE_VALUE=8000000
NVIC.UsageFault_IRQn=true\:0\:0\:false\:false\:true\:false\:false
NVIC.DebugMonitor_IRQn=true\:0\:0\:false\:false\:true\:false\:false
NVIC.SysTick_IRQn=true\:0\:0\:false\:false\:true\:false\:true
DCMI.PCKPolarity=DCMI_PCKPOLARITY_RISING
PG14.Mode=I2C
ProjectManager.FirmwarePackage=STM32Cube FW_L4 V1.14.0
MxDb.Version=DB.5.0.40
ProjectManager.BackupPrevious=false
RCC.VCOInputFreq_Value=16000000
SPI1.DataSize=SPI_DATASIZE_8BIT
File.Version=6
PC9.Mode=Slave_8_bits_External_Synchro
PB7.Signal=DCMI_VSYNC
PA14\ (JTCK/SWCLK).Mode=JTAG_4_pins
RCC.PLLRCLKFreq_Value=80000000
PG13.Mode=I2C
PB6.Locked=true
NVIC.PendSV_IRQn=true\:0\:0\:false\:false\:true\:false\:false
PE4.Mode=Slave_8_bits_External_Synchro
TIM4.Period=999
PA13\ (JTMS/SWDIO).Locked=true
PD3.Mode=Slave_8_bits_External_Synchro
PE4.Signal=DCMI_D4
Dma.RequestsNb=1
ProjectManager.HalAssertFull=false
ProjectManager.ProjectName=TencentOS_tiny
Mcu.Package=LQFP144
PA6.Signal=SPI1_MISO
Dma.DCMI.0.RequestParameters=Instance,Direction,PeriphInc,MemInc,PeriphDataAlignment,MemDataAlignment,Mode,Priority
PA5.Locked=true
Dma.DCMI.0.Direction=DMA_PERIPH_TO_MEMORY
ProjectManager.ToolChainLocation=
PA14\ (JTCK/SWCLK).Locked=true
RCC.LSI_VALUE=32000
VP_SYS_VS_Systick.Signal=SYS_VS_Systick
RCC.LSCOPinFreq_Value=32000
RCC.DFSDMFreq_Value=80000000
RCC.PLLPoutputFreq_Value=80000000
SH.S_TIM4_CH1.0=TIM4_CH1,PWM Generation1 CH1
RCC.APB1TimFreq_Value=80000000
NVIC.BusFault_IRQn=true\:0\:0\:false\:false\:true\:false\:false
RCC.LPUART1Freq_Value=80000000
Dma.Request0=DCMI
ProjectManager.CustomerFirmwarePackage=
RCC.HSI48_VALUE=48000000
PA5.Mode=Full_Duplex_Master
RCC.MSI_VALUE=4000000
RCC.PLLSourceVirtual=RCC_PLLSOURCE_HSI
TIM4.Channel-PWM\ Generation1\ CH1=TIM_CHANNEL_1
SH.S_TIM4_CH1.ConfNb=1
RCC.PLLQoutputFreq_Value=80000000
ProjectManager.ProjectFileName=TencentOS_tiny.ioc
PG7.Locked=true
PA7.Mode=Full_Duplex_Master
PG7.Signal=LPUART1_TX
PA13\ (JTMS/SWDIO).Mode=JTAG_4_pins
Mcu.PinsNb=31
ProjectManager.NoMain=false
SPI1.IPParameters=VirtualType,Mode,Direction,CalculateBaudRate,DataSize
RCC.SWPMI1Freq_Value=80000000
PD3.Signal=DCMI_D5
PC8.Signal=DCMI_D2
PC6.Signal=DCMI_D0
ProjectManager.DefaultFWLocation=true
PC15-OSC32_OUT\ (PC15).Signal=RCC_OSC32_OUT
PD9.Signal=DCMI_PIXCLK
PB12.Locked=true
ProjectManager.DeletePrevious=true
PB14.Locked=true
RCC.VCOSAI2OutputFreq_Value=128000000
LPUART1.IPParameters=WordLength,BaudRate
RCC.FamilyName=M
PA11.Locked=true
PB3\ (JTDO/TRACESWO).Locked=true
PB3\ (JTDO/TRACESWO).Signal=SYS_JTDO-SWO
VP_TIM4_VS_ClockSourceINT.Signal=TIM4_VS_ClockSourceINT
ProjectManager.TargetToolchain=EWARM V8
TIM4.IPParameters=Channel-PWM Generation1 CH1,Prescaler,Period
PC6.Mode=Slave_8_bits_External_Synchro
Dma.DCMI.0.Instance=DMA2_Channel6
Dma.DCMI.0.Mode=DMA_CIRCULAR
ProjectManager.RegisterCallBack=
RCC.USBFreq_Value=64000000
TIM4.Prescaler=300
PG7.Mode=Asynchronous
RCC.PLLSAI1PoutputFreq_Value=64000000
DCMI.JPEGMode=DCMI_JPEG_ENABLE
PB14.Signal=GPIO_Output
RCC.PLLSAI2RoutputFreq_Value=64000000
PA5.Signal=SPI1_SCK
PG14.Signal=I2C1_SCL
DCMI.IPParameters=JPEGMode,PCKPolarity
board=custom
RCC.VCOOutputFreq_Value=160000000
ProjectManager.LastFirmware=true
RCC.APB2Freq_Value=80000000
RCC.UART4Freq_Value=80000000
PE6.Mode=Slave_8_bits_External_Synchro
MxCube.Version=5.4.0
RCC.I2C1Freq_Value=80000000
SPI1.Mode=SPI_MODE_MASTER
RCC.RNGFreq_Value=64000000
PE5.Mode=Slave_8_bits_External_Synchro
RCC.PLLSAI1QoutputFreq_Value=64000000
Mcu.Pin30=VP_TIM4_VS_ClockSourceINT
RCC.ADCFreq_Value=64000000
VP_SYS_VS_Systick.Mode=SysTick
NVIC.NonMaskableInt_IRQn=true\:0\:0\:false\:false\:true\:false\:false
NVIC.DMA2_Channel6_IRQn=true\:0\:0\:false\:false\:true\:false\:true
PE6.Signal=DCMI_D7
RCC.UART5Freq_Value=80000000
ProjectManager.FreePins=false
RCC.IPParameters=ADCFreq_Value,AHBFreq_Value,APB1Freq_Value,APB1TimFreq_Value,APB2Freq_Value,APB2TimFreq_Value,CortexFreq_Value,DFSDMFreq_Value,FCLKCortexFreq_Value,FamilyName,HCLKFreq_Value,HSE_VALUE,HSI48_VALUE,HSI_VALUE,I2C1Freq_Value,I2C2Freq_Value,I2C3Freq_Value,I2C4Freq_Value,LPTIM1Freq_Value,LPTIM2Freq_Value,LPUART1Freq_Value,LSCOPinFreq_Value,LSI_VALUE,MCO1PinFreq_Value,MSI_VALUE,PLLN,PLLPoutputFreq_Value,PLLQoutputFreq_Value,PLLRCLKFreq_Value,PLLSAI1PoutputFreq_Value,PLLSAI1QoutputFreq_Value,PLLSAI1RoutputFreq_Value,PLLSAI2PoutputFreq_Value,PLLSAI2RoutputFreq_Value,PLLSourceVirtual,PWRFreq_Value,RNGFreq_Value,SAI1Freq_Value,SAI2Freq_Value,SDMMCFreq_Value,SWPMI1Freq_Value,SYSCLKFreq_VALUE,SYSCLKSource,UART4Freq_Value,UART5Freq_Value,USART1Freq_Value,USART2Freq_Value,USART3Freq_Value,USBFreq_Value,VCOInputFreq_Value,VCOOutputFreq_Value,VCOSAI1OutputFreq_Value,VCOSAI2OutputFreq_Value
ProjectManager.AskForMigrate=false
Mcu.Name=STM32L496Z(E-G)Tx
RCC.LPTIM2Freq_Value=80000000
Mcu.Pin26=PB3 (JTDO/TRACESWO)
Mcu.Pin27=PB6
LPUART1.BaudRate=115200
Mcu.Pin24=PG13
ProjectManager.UnderRoot=false
Mcu.Pin25=PG14
Mcu.IP8=TIM4
Mcu.Pin28=PB7
Mcu.IP6=SPI1
PC8.Mode=Slave_8_bits_External_Synchro
Mcu.Pin29=VP_SYS_VS_Systick
Mcu.IP7=SYS
ProjectManager.CoupleFile=true
PA4.Signal=DCMI_HSYNC
RCC.SYSCLKFreq_VALUE=80000000
Mcu.Pin22=PA15 (JTDI)
Mcu.Pin23=PD3
Mcu.Pin20=PA13 (JTMS/SWDIO)
Mcu.Pin21=PA14 (JTCK/SWCLK)
NVIC.ForceEnableDMAVector=true
RCC.PLLSAI2PoutputFreq_Value=64000000
KeepUserPlacement=false
PC14-OSC32_IN\ (PC14).Signal=RCC_OSC32_IN
NVIC.MemoryManagement_IRQn=true\:0\:0\:false\:false\:true\:false\:false
ProjectManager.CompilerOptimize=6
PB7.Mode=Slave_8_bits_External_Synchro
PA11.Signal=GPIO_Output
ProjectManager.HeapSize=0x200
Mcu.Pin15=PC7
NVIC.HardFault_IRQn=true\:0\:0\:false\:false\:true\:false\:false
Mcu.Pin16=PC8
Mcu.Pin13=PG8
Mcu.Pin14=PC6
Mcu.Pin19=PA12
ProjectManager.ComputerToolchain=false
Mcu.Pin17=PC9
RCC.HSI_VALUE=16000000
Mcu.Pin18=PA11
VP_TIM4_VS_ClockSourceINT.Mode=Internal
NVIC.PriorityGroup=NVIC_PRIORITYGROUP_4
Mcu.Pin11=PD9
Mcu.Pin12=PG7
RCC.PLLN=10
PB3\ (JTDO/TRACESWO).Mode=JTAG_4_pins
Mcu.Pin10=PB14
RCC.PWRFreq_Value=80000000
Dma.DCMI.0.PeriphInc=DMA_PINC_DISABLE
PC9.Signal=DCMI_D3
NVIC.DCMI_IRQn=true\:0\:0\:false\:false\:true\:true\:true
PC7.Mode=Slave_8_bits_External_Synchro
RCC.I2C2Freq_Value=80000000
RCC.APB1Freq_Value=80000000
ProjectManager.DeviceId=STM32L496ZGTx
PB12.Signal=GPIO_Output
ProjectManager.LibraryCopy=0
PA13\ (JTMS/SWDIO).Signal=SYS_JTMS-SWDIO
Dma.DCMI.0.Priority=DMA_PRIORITY_HIGH
PA7.Signal=SPI1_MOSI

View File

@@ -0,0 +1,20 @@
## TencentOS-tiny_Person_Detection_Demo
### 1. Directory structure:
- TencentOS-tiny\board\NUCLEO_STM32L496ZG\BSP\Hardware : **peripheral driver code**
- TencentOS-tiny\examples\tflitemicro_person_detection : **demo task functions** (see the sketch at the end of this README)
- TencentOS-tiny\board\NUCLEO_STM32L496ZG\KEIL\tflitemicro_person_detection : **Keil project**
- TencentOS-tiny\components\tflite_micro\tensorflow : **tflite_micro code**
### 2. Completed work:
- Regenerated the peripheral initialization code with STM32CubeMX, selecting the same firmware library version as TOS
- TOS, the camera and the LCD all work correctly
### 3. Remaining work:
- Add tflite_micro to the project as a component
- Serial-port redirection uses the TOS approach; retarget.c has not yet been added to the project
- Variable and function names have not yet been fully unified with the TOS naming style
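For orientation, a rough sketch of the demo task body (an assumption, not the actual file in examples\tflitemicro_person_detection): application_entry comes from main.c and frame_flag from mcu_init.c in this patch, setup() is the commented-out tensorflow init in board_init(), while loop() and the tos_task_delay() call are assumed here.

extern uint8_t frame_flag;
void setup(void);   /* tflite_micro person-detection init */
void loop(void);    /* assumed inference entry */

void application_entry(void *arg)
{
    setup();
    while (1) {
        if (frame_flag) {     /* set by HAL_DCMI_FrameEventCallback when a frame is ready */
            loop();           /* run one inference on the captured frame */
            frame_flag = 0;
        }
        tos_task_delay(10);   /* yield to other TencentOS-tiny tasks */
    }
}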

File diff suppressed because it is too large

View File

@@ -0,0 +1,813 @@
<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<Project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="project_projx.xsd">
<SchemaVersion>2.1</SchemaVersion>
<Header>### uVision Project, (C) Keil Software</Header>
<Targets>
<Target>
<TargetName>TencentOS_tiny</TargetName>
<ToolsetNumber>0x4</ToolsetNumber>
<ToolsetName>ARM-ADS</ToolsetName>
<pCCUsed>5060750::V5.06 update 6 (build 750)::.\ARMCC</pCCUsed>
<uAC6>0</uAC6>
<TargetOption>
<TargetCommonOption>
<Device>STM32L496ZGTx</Device>
<Vendor>STMicroelectronics</Vendor>
<PackID>Keil.STM32L4xx_DFP.2.4.0</PackID>
<PackURL>http://www.keil.com/pack/</PackURL>
<Cpu>IRAM(0x20000000-0x2004FFFF) IROM(0x8000000-0x80FFFFF) CLOCK(8000000) FPU2 CPUTYPE("Cortex-M4")</Cpu>
<FlashUtilSpec></FlashUtilSpec>
<StartupFile></StartupFile>
<FlashDriverDll></FlashDriverDll>
<DeviceId></DeviceId>
<RegisterFile></RegisterFile>
<MemoryEnv></MemoryEnv>
<Cmp></Cmp>
<Asm></Asm>
<Linker></Linker>
<OHString></OHString>
<InfinionOptionDll></InfinionOptionDll>
<SLE66CMisc></SLE66CMisc>
<SLE66AMisc></SLE66AMisc>
<SLE66LinkerMisc></SLE66LinkerMisc>
<SFDFile>$$Device:STM32L496ZGTx$CMSIS\SVD\STM32L4x6.svd</SFDFile>
<bCustSvd>0</bCustSvd>
<UseEnv>0</UseEnv>
<BinPath></BinPath>
<IncludePath></IncludePath>
<LibPath></LibPath>
<RegisterFilePath></RegisterFilePath>
<DBRegisterFilePath></DBRegisterFilePath>
<TargetStatus>
<Error>0</Error>
<ExitCodeStop>0</ExitCodeStop>
<ButtonStop>0</ButtonStop>
<NotGenerated>0</NotGenerated>
<InvalidFlash>1</InvalidFlash>
</TargetStatus>
<OutputDirectory>TencentOS_tiny\</OutputDirectory>
<OutputName>TencentOS_tiny</OutputName>
<CreateExecutable>1</CreateExecutable>
<CreateLib>0</CreateLib>
<CreateHexFile>1</CreateHexFile>
<DebugInformation>1</DebugInformation>
<BrowseInformation>1</BrowseInformation>
<ListingPath></ListingPath>
<HexFormatSelection>1</HexFormatSelection>
<Merge32K>0</Merge32K>
<CreateBatchFile>0</CreateBatchFile>
<BeforeCompile>
<RunUserProg1>0</RunUserProg1>
<RunUserProg2>0</RunUserProg2>
<UserProg1Name></UserProg1Name>
<UserProg2Name></UserProg2Name>
<UserProg1Dos16Mode>0</UserProg1Dos16Mode>
<UserProg2Dos16Mode>0</UserProg2Dos16Mode>
<nStopU1X>0</nStopU1X>
<nStopU2X>0</nStopU2X>
</BeforeCompile>
<BeforeMake>
<RunUserProg1>0</RunUserProg1>
<RunUserProg2>0</RunUserProg2>
<UserProg1Name></UserProg1Name>
<UserProg2Name></UserProg2Name>
<UserProg1Dos16Mode>0</UserProg1Dos16Mode>
<UserProg2Dos16Mode>0</UserProg2Dos16Mode>
<nStopB1X>0</nStopB1X>
<nStopB2X>0</nStopB2X>
</BeforeMake>
<AfterMake>
<RunUserProg1>0</RunUserProg1>
<RunUserProg2>0</RunUserProg2>
<UserProg1Name></UserProg1Name>
<UserProg2Name></UserProg2Name>
<UserProg1Dos16Mode>0</UserProg1Dos16Mode>
<UserProg2Dos16Mode>0</UserProg2Dos16Mode>
<nStopA1X>0</nStopA1X>
<nStopA2X>0</nStopA2X>
</AfterMake>
<SelectedForBatchBuild>0</SelectedForBatchBuild>
<SVCSIdString></SVCSIdString>
</TargetCommonOption>
<CommonProperty>
<UseCPPCompiler>0</UseCPPCompiler>
<RVCTCodeConst>0</RVCTCodeConst>
<RVCTZI>0</RVCTZI>
<RVCTOtherData>0</RVCTOtherData>
<ModuleSelection>0</ModuleSelection>
<IncludeInBuild>1</IncludeInBuild>
<AlwaysBuild>0</AlwaysBuild>
<GenerateAssemblyFile>0</GenerateAssemblyFile>
<AssembleAssemblyFile>0</AssembleAssemblyFile>
<PublicsOnly>0</PublicsOnly>
<StopOnExitCode>3</StopOnExitCode>
<CustomArgument></CustomArgument>
<IncludeLibraryModules></IncludeLibraryModules>
<ComprImg>0</ComprImg>
</CommonProperty>
<DllOption>
<SimDllName>SARMCM3.DLL</SimDllName>
<SimDllArguments>-REMAP -MPU</SimDllArguments>
<SimDlgDll>DCM.DLL</SimDlgDll>
<SimDlgDllArguments>-pCM4</SimDlgDllArguments>
<TargetDllName>SARMCM3.DLL</TargetDllName>
<TargetDllArguments>-MPU</TargetDllArguments>
<TargetDlgDll>TCM.DLL</TargetDlgDll>
<TargetDlgDllArguments>-pCM4</TargetDlgDllArguments>
</DllOption>
<DebugOption>
<OPTHX>
<HexSelection>1</HexSelection>
<HexRangeLowAddress>0</HexRangeLowAddress>
<HexRangeHighAddress>0</HexRangeHighAddress>
<HexOffset>0</HexOffset>
<Oh166RecLen>16</Oh166RecLen>
</OPTHX>
</DebugOption>
<Utilities>
<Flash1>
<UseTargetDll>1</UseTargetDll>
<UseExternalTool>0</UseExternalTool>
<RunIndependent>0</RunIndependent>
<UpdateFlashBeforeDebugging>1</UpdateFlashBeforeDebugging>
<Capability>1</Capability>
<DriverSelection>4107</DriverSelection>
</Flash1>
<bUseTDR>1</bUseTDR>
<Flash2>STLink\ST-LINKIII-KEIL_SWO.dll</Flash2>
<Flash3></Flash3>
<Flash4></Flash4>
<pFcarmOut></pFcarmOut>
<pFcarmGrp></pFcarmGrp>
<pFcArmRoot></pFcArmRoot>
<FcArmLst>0</FcArmLst>
</Utilities>
<TargetArmAds>
<ArmAdsMisc>
<GenerateListings>0</GenerateListings>
<asHll>1</asHll>
<asAsm>1</asAsm>
<asMacX>1</asMacX>
<asSyms>1</asSyms>
<asFals>1</asFals>
<asDbgD>1</asDbgD>
<asForm>1</asForm>
<ldLst>0</ldLst>
<ldmm>1</ldmm>
<ldXref>1</ldXref>
<BigEnd>0</BigEnd>
<AdsALst>1</AdsALst>
<AdsACrf>1</AdsACrf>
<AdsANop>0</AdsANop>
<AdsANot>0</AdsANot>
<AdsLLst>1</AdsLLst>
<AdsLmap>1</AdsLmap>
<AdsLcgr>1</AdsLcgr>
<AdsLsym>1</AdsLsym>
<AdsLszi>1</AdsLszi>
<AdsLtoi>1</AdsLtoi>
<AdsLsun>1</AdsLsun>
<AdsLven>1</AdsLven>
<AdsLsxf>1</AdsLsxf>
<RvctClst>0</RvctClst>
<GenPPlst>0</GenPPlst>
<AdsCpuType>"Cortex-M4"</AdsCpuType>
<RvctDeviceName></RvctDeviceName>
<mOS>0</mOS>
<uocRom>0</uocRom>
<uocRam>0</uocRam>
<hadIROM>1</hadIROM>
<hadIRAM>1</hadIRAM>
<hadXRAM>0</hadXRAM>
<uocXRam>0</uocXRam>
<RvdsVP>2</RvdsVP>
<RvdsMve>0</RvdsMve>
<RvdsCdeCp>0</RvdsCdeCp>
<hadIRAM2>0</hadIRAM2>
<hadIROM2>0</hadIROM2>
<StupSel>8</StupSel>
<useUlib>1</useUlib>
<EndSel>0</EndSel>
<uLtcg>0</uLtcg>
<nSecure>0</nSecure>
<RoSelD>3</RoSelD>
<RwSelD>3</RwSelD>
<CodeSel>0</CodeSel>
<OptFeed>0</OptFeed>
<NoZi1>0</NoZi1>
<NoZi2>0</NoZi2>
<NoZi3>0</NoZi3>
<NoZi4>0</NoZi4>
<NoZi5>0</NoZi5>
<Ro1Chk>0</Ro1Chk>
<Ro2Chk>0</Ro2Chk>
<Ro3Chk>0</Ro3Chk>
<Ir1Chk>1</Ir1Chk>
<Ir2Chk>0</Ir2Chk>
<Ra1Chk>0</Ra1Chk>
<Ra2Chk>0</Ra2Chk>
<Ra3Chk>0</Ra3Chk>
<Im1Chk>1</Im1Chk>
<Im2Chk>0</Im2Chk>
<OnChipMemories>
<Ocm1>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm1>
<Ocm2>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm2>
<Ocm3>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm3>
<Ocm4>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm4>
<Ocm5>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm5>
<Ocm6>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm6>
<IRAM>
<Type>0</Type>
<StartAddress>0x20000000</StartAddress>
<Size>0x50000</Size>
</IRAM>
<IROM>
<Type>1</Type>
<StartAddress>0x8000000</StartAddress>
<Size>0x100000</Size>
</IROM>
<XRAM>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</XRAM>
<OCR_RVCT1>
<Type>1</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT1>
<OCR_RVCT2>
<Type>1</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT2>
<OCR_RVCT3>
<Type>1</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT3>
<OCR_RVCT4>
<Type>1</Type>
<StartAddress>0x8000000</StartAddress>
<Size>0x100000</Size>
</OCR_RVCT4>
<OCR_RVCT5>
<Type>1</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT5>
<OCR_RVCT6>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT6>
<OCR_RVCT7>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT7>
<OCR_RVCT8>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT8>
<OCR_RVCT9>
<Type>0</Type>
<StartAddress>0x20000000</StartAddress>
<Size>0x50000</Size>
</OCR_RVCT9>
<OCR_RVCT10>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT10>
</OnChipMemories>
<RvctStartVector></RvctStartVector>
</ArmAdsMisc>
<Cads>
<interw>1</interw>
<Optim>4</Optim>
<oTime>0</oTime>
<SplitLS>0</SplitLS>
<OneElfS>1</OneElfS>
<Strict>0</Strict>
<EnumInt>0</EnumInt>
<PlainCh>0</PlainCh>
<Ropi>0</Ropi>
<Rwpi>0</Rwpi>
<wLevel>2</wLevel>
<uThumb>0</uThumb>
<uSurpInc>0</uSurpInc>
<uC99>1</uC99>
<uGnu>0</uGnu>
<useXO>0</useXO>
<v6Lang>1</v6Lang>
<v6LangP>1</v6LangP>
<vShortEn>1</vShortEn>
<vShortWch>1</vShortWch>
<v6Lto>0</v6Lto>
<v6WtE>0</v6WtE>
<v6Rtti>0</v6Rtti>
<VariousControls>
<MiscControls></MiscControls>
<Define>USE_HAL_DRIVER,STM32L496xx</Define>
<Undefine></Undefine>
<IncludePath>..\..\BSP\Inc;..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Inc;..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Inc\Legacy;..\..\..\..\platform\vendor_bsp\st\CMSIS\Device\ST\STM32L4xx\Include;..\..\..\..\platform\vendor_bsp\st\CMSIS\Include;..\..\..\..\arch\arm\arm-v7m\common\include;..\..\..\..\arch\arm\arm-v7m\cortex-m4\armcc;..\..\..\..\kernel\core\include;..\..\..\..\kernel\pm\include;..\..\..\..\osal\cmsis_os;..\..\..\..\examples\hello_world;..\..\TOS_CONFIG;..\..\..\..\net\at\include;..\..\..\..\kernel\hal\include;..\..\BSP\Hardware\Inc</IncludePath>
</VariousControls>
</Cads>
<Aads>
<interw>1</interw>
<Ropi>0</Ropi>
<Rwpi>0</Rwpi>
<thumb>0</thumb>
<SplitLS>0</SplitLS>
<SwStkChk>0</SwStkChk>
<NoWarn>0</NoWarn>
<uSurpInc>0</uSurpInc>
<useXO>0</useXO>
<ClangAsOpt>4</ClangAsOpt>
<VariousControls>
<MiscControls></MiscControls>
<Define></Define>
<Undefine></Undefine>
<IncludePath></IncludePath>
</VariousControls>
</Aads>
<LDads>
<umfTarg>1</umfTarg>
<Ropi>0</Ropi>
<Rwpi>0</Rwpi>
<noStLib>0</noStLib>
<RepFail>1</RepFail>
<useFile>0</useFile>
<TextAddressRange>0x08000000</TextAddressRange>
<DataAddressRange>0x20000000</DataAddressRange>
<pXoBase></pXoBase>
<ScatterFile></ScatterFile>
<IncludeLibs></IncludeLibs>
<IncludeLibsPath></IncludeLibsPath>
<Misc></Misc>
<LinkerInputFile></LinkerInputFile>
<DisabledWarnings></DisabledWarnings>
</LDads>
</TargetArmAds>
</TargetOption>
<Groups>
<Group>
<GroupName>Application/MDK-ARM</GroupName>
<Files>
<File>
<FileName>startup_stm32l496xx.s</FileName>
<FileType>2</FileType>
<FilePath>startup_stm32l496xx.s</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>Application/User</GroupName>
<Files>
<File>
<FileName>main.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\main.c</FilePath>
</File>
<File>
<FileName>gpio.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\gpio.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_msp.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\stm32l4xx_hal_msp.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_it.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\stm32l4xx_it.c</FilePath>
</File>
<File>
<FileName>sys.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\sys.c</FilePath>
</File>
<File>
<FileName>usart.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\usart.c</FilePath>
</File>
<File>
<FileName>mcu_init.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\mcu_init.c</FilePath>
</File>
<File>
<FileName>dcmi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\dcmi.c</FilePath>
</File>
<File>
<FileName>dma.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\dma.c</FilePath>
</File>
<File>
<FileName>i2c.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\i2c.c</FilePath>
</File>
<File>
<FileName>spi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\spi.c</FilePath>
</File>
<File>
<FileName>tim.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\tim.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>Drivers/STM32L4xx_HAL_Driver</GroupName>
<Files>
<File>
<FileName>stm32l4xx_hal_uart.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_uart.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_uart_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_uart_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_i2c.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_i2c.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_i2c_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_i2c_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_rcc.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_rcc.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_rcc_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_rcc_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_flash.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_flash.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_flash_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_flash_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_flash_ramfunc.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_flash_ramfunc.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_gpio.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_gpio.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_dma.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_dma.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_dma_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_dma_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_pwr.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_pwr.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_pwr_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_pwr_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_cortex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_cortex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_exti.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_exti.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_tim.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_tim.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_tim_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_tim_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_dcmi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_dcmi.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_spi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_spi.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_spi_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_spi_ex.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>Drivers/CMSIS</GroupName>
<Files>
<File>
<FileName>system_stm32l4xx.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\system_stm32l4xx.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>tos/arch</GroupName>
<Files>
<File>
<FileName>tos_cpu.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\arch\arm\arm-v7m\common\tos_cpu.c</FilePath>
</File>
<File>
<FileName>port_c.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\arch\arm\arm-v7m\cortex-m4\armcc\port_c.c</FilePath>
</File>
<File>
<FileName>port_s.S</FileName>
<FileType>2</FileType>
<FilePath>..\..\..\..\arch\arm\arm-v7m\cortex-m4\armcc\port_s.S</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>tos/kernel</GroupName>
<Files>
<File>
<FileName>tos_binary_heap.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_binary_heap.c</FilePath>
</File>
<File>
<FileName>tos_char_fifo.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_char_fifo.c</FilePath>
</File>
<File>
<FileName>tos_completion.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_completion.c</FilePath>
</File>
<File>
<FileName>tos_countdownlatch.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_countdownlatch.c</FilePath>
</File>
<File>
<FileName>tos_event.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_event.c</FilePath>
</File>
<File>
<FileName>tos_global.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_global.c</FilePath>
</File>
<File>
<FileName>tos_mail_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_mail_queue.c</FilePath>
</File>
<File>
<FileName>tos_message_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_message_queue.c</FilePath>
</File>
<File>
<FileName>tos_mmblk.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_mmblk.c</FilePath>
</File>
<File>
<FileName>tos_mmheap.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_mmheap.c</FilePath>
</File>
<File>
<FileName>tos_mutex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_mutex.c</FilePath>
</File>
<File>
<FileName>tos_pend.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_pend.c</FilePath>
</File>
<File>
<FileName>tos_priority_mail_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_priority_mail_queue.c</FilePath>
</File>
<File>
<FileName>tos_priority_message_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_priority_message_queue.c</FilePath>
</File>
<File>
<FileName>tos_priority_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_priority_queue.c</FilePath>
</File>
<File>
<FileName>tos_ring_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_ring_queue.c</FilePath>
</File>
<File>
<FileName>tos_robin.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_robin.c</FilePath>
</File>
<File>
<FileName>tos_sched.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_sched.c</FilePath>
</File>
<File>
<FileName>tos_sem.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_sem.c</FilePath>
</File>
<File>
<FileName>tos_sys.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_sys.c</FilePath>
</File>
<File>
<FileName>tos_task.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_task.c</FilePath>
</File>
<File>
<FileName>tos_tick.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_tick.c</FilePath>
</File>
<File>
<FileName>tos_time.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_time.c</FilePath>
</File>
<File>
<FileName>tos_timer.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_timer.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>tos/cmsis_os</GroupName>
<Files>
<File>
<FileName>cmsis_os.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\osal\cmsis_os\cmsis_os.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>hal</GroupName>
<Files>
<File>
<FileName>lcd_config.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\lcd_config.c</FilePath>
</File>
<File>
<FileName>lcd_2inch4.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\lcd_2inch4.c</FilePath>
</File>
<File>
<FileName>ov2640.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\ov2640.c</FilePath>
</File>
<File>
<FileName>sccb.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\sccb.c</FilePath>
</File>
<File>
<FileName>delay.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\delay.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>examples</GroupName>
<Files>
<File>
<FileName>tflitemicro_person_detection.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\examples\tflitemicro_person_detection\tflitemicro_person_detection.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>::CMSIS</GroupName>
</Group>
</Groups>
</Target>
</Targets>
<RTE>
<apis/>
<components>
<component Cclass="CMSIS" Cgroup="CORE" Cvendor="ARM" Cversion="4.3.0" condition="CMSIS Core">
<package name="CMSIS" schemaVersion="1.3" url="http://www.keil.com/pack/" vendor="ARM" version="4.5.0"/>
<targetInfos>
<targetInfo name="TencentOS_tiny"/>
</targetInfos>
</component>
</components>
<files/>
</RTE>
<LayerInfo>
<Layers>
<Layer>
<LayName>&lt;Project Info&gt;</LayName>
<LayDesc></LayDesc>
<LayUrl></LayUrl>
<LayKeys></LayKeys>
<LayCat></LayCat>
<LayLic></LayLic>
<LayTarg>0</LayTarg>
<LayPrjMark>1</LayPrjMark>
</Layer>
</Layers>
</LayerInfo>
</Project>

View File

@@ -0,0 +1,450 @@
;*******************************************************************************
;* File Name : startup_stm32l496xx.s
;* Author : MCD Application Team
;* Description : STM32L496xx Ultra Low Power devices vector table for MDK-ARM toolchain.
;* This module performs:
;* - Set the initial SP
;* - Set the initial PC == Reset_Handler
;* - Set the vector table entries with the exceptions ISR address
;* - Branches to __main in the C library (which eventually
;* calls main()).
;* After Reset the Cortex-M4 processor is in Thread mode,
;* priority is Privileged, and the Stack is set to Main.
;* <<< Use Configuration Wizard in Context Menu >>>
;*******************************************************************************
;*
;* <h2><center>&copy; Copyright (c) 2017 STMicroelectronics.
;* All rights reserved.</center></h2>
;*
;* This software component is licensed by ST under BSD 3-Clause license,
;* the "License"; You may not use this file except in compliance with the
;* License. You may obtain a copy of the License at:
;* opensource.org/licenses/BSD-3-Clause
;*
;*******************************************************************************
;
; Amount of memory (in bytes) allocated for Stack
; Tailor this value to your application needs
; <h> Stack Configuration
; <o> Stack Size (in Bytes) <0x0-0xFFFFFFFF:8>
; </h>
Stack_Size EQU 0x400
AREA STACK, NOINIT, READWRITE, ALIGN=3
Stack_Mem SPACE Stack_Size
__initial_sp
; <h> Heap Configuration
; <o> Heap Size (in Bytes) <0x0-0xFFFFFFFF:8>
; </h>
Heap_Size EQU 0x200
AREA HEAP, NOINIT, READWRITE, ALIGN=3
__heap_base
Heap_Mem SPACE Heap_Size
__heap_limit
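; Illustrative note (not part of the original startup file): with the values
; above the C library is given a 0x400-byte (1 KB) main stack and a
; 0x200-byte (512 B) heap; both can be tuned per application via the
; configuration wizard annotations.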
PRESERVE8
THUMB
; Vector Table Mapped to Address 0 at Reset
AREA RESET, DATA, READONLY
EXPORT __Vectors
EXPORT __Vectors_End
EXPORT __Vectors_Size
__Vectors DCD __initial_sp ; Top of Stack
DCD Reset_Handler ; Reset Handler
DCD NMI_Handler ; NMI Handler
DCD HardFault_Handler ; Hard Fault Handler
DCD MemManage_Handler ; MPU Fault Handler
DCD BusFault_Handler ; Bus Fault Handler
DCD UsageFault_Handler ; Usage Fault Handler
DCD 0 ; Reserved
DCD 0 ; Reserved
DCD 0 ; Reserved
DCD 0 ; Reserved
DCD SVC_Handler ; SVCall Handler
DCD DebugMon_Handler ; Debug Monitor Handler
DCD 0 ; Reserved
DCD PendSV_Handler ; PendSV Handler
DCD SysTick_Handler ; SysTick Handler
; External Interrupts
DCD WWDG_IRQHandler ; Window WatchDog
DCD PVD_PVM_IRQHandler ; PVD/PVM1/PVM2/PVM3/PVM4 through EXTI Line detection
DCD TAMP_STAMP_IRQHandler ; Tamper and TimeStamps through the EXTI line
DCD RTC_WKUP_IRQHandler ; RTC Wakeup through the EXTI line
DCD FLASH_IRQHandler ; FLASH
DCD RCC_IRQHandler ; RCC
DCD EXTI0_IRQHandler ; EXTI Line0
DCD EXTI1_IRQHandler ; EXTI Line1
DCD EXTI2_IRQHandler ; EXTI Line2
DCD EXTI3_IRQHandler ; EXTI Line3
DCD EXTI4_IRQHandler ; EXTI Line4
DCD DMA1_Channel1_IRQHandler ; DMA1 Channel 1
DCD DMA1_Channel2_IRQHandler ; DMA1 Channel 2
DCD DMA1_Channel3_IRQHandler ; DMA1 Channel 3
DCD DMA1_Channel4_IRQHandler ; DMA1 Channel 4
DCD DMA1_Channel5_IRQHandler ; DMA1 Channel 5
DCD DMA1_Channel6_IRQHandler ; DMA1 Channel 6
DCD DMA1_Channel7_IRQHandler ; DMA1 Channel 7
DCD ADC1_2_IRQHandler ; ADC1, ADC2
DCD CAN1_TX_IRQHandler ; CAN1 TX
DCD CAN1_RX0_IRQHandler ; CAN1 RX0
DCD CAN1_RX1_IRQHandler ; CAN1 RX1
DCD CAN1_SCE_IRQHandler ; CAN1 SCE
DCD EXTI9_5_IRQHandler ; External Line[9:5]s
DCD TIM1_BRK_TIM15_IRQHandler ; TIM1 Break and TIM15
DCD TIM1_UP_TIM16_IRQHandler ; TIM1 Update and TIM16
DCD TIM1_TRG_COM_TIM17_IRQHandler ; TIM1 Trigger and Commutation and TIM17
DCD TIM1_CC_IRQHandler ; TIM1 Capture Compare
DCD TIM2_IRQHandler ; TIM2
DCD TIM3_IRQHandler ; TIM3
DCD TIM4_IRQHandler ; TIM4
DCD I2C1_EV_IRQHandler ; I2C1 Event
DCD I2C1_ER_IRQHandler ; I2C1 Error
DCD I2C2_EV_IRQHandler ; I2C2 Event
DCD I2C2_ER_IRQHandler ; I2C2 Error
DCD SPI1_IRQHandler ; SPI1
DCD SPI2_IRQHandler ; SPI2
DCD USART1_IRQHandler ; USART1
DCD USART2_IRQHandler ; USART2
DCD USART3_IRQHandler ; USART3
DCD EXTI15_10_IRQHandler ; External Line[15:10]
DCD RTC_Alarm_IRQHandler ; RTC Alarm (A and B) through EXTI Line
DCD DFSDM1_FLT3_IRQHandler ; DFSDM1 Filter 3 global Interrupt
DCD TIM8_BRK_IRQHandler ; TIM8 Break Interrupt
DCD TIM8_UP_IRQHandler ; TIM8 Update Interrupt
DCD TIM8_TRG_COM_IRQHandler ; TIM8 Trigger and Commutation Interrupt
DCD TIM8_CC_IRQHandler ; TIM8 Capture Compare Interrupt
DCD ADC3_IRQHandler ; ADC3 global Interrupt
DCD FMC_IRQHandler ; FMC
DCD SDMMC1_IRQHandler ; SDMMC1
DCD TIM5_IRQHandler ; TIM5
DCD SPI3_IRQHandler ; SPI3
DCD UART4_IRQHandler ; UART4
DCD UART5_IRQHandler ; UART5
DCD TIM6_DAC_IRQHandler ; TIM6 and DAC1&2 underrun errors
DCD TIM7_IRQHandler ; TIM7
DCD DMA2_Channel1_IRQHandler ; DMA2 Channel 1
DCD DMA2_Channel2_IRQHandler ; DMA2 Channel 2
DCD DMA2_Channel3_IRQHandler ; DMA2 Channel 3
DCD DMA2_Channel4_IRQHandler ; DMA2 Channel 4
DCD DMA2_Channel5_IRQHandler ; DMA2 Channel 5
DCD DFSDM1_FLT0_IRQHandler ; DFSDM1 Filter 0 global Interrupt
DCD DFSDM1_FLT1_IRQHandler ; DFSDM1 Filter 1 global Interrupt
DCD DFSDM1_FLT2_IRQHandler ; DFSDM1 Filter 2 global Interrupt
DCD COMP_IRQHandler ; COMP Interrupt
DCD LPTIM1_IRQHandler ; LP TIM1 interrupt
DCD LPTIM2_IRQHandler ; LP TIM2 interrupt
DCD OTG_FS_IRQHandler ; USB OTG FS
DCD DMA2_Channel6_IRQHandler ; DMA2 Channel 6
DCD DMA2_Channel7_IRQHandler ; DMA2 Channel 7
DCD LPUART1_IRQHandler ; LP UART1 interrupt
DCD QUADSPI_IRQHandler ; Quad SPI global interrupt
DCD I2C3_EV_IRQHandler ; I2C3 event
DCD I2C3_ER_IRQHandler ; I2C3 error
DCD SAI1_IRQHandler ; Serial Audio Interface 1 global interrupt
DCD SAI2_IRQHandler ; Serial Audio Interface 2 global interrupt
DCD SWPMI1_IRQHandler ; Serial Wire Interface 1 global interrupt
DCD TSC_IRQHandler ; Touch Sense Controller global interrupt
DCD LCD_IRQHandler ; LCD global interrupt
DCD 0 ; Reserved
DCD RNG_IRQHandler ; RNG global interrupt
DCD FPU_IRQHandler ; FPU
DCD CRS_IRQHandler ; CRS error
DCD I2C4_EV_IRQHandler ; I2C4 event
DCD I2C4_ER_IRQHandler ; I2C4 error
DCD DCMI_IRQHandler ; DCMI global interrupt
DCD CAN2_TX_IRQHandler ; CAN2 TX
DCD CAN2_RX0_IRQHandler ; CAN2 RX0
DCD CAN2_RX1_IRQHandler ; CAN2 RX1
DCD CAN2_SCE_IRQHandler ; CAN2 SCE
DCD DMA2D_IRQHandler ; DMA2D global interrupt
__Vectors_End
__Vectors_Size EQU __Vectors_End - __Vectors
AREA |.text|, CODE, READONLY
; Reset handler
Reset_Handler PROC
EXPORT Reset_Handler [WEAK]
IMPORT SystemInit
IMPORT __main
LDR R0, =SystemInit
BLX R0
LDR R0, =__main
BX R0
ENDP
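; Illustrative note (not part of the original startup file): Reset_Handler
; first runs SystemInit() (provided by system_stm32l4xx.c), then branches to
; the C library entry __main, which initializes RW/ZI data and eventually
; calls main().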
; Dummy Exception Handlers (infinite loops which can be modified)
NMI_Handler PROC
EXPORT NMI_Handler [WEAK]
B .
ENDP
HardFault_Handler\
PROC
EXPORT HardFault_Handler [WEAK]
B .
ENDP
MemManage_Handler\
PROC
EXPORT MemManage_Handler [WEAK]
B .
ENDP
BusFault_Handler\
PROC
EXPORT BusFault_Handler [WEAK]
B .
ENDP
UsageFault_Handler\
PROC
EXPORT UsageFault_Handler [WEAK]
B .
ENDP
SVC_Handler PROC
EXPORT SVC_Handler [WEAK]
B .
ENDP
DebugMon_Handler\
PROC
EXPORT DebugMon_Handler [WEAK]
B .
ENDP
PendSV_Handler PROC
EXPORT PendSV_Handler [WEAK]
B .
ENDP
SysTick_Handler PROC
EXPORT SysTick_Handler [WEAK]
B .
ENDP
Default_Handler PROC
EXPORT WWDG_IRQHandler [WEAK]
EXPORT PVD_PVM_IRQHandler [WEAK]
EXPORT TAMP_STAMP_IRQHandler [WEAK]
EXPORT RTC_WKUP_IRQHandler [WEAK]
EXPORT FLASH_IRQHandler [WEAK]
EXPORT RCC_IRQHandler [WEAK]
EXPORT EXTI0_IRQHandler [WEAK]
EXPORT EXTI1_IRQHandler [WEAK]
EXPORT EXTI2_IRQHandler [WEAK]
EXPORT EXTI3_IRQHandler [WEAK]
EXPORT EXTI4_IRQHandler [WEAK]
EXPORT DMA1_Channel1_IRQHandler [WEAK]
EXPORT DMA1_Channel2_IRQHandler [WEAK]
EXPORT DMA1_Channel3_IRQHandler [WEAK]
EXPORT DMA1_Channel4_IRQHandler [WEAK]
EXPORT DMA1_Channel5_IRQHandler [WEAK]
EXPORT DMA1_Channel6_IRQHandler [WEAK]
EXPORT DMA1_Channel7_IRQHandler [WEAK]
EXPORT ADC1_2_IRQHandler [WEAK]
EXPORT CAN1_TX_IRQHandler [WEAK]
EXPORT CAN1_RX0_IRQHandler [WEAK]
EXPORT CAN1_RX1_IRQHandler [WEAK]
EXPORT CAN1_SCE_IRQHandler [WEAK]
EXPORT EXTI9_5_IRQHandler [WEAK]
EXPORT TIM1_BRK_TIM15_IRQHandler [WEAK]
EXPORT TIM1_UP_TIM16_IRQHandler [WEAK]
EXPORT TIM1_TRG_COM_TIM17_IRQHandler [WEAK]
EXPORT TIM1_CC_IRQHandler [WEAK]
EXPORT TIM2_IRQHandler [WEAK]
EXPORT TIM3_IRQHandler [WEAK]
EXPORT TIM4_IRQHandler [WEAK]
EXPORT I2C1_EV_IRQHandler [WEAK]
EXPORT I2C1_ER_IRQHandler [WEAK]
EXPORT I2C2_EV_IRQHandler [WEAK]
EXPORT I2C2_ER_IRQHandler [WEAK]
EXPORT SPI1_IRQHandler [WEAK]
EXPORT SPI2_IRQHandler [WEAK]
EXPORT USART1_IRQHandler [WEAK]
EXPORT USART2_IRQHandler [WEAK]
EXPORT USART3_IRQHandler [WEAK]
EXPORT EXTI15_10_IRQHandler [WEAK]
EXPORT RTC_Alarm_IRQHandler [WEAK]
EXPORT DFSDM1_FLT3_IRQHandler [WEAK]
EXPORT TIM8_BRK_IRQHandler [WEAK]
EXPORT TIM8_UP_IRQHandler [WEAK]
EXPORT TIM8_TRG_COM_IRQHandler [WEAK]
EXPORT TIM8_CC_IRQHandler [WEAK]
EXPORT ADC3_IRQHandler [WEAK]
EXPORT FMC_IRQHandler [WEAK]
EXPORT SDMMC1_IRQHandler [WEAK]
EXPORT TIM5_IRQHandler [WEAK]
EXPORT SPI3_IRQHandler [WEAK]
EXPORT UART4_IRQHandler [WEAK]
EXPORT UART5_IRQHandler [WEAK]
EXPORT TIM6_DAC_IRQHandler [WEAK]
EXPORT TIM7_IRQHandler [WEAK]
EXPORT DMA2_Channel1_IRQHandler [WEAK]
EXPORT DMA2_Channel2_IRQHandler [WEAK]
EXPORT DMA2_Channel3_IRQHandler [WEAK]
EXPORT DMA2_Channel4_IRQHandler [WEAK]
EXPORT DMA2_Channel5_IRQHandler [WEAK]
EXPORT DFSDM1_FLT0_IRQHandler [WEAK]
EXPORT DFSDM1_FLT1_IRQHandler [WEAK]
EXPORT DFSDM1_FLT2_IRQHandler [WEAK]
EXPORT COMP_IRQHandler [WEAK]
EXPORT LPTIM1_IRQHandler [WEAK]
EXPORT LPTIM2_IRQHandler [WEAK]
EXPORT OTG_FS_IRQHandler [WEAK]
EXPORT DMA2_Channel6_IRQHandler [WEAK]
EXPORT DMA2_Channel7_IRQHandler [WEAK]
EXPORT LPUART1_IRQHandler [WEAK]
EXPORT QUADSPI_IRQHandler [WEAK]
EXPORT I2C3_EV_IRQHandler [WEAK]
EXPORT I2C3_ER_IRQHandler [WEAK]
EXPORT SAI1_IRQHandler [WEAK]
EXPORT SAI2_IRQHandler [WEAK]
EXPORT SWPMI1_IRQHandler [WEAK]
EXPORT TSC_IRQHandler [WEAK]
EXPORT LCD_IRQHandler [WEAK]
EXPORT RNG_IRQHandler [WEAK]
EXPORT FPU_IRQHandler [WEAK]
EXPORT CRS_IRQHandler [WEAK]
EXPORT I2C4_EV_IRQHandler [WEAK]
EXPORT I2C4_ER_IRQHandler [WEAK]
EXPORT DCMI_IRQHandler [WEAK]
EXPORT CAN2_TX_IRQHandler [WEAK]
EXPORT CAN2_RX0_IRQHandler [WEAK]
EXPORT CAN2_RX1_IRQHandler [WEAK]
EXPORT CAN2_SCE_IRQHandler [WEAK]
EXPORT DMA2D_IRQHandler [WEAK]
WWDG_IRQHandler
PVD_PVM_IRQHandler
TAMP_STAMP_IRQHandler
RTC_WKUP_IRQHandler
FLASH_IRQHandler
RCC_IRQHandler
EXTI0_IRQHandler
EXTI1_IRQHandler
EXTI2_IRQHandler
EXTI3_IRQHandler
EXTI4_IRQHandler
DMA1_Channel1_IRQHandler
DMA1_Channel2_IRQHandler
DMA1_Channel3_IRQHandler
DMA1_Channel4_IRQHandler
DMA1_Channel5_IRQHandler
DMA1_Channel6_IRQHandler
DMA1_Channel7_IRQHandler
ADC1_2_IRQHandler
CAN1_TX_IRQHandler
CAN1_RX0_IRQHandler
CAN1_RX1_IRQHandler
CAN1_SCE_IRQHandler
EXTI9_5_IRQHandler
TIM1_BRK_TIM15_IRQHandler
TIM1_UP_TIM16_IRQHandler
TIM1_TRG_COM_TIM17_IRQHandler
TIM1_CC_IRQHandler
TIM2_IRQHandler
TIM3_IRQHandler
TIM4_IRQHandler
I2C1_EV_IRQHandler
I2C1_ER_IRQHandler
I2C2_EV_IRQHandler
I2C2_ER_IRQHandler
SPI1_IRQHandler
SPI2_IRQHandler
USART1_IRQHandler
USART2_IRQHandler
USART3_IRQHandler
EXTI15_10_IRQHandler
RTC_Alarm_IRQHandler
DFSDM1_FLT3_IRQHandler
TIM8_BRK_IRQHandler
TIM8_UP_IRQHandler
TIM8_TRG_COM_IRQHandler
TIM8_CC_IRQHandler
ADC3_IRQHandler
FMC_IRQHandler
SDMMC1_IRQHandler
TIM5_IRQHandler
SPI3_IRQHandler
UART4_IRQHandler
UART5_IRQHandler
TIM6_DAC_IRQHandler
TIM7_IRQHandler
DMA2_Channel1_IRQHandler
DMA2_Channel2_IRQHandler
DMA2_Channel3_IRQHandler
DMA2_Channel4_IRQHandler
DMA2_Channel5_IRQHandler
DFSDM1_FLT0_IRQHandler
DFSDM1_FLT1_IRQHandler
DFSDM1_FLT2_IRQHandler
COMP_IRQHandler
LPTIM1_IRQHandler
LPTIM2_IRQHandler
OTG_FS_IRQHandler
DMA2_Channel6_IRQHandler
DMA2_Channel7_IRQHandler
LPUART1_IRQHandler
QUADSPI_IRQHandler
I2C3_EV_IRQHandler
I2C3_ER_IRQHandler
SAI1_IRQHandler
SAI2_IRQHandler
SWPMI1_IRQHandler
TSC_IRQHandler
LCD_IRQHandler
RNG_IRQHandler
FPU_IRQHandler
CRS_IRQHandler
I2C4_EV_IRQHandler
I2C4_ER_IRQHandler
DCMI_IRQHandler
CAN2_TX_IRQHandler
CAN2_RX0_IRQHandler
CAN2_RX1_IRQHandler
CAN2_SCE_IRQHandler
DMA2D_IRQHandler
B .
ENDP
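; Illustrative note (not part of the original startup file): every IRQ handler
; above is exported WEAK, so a C function with the same name (for example a
; DCMI_IRQHandler defined in the project's stm32l4xx_it.c) overrides this
; infinite-loop default at link time.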
ALIGN
;*******************************************************************************
; User Stack and Heap initialization
;*******************************************************************************
IF :DEF:__MICROLIB
EXPORT __initial_sp
EXPORT __heap_base
EXPORT __heap_limit
ELSE
IMPORT __use_two_region_memory
EXPORT __user_initial_stackheap
__user_initial_stackheap
LDR R0, = Heap_Mem
LDR R1, =(Stack_Mem + Stack_Size)
LDR R2, = (Heap_Mem + Heap_Size)
LDR R3, = Stack_Mem
BX LR
ALIGN
ENDIF
END
;************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE*****

View File

@@ -0,0 +1,70 @@
// File: STM32L43x_44x_45x_46x.dbgconf
// Version: 1.0.0
// Note: refer to STM32L43xxx STM32L44xxx STM32L45xxx STM32L46xxx Reference manual (RM0394)
// refer to STM32L431xx STM32L432xx STM32L433xx STM32L442xx STM32L443xx STM32L451xx STM32L452xx STM32L462xx datasheets
// <<< Use Configuration Wizard in Context Menu >>>
// <h> Debug MCU configuration register (DBGMCU_CR)
// <o.2> DBG_STANDBY <i> Debug Standby mode
// <o.1> DBG_STOP <i> Debug Stop mode
// <o.0> DBG_SLEEP <i> Debug Sleep mode
// </h>
DbgMCU_CR = 0x00000007;
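// Illustrative note (not part of the generated file): 0x00000007 sets bits
// 0..2 described above, i.e. the debug connection stays active in Sleep,
// Stop and Standby modes.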
// <h> Debug MCU APB1 freeze register 1 (DBGMCU_APB1FZR1)
// <i> Reserved bits must be kept at reset value
// <o.31> DBG_LPTIM1_STOP <i> LPTIM1 counter stopped when core is halted
// <o.25> DBG_CAN1_STOP <i> bxCAN1 stopped when core is halted
// <o.23> DBG_I2C3_STOP <i> I2C3 SMBUS timeout counter stopped when core is halted
// <o.22> DBG_I2C2_STOP <i> I2C2 SMBUS timeout counter stopped when core is halted
// <o.21> DBG_I2C1_STOP <i> I2C1 SMBUS timeout counter stopped when core is halted
// <o.12> DBG_IWDG_STOP <i> Independent watchdog counter stopped when core is halted
// <o.11> DBG_WWDG_STOP <i> Window watchdog counter stopped when core is halted
// <o.10> DBG_RTC_STOP <i> RTC counter stopped when core is halted
// <o.5> DBG_TIM7_STOP <i> TIM7 counter stopped when core is halted
// <o.4> DBG_TIM6_STOP <i> TIM6 counter stopped when core is halted
// <o.0> DBG_TIM2_STOP <i> TIM2 counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz1 = 0x00000000;
// <h> Debug MCU APB1 freeze register 2 (DBGMCU_APB1FZR2)
// <i> Reserved bits must be kept at reset value
// <o.5> DBG_LPTIM2_STOP <i> LPTIM2 counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz2 = 0x00000000;
// <h> Debug MCU APB2 freeze register (DBGMCU_APB2FZR)
// <i> Reserved bits must be kept at reset value
// <o.17> DBG_TIM16_STOP <i> TIM16 counter stopped when core is halted
// <o.16> DBG_TIM15_STOP <i> TIM15 counter stopped when core is halted
// <o.11> DBG_TIM1_STOP <i> TIM1 counter stopped when core is halted
// </h>
DbgMCU_APB2_Fz = 0x00000000;
// <h> TPIU Pin Routing (TRACECLK fixed on Pin PE2)
// <i> TRACECLK: Pin PE2
// <o1> TRACED0
// <i> ETM Trace Data 0
// <0x00040003=> Pin PE3
// <0x00020001=> Pin PC1
// <o2> TRACED1
// <i> ETM Trace Data 1
// <0x00040004=> Pin PE4
// <0x0002000A=> Pin PC10
// <o3> TRACED2
// <i> ETM Trace Data 2
// <0x00040005=> Pin PE5
// <0x00030002=> Pin PD2
// <o4> TRACED3
// <i> ETM Trace Data 3
// <0x00040006=> Pin PE6
// <0x0002000C=> Pin PC12
// </h>
TraceClk_Pin = 0x00040002;
TraceD0_Pin = 0x00040003;
TraceD1_Pin = 0x00040004;
TraceD2_Pin = 0x00040005;
TraceD3_Pin = 0x00040006;
// <<< end of configuration section >>>

View File

@@ -0,0 +1,21 @@
/*
* Auto generated Run-Time-Environment Configuration File
* *** Do not modify ! ***
*
* Project: 'TencentOS_tiny'
* Target: 'TencentOS_tiny'
*/
#ifndef RTE_COMPONENTS_H
#define RTE_COMPONENTS_H
/*
* Define the Device Header File:
*/
#define CMSIS_device_header "stm32l4xx.h"
#endif /* RTE_COMPONENTS_H */

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,70 @@
// File: STM32L43x_44x_45x_46x.dbgconf
// Version: 1.0.0
// Note: refer to STM32L43xxx STM32L44xxx STM32L45xxx STM32L46xxx Reference manual (RM0394)
// refer to STM32L431xx STM32L432xx STM32L433xx STM32L442xx STM32L443xx STM32L451xx STM32L452xx STM32L462xx datasheets
// <<< Use Configuration Wizard in Context Menu >>>
// <h> Debug MCU configuration register (DBGMCU_CR)
// <o.2> DBG_STANDBY <i> Debug Standby mode
// <o.1> DBG_STOP <i> Debug Stop mode
// <o.0> DBG_SLEEP <i> Debug Sleep mode
// </h>
DbgMCU_CR = 0x00000007;
// <h> Debug MCU APB1 freeze register 1 (DBGMCU_APB1FZR1)
// <i> Reserved bits must be kept at reset value
// <o.31> DBG_LPTIM1_STOP <i> LPTIM1 counter stopped when core is halted
// <o.25> DBG_CAN1_STOP <i> bxCAN1 stopped when core is halted
// <o.23> DBG_I2C3_STOP <i> I2C3 SMBUS timeout counter stopped when core is halted
// <o.22> DBG_I2C2_STOP <i> I2C2 SMBUS timeout counter stopped when core is halted
// <o.21> DBG_I2C1_STOP <i> I2C1 SMBUS timeout counter stopped when core is halted
// <o.12> DBG_IWDG_STOP <i> Independent watchdog counter stopped when core is halted
// <o.11> DBG_WWDG_STOP <i> Window watchdog counter stopped when core is halted
// <o.10> DBG_RTC_STOP <i> RTC counter stopped when core is halted
// <o.5> DBG_TIM7_STOP <i> TIM7 counter stopped when core is halted
// <o.4> DBG_TIM6_STOP <i> TIM6 counter stopped when core is halted
// <o.0> DBG_TIM2_STOP <i> TIM2 counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz1 = 0x00000000;
// <h> Debug MCU APB1 freeze register 2 (DBGMCU_APB1FZR2)
// <i> Reserved bits must be kept at reset value
// <o.5> DBG_LPTIM2_STOP <i> LPTIM2 counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz2 = 0x00000000;
// <h> Debug MCU APB2 freeze register (DBGMCU_APB2FZR)
// <i> Reserved bits must be kept at reset value
// <o.17> DBG_TIM16_STOP <i> TIM16 counter stopped when core is halted
// <o.16> DBG_TIM15_STOP <i> TIM15 counter stopped when core is halted
// <o.11> DBG_TIM1_STOP <i> TIM1 counter stopped when core is halted
// </h>
DbgMCU_APB2_Fz = 0x00000000;
// <h> TPIU Pin Routing (TRACECLK fixed on Pin PE2)
// <i> TRACECLK: Pin PE2
// <o1> TRACED0
// <i> ETM Trace Data 0
// <0x00040003=> Pin PE3
// <0x00020001=> Pin PC1
// <o2> TRACED1
// <i> ETM Trace Data 1
// <0x00040004=> Pin PE4
// <0x0002000A=> Pin PC10
// <o3> TRACED2
// <i> ETM Trace Data 2
// <0x00040005=> Pin PE5
// <0x00030002=> Pin PD2
// <o4> TRACED3
// <i> ETM Trace Data 3
// <0x00040006=> Pin PE6
// <0x0002000C=> Pin PC12
// </h>
TraceClk_Pin = 0x00040002;
TraceD0_Pin = 0x00040003;
TraceD1_Pin = 0x00040004;
TraceD2_Pin = 0x00040005;
TraceD3_Pin = 0x00040006;
// <<< end of configuration section >>>

View File

@@ -0,0 +1,21 @@
/*
* Auto generated Run-Time-Environment Configuration File
* *** Do not modify ! ***
*
* Project: 'TencentOS_tiny'
* Target: 'TencentOS_tiny'
*/
#ifndef RTE_COMPONENTS_H
#define RTE_COMPONENTS_H
/*
* Define the Device Header File:
*/
#define CMSIS_device_header "stm32l4xx.h"
#endif /* RTE_COMPONENTS_H */

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,139 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_PUBLIC_VERSION_H_
#define TENSORFLOW_CORE_PUBLIC_VERSION_H_
// TensorFlow uses semantic versioning, see http://semver.org/.
// Also update tensorflow/tensorflow.bzl and
// tensorflow/tools/pip_package/setup.py
#define TF_MAJOR_VERSION 2
#define TF_MINOR_VERSION 4
#define TF_PATCH_VERSION 0
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
#define TF_VERSION_SUFFIX ""
#define TF_STR_HELPER(x) #x
#define TF_STR(x) TF_STR_HELPER(x)
// e.g. "0.5.0" or "0.6.0-alpha".
#define TF_VERSION_STRING \
(TF_STR(TF_MAJOR_VERSION) "." TF_STR(TF_MINOR_VERSION) "." TF_STR( \
TF_PATCH_VERSION) TF_VERSION_SUFFIX)
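// Illustrative note (not part of the original header): with the values above
// the two-level TF_STR expansion yields the literal "2.4.0", e.g.
//   printf("built against TF %s\n", TF_VERSION_STRING);   // prints 2.4.0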
// GraphDef compatibility versions (the versions field in graph.proto).
//
// Each graph has producer and min_consumer versions, and each
// consumer has its own version and a min_producer. In addition, graphs can
// mark specific consumer versions as bad (to prevent bugs from executing).
// A consumer will execute a graph if the consumer's version is at least the
// graph's min_consumer, the graph's producer version is at least the consumer's
// min_producer, and the consumer version isn't specifically disallowed by the
// graph.
//
// By default, newly created graphs have producer version TF_GRAPH_DEF_VERSION
// min_consumer TF_GRAPH_DEF_MIN_CONSUMER, and no other bad consumer versions.
//
// Version history:
//
// 0. Graphs created before GraphDef versioning
// 1. First real version (2dec2015)
// 2. adjust_contrast only takes float, doesn't perform clamping (11dec2015)
// 3. Remove TileGrad, since it was equivalent to reduce_sum (30dec2015)
// 4. When support for this version is removed, we can safely make AttrValue
// parsing more strict with respect to empty list values (see
// 111635679, 7jan2016).
// 5. Graphs are wholly-validated during Session::Create() (7jan2016).
// 6. TensorFlow is scalar strict within Google (27jan2016).
// 7. Remove TopK in favor of TopKV2 (5feb2016).
// 8. Replace RandomCrop from C++ with pure Python (5feb2016).
// 9. Deprecate batch_norm_with_global_normalization (16feb2016).
// 10. Deprecate conv3d_backprop_{filter,input} (10jun2016).
// 11. Deprecate {batch}_self_adjoint_eig (3aug2016).
// 12. Graph consumers understand the node_def field of FunctionDef (22aug2016).
// 13. Deprecate multiple batch linear algebra ops (9sep2016).
// 14. Deprecate batch_matrix_* ops. (10sep2016).
// 15. Deprecate batch_fft_* ops. (14sep2016).
// 16. Deprecate tensor_array (v1) ops in favor of v2 (10nov2016).
// 17. Deprecate inv (11nov2016).
// 17. Expose reverse_v2 (10nov2016)
// 18. Add VariableV2 (30nov2016)
// 19. Deprecated ops created by models moved out of core SkipGram, NegTrain.
// (08dec2016)
// 20. Catch all version 1.0 changes to Python API generation. SplitV is now
// used for tf.split, ReverseV2 is now used by tf.reverse, ConcatV2 is
// now used by tf.concat. Graphs use flooring
// division and mod semantics. TensorArrayV3. (12dec2016)
// Also considered the version for when it is required for reduction
// ops' indices to be scalar or vector, and not higher rank.
// Some earlier graph def versions allowed this.
// 21. Dropped FunctionDef.Node support, switched to node_def introduced
// in version 12. (11jan2017)
// 22. Placeholder now can specify and enforce scalar and partial
// shapes, particularly when restoring a graph from GraphDef
// produced at version 22 or later. (04/10/2016)
// 23. Remove NonMaxSuppression in favor of NonMaxSuppressionV2.
// 24. Deprecate lookup ops (v1) ops in favor of v2 (30may2017)
// 25. Deprecate stack (v1) ops in favor of v2 (2017/6/15).
// 25. Deprecate RandomPoisson (v1) ops in favor of v2 (2017/10/25).
// 26. Add a bool 'stripped_default_attrs' to MetaInfoDef indicating
// whether default-valued attrs have been stripped from the nodes in the
// GraphDef. (7dec2017)
// 27. Deprecate TensorArray ops v2 in favor of v3 and deprecated io_ops
// deprecated in favor of V2 ops. (2018/01/23)
// 28. Deprecate MatrixExponential op in favor of Python implementation.
// (2018/08/21).
// (2019/02/15). Added `control_ret` field to FunctionDef proto, and
// `control_output` field to OpDef proto.
// 29. Deprecate StatefulStandardNormal op in favor of StatefulStandardNormalV2.
// (2019/03/25).
// (2019/04/17). Added `arg_attr` field to FunctionDefProto.
// 30. (2019/05/09) First date based GraphDef version. GraphDef
// versions advance by 1 each day after this point.
#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
#define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
#define TF_GRAPH_DEF_VERSION 485 // Updated: 2020/8/6
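// Illustrative sketch (not part of the original header): the compatibility
// rule described in the comment above, written out as plain C. The struct
// and function names here are hypothetical, not TensorFlow APIs.
//
//   typedef struct {
//     int producer;             // version of the writer that produced the graph
//     int min_consumer;         // oldest consumer allowed to execute it
//     const int* bad_consumers; // consumer versions explicitly disallowed
//     int num_bad_consumers;
//   } GraphVersions;
//
//   static int GraphIsCompatible(const GraphVersions* g, int consumer_version,
//                                int consumer_min_producer) {
//     if (consumer_version < g->min_consumer) return 0;
//     if (g->producer < consumer_min_producer) return 0;
//     for (int i = 0; i < g->num_bad_consumers; ++i) {
//       if (g->bad_consumers[i] == consumer_version) return 0;
//     }
//     return 1;
//   }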
// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
//
// The checkpoint versions have the same semantics as GraphDef versions, but the
// numbering scheme is separate. We have no plans to ever deprecate checkpoint
// versions, but it's good to have this in place in case we ever need to.
//
// Version history:
//
// 0. Checkpoints saved before checkpoint versioning.
// 1. First real version (10feb2015).
#define TF_CHECKPOINT_VERSION_MIN_PRODUCER 0
#define TF_CHECKPOINT_VERSION_MIN_CONSUMER 0
#define TF_CHECKPOINT_VERSION 1
/// Version query functions (defined in generated version_info.cc)
// Host compiler version (declared elsewhere to be __VERSION__)
extern const char* tf_compiler_version();
// The git commit designator when tensorflow was built
// If no git repository, this will be "internal".
extern const char* tf_git_version();
// Value of the _GLIBCXX_USE_CXX11_ABI flag, or 0 if it's not set.
extern int tf_cxx11_abi_flag();
// Returns 1 if build is monolithic, or 0 otherwise.
extern int tf_monolithic_build();
#endif // TENSORFLOW_CORE_PUBLIC_VERSION_H_

View File

@@ -0,0 +1,472 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
// number of dimensions.
#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8
// TODO(aselle): Consider using "if this then that" for testing.
// Useful placeholder to put in otherwise empty structs to avoid size warnings.
typedef struct {
char dummy;
} EmptyStructPlaceholder;
// IMPORTANT: All new members of structs must be added at the end to ensure
// backwards compatibility.
// Possible padding types (for convolutions)
typedef enum {
kTfLitePaddingUnknown = 0,
kTfLitePaddingSame,
kTfLitePaddingValid,
} TfLitePadding;
typedef enum {
kTfLiteMirrorPaddingUnknown = 0,
kTfLiteMirrorPaddingReflect,
kTfLiteMirrorPaddingSymmetric,
} TfLiteMirrorPaddingMode;
// TODO(b/130259536): We should move this out of builtin_op_data.
typedef struct {
int width;
int height;
int width_offset;
int height_offset;
} TfLitePaddingValues;
typedef struct {
TfLiteMirrorPaddingMode mode;
} TfLiteMirrorPaddingParams;
// Possible fused activation functions.
// TODO(aselle): rename to TfLiteActivation
typedef enum {
kTfLiteActNone = 0,
kTfLiteActRelu,
kTfLiteActReluN1To1, // min(max(-1, x), 1)
kTfLiteActRelu1 = kTfLiteActReluN1To1, // kTfLiteActRelu1 will be deprecated.
kTfLiteActRelu6, // min(max(0, x), 6)
kTfLiteActTanh,
kTfLiteActSignBit,
kTfLiteActSigmoid,
} TfLiteFusedActivation;
typedef struct {
// Parameters for CONV_2D version 1.
TfLitePadding padding;
int stride_width;
int stride_height;
TfLiteFusedActivation activation;
// Parameters for CONV_2D version 2.
// Note: Version 2 supports dilation values not equal to 1.
int dilation_width_factor;
int dilation_height_factor;
} TfLiteConvParams;
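// Illustrative usage sketch (not part of the original header): a CONV_2D
// kernel reads these parameters from the node's builtin data, e.g.
//   const TfLiteConvParams* params =
//       (const TfLiteConvParams*)(node->builtin_data);
//   if (params->activation == kTfLiteActRelu6) { /* fuse min(max(0, x), 6) */ }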
typedef struct {
TfLitePadding padding;
int stride_width;
int stride_height;
int filter_width;
int filter_height;
TfLiteFusedActivation activation;
struct {
TfLitePaddingValues padding;
} computed;
} TfLitePoolParams;
typedef struct {
// Parameters for DepthwiseConv version 1 or above.
TfLitePadding padding;
int stride_width;
int stride_height;
// `depth_multiplier` is redundant. It's used by CPU kernels in
// TensorFlow 2.0 or below, but ignored in versions above.
//
// The information can be deduced from the shape of input and the shape of
// weights. Since the TFLiteConverter toolchain doesn't support partially
// specified shapes, relying on `depth_multiplier` stops us from supporting
// graphs with dynamic shape tensors.
//
// Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
// field.
int depth_multiplier;
TfLiteFusedActivation activation;
// Parameters for DepthwiseConv version 2 or above.
int dilation_width_factor;
int dilation_height_factor;
} TfLiteDepthwiseConvParams;
typedef struct {
int rank;
TfLiteFusedActivation activation;
// Parameter for SVDF version 4.
bool asymmetric_quantize_inputs;
} TfLiteSVDFParams;
typedef struct {
TfLiteFusedActivation activation;
// Parameter for RNN version 3.
bool asymmetric_quantize_inputs;
} TfLiteRNNParams;
typedef struct {
bool time_major;
TfLiteFusedActivation activation;
// Parameter for Sequence RNN version 3.
bool asymmetric_quantize_inputs;
} TfLiteSequenceRNNParams;
typedef struct {
bool time_major;
TfLiteFusedActivation activation;
bool merge_outputs;
// Parameter for Bidirectional RNN version 3.
bool asymmetric_quantize_inputs;
} TfLiteBidirectionalSequenceRNNParams;
typedef enum {
kTfLiteFullyConnectedWeightsFormatDefault = 0,
kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
} TfLiteFullyConnectedWeightsFormat;
typedef struct {
// Parameters for FullyConnected version 1 or above.
TfLiteFusedActivation activation;
// Parameters for FullyConnected version 2 or above.
TfLiteFullyConnectedWeightsFormat weights_format;
// Parameters for FullyConnected version 5 or above.
// If set to true, then the number of dimensions in the input and the output
// tensors are the same. Furthermore, all but the last dimension of the input
// and output shapes will be equal.
bool keep_num_dims;
// Parameters for FullyConnected version 7 or above.
// If set to true and the weights are quantized, then non constant inputs
// are quantized at evaluation time with asymmetric quantization.
bool asymmetric_quantize_inputs;
} TfLiteFullyConnectedParams;
typedef enum {
kTfLiteLshProjectionUnknown = 0,
kTfLiteLshProjectionSparse = 1,
kTfLiteLshProjectionDense = 2,
} TfLiteLSHProjectionType;
typedef struct {
TfLiteLSHProjectionType type;
} TfLiteLSHProjectionParams;
typedef struct {
float beta;
} TfLiteSoftmaxParams;
typedef struct {
int axis;
TfLiteFusedActivation activation;
} TfLiteConcatenationParams;
typedef struct {
TfLiteFusedActivation activation;
// Parameter added for the version 4.
bool pot_scale_int16;
} TfLiteAddParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteSpaceToBatchNDParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteBatchToSpaceNDParams;
typedef struct {
bool adj_x;
bool adj_y;
} TfLiteBatchMatMulParams;
typedef struct {
TfLiteFusedActivation activation;
} TfLiteMulParams;
typedef struct {
TfLiteFusedActivation activation;
// Parameter added for the version 5.
bool pot_scale_int16;
} TfLiteSubParams;
typedef struct {
TfLiteFusedActivation activation;
} TfLiteDivParams;
typedef struct {
TfLiteFusedActivation activation;
} TfLiteL2NormParams;
typedef struct {
int radius;
float bias;
float alpha;
float beta;
} TfLiteLocalResponseNormParams;
typedef enum {
kTfLiteLSTMFullKernel = 0,
kTfLiteLSTMBasicKernel
} TfLiteLSTMKernelType;
typedef struct {
// Parameters for LSTM version 1.
TfLiteFusedActivation activation;
float cell_clip;
float proj_clip;
// Parameters for LSTM version 2.
// kTfLiteLSTMBasicKernel is only supported in version 2 or above.
TfLiteLSTMKernelType kernel_type;
// Parameters for LSTM version 4.
bool asymmetric_quantize_inputs;
} TfLiteLSTMParams;
typedef struct {
// Parameters needed for the underlying LSTM.
TfLiteFusedActivation activation;
float cell_clip;
float proj_clip;
// If set to true then the first dimension is time, otherwise batch.
bool time_major;
// Parameter for unidirectional sequence RNN version 3.
bool asymmetric_quantize_inputs;
} TfLiteUnidirectionalSequenceLSTMParams;
typedef struct {
// Parameters supported by version 1:
// Parameters inherited for the LSTM kernel.
TfLiteFusedActivation activation;
float cell_clip;
float proj_clip;
// If true, store the outputs of both directions in the first output.
bool merge_outputs;
// Parameters supported by version 2:
// If set to true then the first dimension is time, otherwise batch.
bool time_major;
// Parameters supported by version 4:
// If set to true, then hybrid ops use asymmetric quantization for inputs.
bool asymmetric_quantize_inputs;
} TfLiteBidirectionalSequenceLSTMParams;
typedef struct {
bool align_corners;
// half_pixel_centers assumes pixels are of half the actual dimensions, and
// yields more accurate resizes. Corresponds to the same argument for the
// original TensorFlow op in TF2.0.
bool half_pixel_centers;
} TfLiteResizeBilinearParams;
typedef struct {
bool align_corners;
bool half_pixel_centers;
} TfLiteResizeNearestNeighborParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLitePadParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLitePadV2Params;
typedef struct {
// TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
// For now we will fix the maximum possible number of dimensions.
int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
int num_dimensions;
} TfLiteReshapeParams;
typedef struct {
int ngram_size;
int max_skip_size;
bool include_all_ngrams;
} TfLiteSkipGramParams;
typedef struct {
int block_size;
} TfLiteSpaceToDepthParams;
typedef struct {
int block_size;
} TfLiteDepthToSpaceParams;
typedef struct {
TfLiteType in_data_type;
TfLiteType out_data_type;
} TfLiteCastParams;
typedef enum {
kTfLiteCombinerTypeSum = 0,
kTfLiteCombinerTypeMean = 1,
kTfLiteCombinerTypeSqrtn = 2,
} TfLiteCombinerType;
typedef struct {
TfLiteCombinerType combiner;
} TfLiteEmbeddingLookupSparseParams;
typedef struct {
int axis;
} TfLiteGatherParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteTransposeParams;
typedef struct {
bool keep_dims;
} TfLiteReducerParams;
typedef struct {
int num_splits;
} TfLiteSplitParams;
typedef struct {
int num_splits;
} TfLiteSplitVParams;
typedef struct {
// TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
// For now we will fix the maximum possible number of dimensions.
int squeeze_dims[8];
int num_squeeze_dims;
} TfLiteSqueezeParams;
typedef struct {
int begin_mask;
int end_mask;
int ellipsis_mask;
int new_axis_mask;
int shrink_axis_mask;
} TfLiteStridedSliceParams;
typedef struct {
TfLiteType output_type;
} TfLiteArgMaxParams;
typedef struct {
TfLiteType output_type;
} TfLiteArgMinParams;
typedef struct {
TfLitePadding padding;
int stride_width;
int stride_height;
} TfLiteTransposeConvParams;
typedef struct {
bool validate_indices;
} TfLiteSparseToDenseParams;
typedef struct {
TfLiteType out_type;
} TfLiteShapeParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteRankParams;
typedef struct {
// Parameters supported by version 1:
float min;
float max;
int num_bits;
// Parameters supported by version 2:
bool narrow_range;
} TfLiteFakeQuantParams;
typedef struct {
int values_count;
int axis;
} TfLitePackParams;
typedef struct {
int axis;
} TfLiteOneHotParams;
typedef struct {
int num;
int axis;
} TfLiteUnpackParams;
typedef struct {
float alpha;
} TfLiteLeakyReluParams;
typedef struct {
TfLiteType index_out_type;
} TfLiteUniqueParams;
typedef struct {
int seq_dim;
int batch_dim;
} TfLiteReverseSequenceParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteMatrixDiagParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteMatrixSetDiagParams;
typedef struct {
int then_subgraph_index;
int else_subgraph_index;
} TfLiteIfParams;
typedef struct {
int cond_subgraph_index;
int body_subgraph_index;
} TfLiteWhileParams;
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_

View File

@@ -0,0 +1,232 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#ifndef TF_LITE_STATIC_MEMORY
#include <stdlib.h>
#include <string.h>
#endif // TF_LITE_STATIC_MEMORY
int TfLiteIntArrayGetSizeInBytes(int size) {
static TfLiteIntArray dummy;
return sizeof(dummy) + sizeof(dummy.data[0]) * size;
}
int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b) {
if (a == b) return 1;
if (a == NULL || b == NULL) return 0;
return TfLiteIntArrayEqualsArray(a, b->size, b->data);
}
int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
const int b_data[]) {
if (a == NULL) return (b_size == 0);
if (a->size != b_size) return 0;
int i = 0;
for (; i < a->size; i++)
if (a->data[i] != b_data[i]) return 0;
return 1;
}
#ifndef TF_LITE_STATIC_MEMORY
TfLiteIntArray* TfLiteIntArrayCreate(int size) {
TfLiteIntArray* ret =
(TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size));
ret->size = size;
return ret;
}
TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src) {
if (!src) return NULL;
TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size);
if (ret) {
memcpy(ret->data, src->data, src->size * sizeof(int));
}
return ret;
}
void TfLiteIntArrayFree(TfLiteIntArray* a) { free(a); }
#endif // TF_LITE_STATIC_MEMORY
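// Illustrative usage sketch (not part of the original file), valid only when
// TF_LITE_STATIC_MEMORY is not defined:
//   TfLiteIntArray* dims = TfLiteIntArrayCreate(2);
//   dims->data[0] = 1;
//   dims->data[1] = 96;                  // e.g. a 1x96 tensor shape
//   /* ... hand `dims` to a tensor, or ... */
//   TfLiteIntArrayFree(dims);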
int TfLiteFloatArrayGetSizeInBytes(int size) {
static TfLiteFloatArray dummy;
return sizeof(dummy) + sizeof(dummy.data[0]) * size;
}
#ifndef TF_LITE_STATIC_MEMORY
TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
TfLiteFloatArray* ret =
(TfLiteFloatArray*)malloc(TfLiteFloatArrayGetSizeInBytes(size));
ret->size = size;
return ret;
}
void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); }
void TfLiteTensorDataFree(TfLiteTensor* t) {
if (t->allocation_type == kTfLiteDynamic ||
t->allocation_type == kTfLitePersistentRo) {
free(t->data.raw);
}
t->data.raw = NULL;
}
void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
if (quantization->type == kTfLiteAffineQuantization) {
TfLiteAffineQuantization* q_params =
(TfLiteAffineQuantization*)(quantization->params);
if (q_params->scale) {
TfLiteFloatArrayFree(q_params->scale);
q_params->scale = NULL;
}
if (q_params->zero_point) {
TfLiteIntArrayFree(q_params->zero_point);
q_params->zero_point = NULL;
}
free(q_params);
}
quantization->params = NULL;
quantization->type = kTfLiteNoQuantization;
}
void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
if (sparsity == NULL) {
return;
}
if (sparsity->traversal_order) {
TfLiteIntArrayFree(sparsity->traversal_order);
sparsity->traversal_order = NULL;
}
if (sparsity->block_map) {
TfLiteIntArrayFree(sparsity->block_map);
sparsity->block_map = NULL;
}
if (sparsity->dim_metadata) {
int i = 0;
for (; i < sparsity->dim_metadata_size; i++) {
TfLiteDimensionMetadata metadata = sparsity->dim_metadata[i];
if (metadata.format == kTfLiteDimSparseCSR) {
TfLiteIntArrayFree(metadata.array_segments);
metadata.array_segments = NULL;
TfLiteIntArrayFree(metadata.array_indices);
metadata.array_indices = NULL;
}
}
free(sparsity->dim_metadata);
sparsity->dim_metadata = NULL;
}
free(sparsity);
}
void TfLiteTensorFree(TfLiteTensor* t) {
TfLiteTensorDataFree(t);
if (t->dims) TfLiteIntArrayFree(t->dims);
t->dims = NULL;
if (t->dims_signature) {
TfLiteIntArrayFree((TfLiteIntArray *) t->dims_signature);
}
t->dims_signature = NULL;
TfLiteQuantizationFree(&t->quantization);
TfLiteSparsityFree(t->sparsity);
t->sparsity = NULL;
}
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
TfLiteQuantizationParams quantization, char* buffer,
size_t size, TfLiteAllocationType allocation_type,
const void* allocation, bool is_variable,
TfLiteTensor* tensor) {
TfLiteTensorFree(tensor);
tensor->type = type;
tensor->name = name;
tensor->dims = dims;
tensor->params = quantization;
tensor->data.raw = buffer;
tensor->bytes = size;
tensor->allocation_type = allocation_type;
tensor->allocation = allocation;
tensor->is_variable = is_variable;
tensor->quantization.type = kTfLiteNoQuantization;
tensor->quantization.params = NULL;
}
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
if (tensor->allocation_type != kTfLiteDynamic &&
tensor->allocation_type != kTfLitePersistentRo) {
return;
}
// TODO(b/145340303): Tensor data should be aligned.
if (!tensor->data.raw) {
tensor->data.raw = malloc(num_bytes);
} else if (num_bytes > tensor->bytes) {
tensor->data.raw = realloc(tensor->data.raw, num_bytes);
}
tensor->bytes = num_bytes;
}
#endif // TF_LITE_STATIC_MEMORY
const char* TfLiteTypeGetName(TfLiteType type) {
switch (type) {
case kTfLiteNoType:
return "NOTYPE";
case kTfLiteFloat32:
return "FLOAT32";
case kTfLiteInt16:
return "INT16";
case kTfLiteInt32:
return "INT32";
case kTfLiteUInt8:
return "UINT8";
case kTfLiteInt8:
return "INT8";
case kTfLiteInt64:
return "INT64";
case kTfLiteBool:
return "BOOL";
case kTfLiteComplex64:
return "COMPLEX64";
case kTfLiteComplex128:
return "COMPLEX128";
case kTfLiteString:
return "STRING";
case kTfLiteFloat16:
return "FLOAT16";
case kTfLiteFloat64:
return "FLOAT64";
}
return "Unknown type";
}
TfLiteDelegate TfLiteDelegateCreate() {
TfLiteDelegate d = {
.data_ = NULL,
.Prepare = NULL,
.CopyFromBufferHandle = NULL,
.CopyToBufferHandle = NULL,
.FreeBufferHandle = NULL,
.flags = kTfLiteDelegateFlagsNone,
};
return d;
}

View File

@@ -0,0 +1,936 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This file defines common C types and APIs for implementing operations,
// delegates and other constructs in TensorFlow Lite. The actual operations and
// delegates can be defined using C++, but the interface between the interpreter
// and the operations is C.
//
// Summary of abstractions
// TF_LITE_ENSURE - Self-sufficient error checking
// TfLiteStatus - Status reporting
// TfLiteIntArray - stores tensor shapes (dims),
// TfLiteContext - allows an op to access the tensors
// TfLiteTensor - tensor (a multidimensional array)
// TfLiteNode - a single node or operation
// TfLiteRegistration - the implementation of a conceptual operation.
// TfLiteDelegate - allows delegation of nodes to alternative backends.
//
// Some abstractions in this file are created and managed by Interpreter.
//
// NOTE: The order of values in these structs is "semi-ABI stable". New values
// should be added only to the end of structs and never reordered.
#ifndef TENSORFLOW_LITE_C_COMMON_H_
#define TENSORFLOW_LITE_C_COMMON_H_
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
typedef enum TfLiteStatus {
kTfLiteOk = 0,
kTfLiteError = 1,
kTfLiteDelegateError = 2
} TfLiteStatus;
// The list of external context types known to TF Lite. This list exists solely
// to avoid conflicts and to ensure ops can share the external contexts they
// need. Access to the external contexts is controlled by one of the
// corresponding support files.
typedef enum TfLiteExternalContextType {
kTfLiteEigenContext = 0, // include eigen_support.h to use.
kTfLiteGemmLowpContext = 1, // include gemm_support.h to use.
kTfLiteEdgeTpuContext = 2, // Placeholder for Edge TPU support.
kTfLiteCpuBackendContext = 3, // include cpu_backend_context.h to use.
kTfLiteMaxExternalContexts = 4
} TfLiteExternalContextType;
// Forward declare so dependent structs and methods can reference these types
// prior to the struct definitions.
struct TfLiteContext;
struct TfLiteDelegate;
struct TfLiteRegistration;
// An external context is a collection of information unrelated to the TF Lite
// framework, but useful to a subset of the ops. TF Lite knows very little
// about the actual contexts, but it keeps a list of them, and is able to
// refresh them if configurations like the number of recommended threads
// change.
typedef struct TfLiteExternalContext {
TfLiteExternalContextType type;
TfLiteStatus (*Refresh)(struct TfLiteContext* context);
} TfLiteExternalContext;
#define kTfLiteOptionalTensor (-1)
// Fixed size list of integers. Used for dimensions and inputs/outputs tensor
// indices
typedef struct TfLiteIntArray {
int size;
// gcc 6.1+ have a bug where flexible members aren't properly handled
// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
__GNUC_MINOR__ >= 1) || \
defined(HEXAGON)
int data[0];
#else
int data[];
#endif
} TfLiteIntArray;
// Given the size (number of elements) in a TfLiteIntArray, calculate its size
// in bytes.
int TfLiteIntArrayGetSizeInBytes(int size);
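// Illustrative note (not part of the original header): because `data` is a
// flexible array member, a TfLiteIntArray holding N ints is allocated as one
// contiguous block of TfLiteIntArrayGetSizeInBytes(N) bytes rather than as a
// separate header plus payload.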
#ifndef TF_LITE_STATIC_MEMORY
// Create an array of a given `size` (uninitialized entries).
// This returns a pointer that you must free using TfLiteIntArrayFree().
TfLiteIntArray* TfLiteIntArrayCreate(int size);
#endif
// Check if two intarrays are equal. Returns 1 if they are equal, 0 otherwise.
int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b);
// Check if an intarray equals an array. Returns 1 if equals, 0 otherwise.
int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
const int b_data[]);
#ifndef TF_LITE_STATIC_MEMORY
// Create a copy of an array passed as `src`.
// You are expected to free memory with TfLiteIntArrayFree
TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src);
// Free memory of array `a`.
void TfLiteIntArrayFree(TfLiteIntArray* a);
#endif // TF_LITE_STATIC_MEMORY
// Fixed size list of floats. Used for per-channel quantization.
typedef struct TfLiteFloatArray {
int size;
// gcc 6.1+ have a bug where flexible members aren't properly handled
// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
// This also applies to the toolchain used for Qualcomm Hexagon DSPs.
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
__GNUC_MINOR__ >= 1
float data[0];
#else
float data[];
#endif
} TfLiteFloatArray;
// Given the size (number of elements) in a TfLiteFloatArray, calculate its size
// in bytes.
int TfLiteFloatArrayGetSizeInBytes(int size);
#ifndef TF_LITE_STATIC_MEMORY
// Create an array of a given `size` (uninitialized entries).
// This returns a pointer, that you must free using TfLiteFloatArrayFree().
TfLiteFloatArray* TfLiteFloatArrayCreate(int size);
// Free memory of array `a`.
void TfLiteFloatArrayFree(TfLiteFloatArray* a);
#endif // TF_LITE_STATIC_MEMORY
// Since we must not depend on any libraries, define a minimal subset of
// error macros while avoiding names that have pre-conceived meanings like
// assert and check.
// Try to make all reporting calls through TF_LITE_KERNEL_LOG rather than
// calling the context->ReportError function directly, so that message strings
// can be stripped out if the binary size needs to be severely optimized.
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_KERNEL_LOG(context, ...) \
do { \
(context)->ReportError((context), __VA_ARGS__); \
} while (false)
#define TF_LITE_MAYBE_KERNEL_LOG(context, ...) \
do { \
if ((context) != nullptr) { \
(context)->ReportError((context), __VA_ARGS__); \
} \
} while (false)
#else // TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_KERNEL_LOG(context, ...)
#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)
#endif // TF_LITE_STRIP_ERROR_STRINGS
// Check whether value is true, and if not return kTfLiteError from
// the current function (and report the error string msg).
#define TF_LITE_ENSURE_MSG(context, value, msg) \
do { \
if (!(value)) { \
TF_LITE_KERNEL_LOG((context), __FILE__ " " msg); \
return kTfLiteError; \
} \
} while (0)
// Check whether the value `a` is true, and if not return kTfLiteError from
// the current function, while also reporting the location of the error.
#define TF_LITE_ENSURE(context, a) \
do { \
if (!(a)) { \
TF_LITE_KERNEL_LOG((context), "%s:%d %s was not true.", __FILE__, \
__LINE__, #a); \
return kTfLiteError; \
} \
} while (0)
#define TF_LITE_ENSURE_STATUS(a) \
do { \
const TfLiteStatus s = (a); \
if (s != kTfLiteOk) { \
return s; \
} \
} while (0)
// Check whether the value `a == b` is true, and if not return kTfLiteError from
// the current function, while also reporting the location of the error.
// `a` and `b` may be evaluated more than once, so no side effects or
// extremely expensive computations should be done.
// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes.
#define TF_LITE_ENSURE_EQ(context, a, b) \
do { \
if ((a) != (b)) { \
TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%d != %d)", __FILE__, \
__LINE__, #a, #b, (a), (b)); \
return kTfLiteError; \
} \
} while (0)
#define TF_LITE_ENSURE_TYPES_EQ(context, a, b) \
do { \
if ((a) != (b)) { \
TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%s != %s)", __FILE__, \
__LINE__, #a, #b, TfLiteTypeGetName(a), \
TfLiteTypeGetName(b)); \
return kTfLiteError; \
} \
} while (0)
#define TF_LITE_ENSURE_OK(context, status) \
do { \
const TfLiteStatus s = (status); \
if ((s) != kTfLiteOk) { \
return s; \
} \
} while (0)
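A minimal sketch (editor's example, using only the macros and types declared in this header) of how a kernel's Prepare routine typically chains these checks; the first failing check logs the file and line and returns kTfLiteError:

static TfLiteStatus ExamplePrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, node->inputs->size, 2);
  const TfLiteTensor* input = &context->tensors[node->inputs->data[0]];
  TF_LITE_ENSURE(context, input != nullptr);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
  TF_LITE_ENSURE_MSG(context, input->dims->size == 4, "expected a 4-D input");
  return kTfLiteOk;
}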
// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
// library.
#ifdef SWIG
#define TFL_CAPI_EXPORT
#else
#if defined(_WIN32)
#ifdef TFL_COMPILE_LIBRARY
#define TFL_CAPI_EXPORT __declspec(dllexport)
#else
#define TFL_CAPI_EXPORT __declspec(dllimport)
#endif // TFL_COMPILE_LIBRARY
#else
#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
#endif // _WIN32
#endif // SWIG
// Single-precision complex data type compatible with the C99 definition.
typedef struct TfLiteComplex64 {
float re, im; // real and imaginary parts, respectively.
} TfLiteComplex64;
// Double-precision complex data type compatible with the C99 definition.
typedef struct TfLiteComplex128 {
double re, im; // real and imaginary parts, respectively.
} TfLiteComplex128;
// Half precision data type compatible with the C99 definition.
typedef struct TfLiteFloat16 {
uint16_t data;
} TfLiteFloat16;
// Types supported by tensor
typedef enum {
kTfLiteNoType = 0,
kTfLiteFloat32 = 1,
kTfLiteInt32 = 2,
kTfLiteUInt8 = 3,
kTfLiteInt64 = 4,
kTfLiteString = 5,
kTfLiteBool = 6,
kTfLiteInt16 = 7,
kTfLiteComplex64 = 8,
kTfLiteInt8 = 9,
kTfLiteFloat16 = 10,
kTfLiteFloat64 = 11,
kTfLiteComplex128 = 12,
} TfLiteType;
// Return the name of a given type, for error reporting purposes.
const char* TfLiteTypeGetName(TfLiteType type);
// SupportedQuantizationTypes.
typedef enum TfLiteQuantizationType {
// No quantization.
kTfLiteNoQuantization = 0,
// Affine quantization (with support for per-channel quantization).
// Corresponds to TfLiteAffineQuantization.
kTfLiteAffineQuantization = 1,
} TfLiteQuantizationType;
// Structure specifying the quantization used by the tensor, if-any.
typedef struct TfLiteQuantization {
// The type of quantization held by params.
TfLiteQuantizationType type;
// Holds a reference to one of the quantization param structures specified
// below.
void* params;
} TfLiteQuantization;
// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
// If per-layer quantization is specified, this field will still be populated in
// addition to TfLiteAffineQuantization.
// Parameters for asymmetric quantization. Quantized values can be converted
// back to float using:
// real_value = scale * (quantized_value - zero_point)
typedef struct TfLiteQuantizationParams {
float scale;
int32_t zero_point;
} TfLiteQuantizationParams;
// Parameters for asymmetric quantization across a dimension (i.e per output
// channel quantization).
// quantized_dimension specifies which dimension the scales and zero_points
// correspond to.
// For a particular value in quantized_dimension, quantized values can be
// converted back to float using:
// real_value = scale * (quantized_value - zero_point)
typedef struct TfLiteAffineQuantization {
TfLiteFloatArray* scale;
TfLiteIntArray* zero_point;
int32_t quantized_dimension;
} TfLiteAffineQuantization;
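To make the formula above concrete: with scale = 0.5 and zero_point = 10, a quantized value of 14 decodes to 0.5 * (14 - 10) = 2.0. The sketch below (editor's example, not part of the original header) applies the same formula to the per-tensor and per-channel parameter structs:

// Editor's sketch of real_value = scale * (quantized_value - zero_point).
static float DequantizePerTensor(int8_t q, TfLiteQuantizationParams params) {
  return params.scale * (q - params.zero_point);
}

// Per-channel: use the scale/zero_point of the channel (along
// quantized_dimension) that the value belongs to.
static float DequantizePerChannel(int8_t q, int channel,
                                  const TfLiteAffineQuantization* aq) {
  return aq->scale->data[channel] * (q - aq->zero_point->data[channel]);
}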
/* A union of pointers that points to memory for a given tensor. */
typedef union TfLitePtrUnion {
  /* Do not access these members directly. If possible, use
   * GetTensorData<TYPE>(tensor) instead; otherwise access only .data, as the
   * other members are deprecated. */
int32_t* i32;
int64_t* i64;
float* f;
TfLiteFloat16* f16;
double* f64;
char* raw;
const char* raw_const;
uint8_t* uint8;
bool* b;
int16_t* i16;
TfLiteComplex64* c64;
TfLiteComplex128* c128;
int8_t* int8;
/* Only use this member. */
void* data;
} TfLitePtrUnion;
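As the comment notes, only the generic `data` member should be relied on; a short sketch (editor's example) of the usual access pattern:

// Editor's sketch: reach tensor memory through data.data and cast by type.
static const float* AsFloatData(const TfLiteTensor* tensor) {
  // Only meaningful when tensor->type == kTfLiteFloat32.
  return reinterpret_cast<const float*>(tensor->data.data);
}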
// Memory allocation strategies.
// * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated.
// * kTfLiteArenaRw: Arena allocated with no guarantees about persistence,
// and available during eval.
// * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and
// only available during eval.
// * kTfLiteDynamic: Allocated during eval, or for string tensors.
// * kTfLitePersistentRo: Allocated and populated during prepare. This is
// useful for tensors that can be computed during prepare and treated
// as constant inputs for downstream ops (also in prepare).
typedef enum TfLiteAllocationType {
kTfLiteMemNone = 0,
kTfLiteMmapRo,
kTfLiteArenaRw,
kTfLiteArenaRwPersistent,
kTfLiteDynamic,
kTfLitePersistentRo,
} TfLiteAllocationType;
// Delegates should use zero or positive integers to represent handles.
// -1 is reserved for the unallocated status.
typedef int TfLiteBufferHandle;
enum {
kTfLiteNullBufferHandle = -1,
};
// Storage format of each dimension in a sparse tensor.
typedef enum TfLiteDimensionType {
kTfLiteDimDense = 0,
kTfLiteDimSparseCSR,
} TfLiteDimensionType;
// Metadata to encode each dimension in a sparse tensor.
typedef struct TfLiteDimensionMetadata {
TfLiteDimensionType format;
int dense_size;
TfLiteIntArray* array_segments;
TfLiteIntArray* array_indices;
} TfLiteDimensionMetadata;
// Parameters used to encode a sparse tensor. For detailed explanation of each
// field please refer to lite/schema/schema.fbs.
typedef struct TfLiteSparsity {
TfLiteIntArray* traversal_order;
TfLiteIntArray* block_map;
TfLiteDimensionMetadata* dim_metadata;
int dim_metadata_size;
} TfLiteSparsity;
// A tensor in the interpreter system, which is a wrapper around a buffer of
// data that includes a dimensionality (or NULL if not currently defined).
#ifndef TF_LITE_STATIC_MEMORY
typedef struct TfLiteTensor {
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
TfLiteType type;
// A union of data pointers. The appropriate type should be used for a typed
// tensor based on `type`.
TfLitePtrUnion data;
// A pointer to a structure representing the dimensionality interpretation
// that the buffer should have. NOTE: the product of elements of `dims`
// and the element datatype size should be equal to `bytes` below.
TfLiteIntArray* dims;
// Quantization information.
TfLiteQuantizationParams params;
// How memory is mapped
// kTfLiteMmapRo: Memory mapped read only.
// i.e. weights
// kTfLiteArenaRw: Arena allocated read write memory
// (i.e. temporaries, outputs).
TfLiteAllocationType allocation_type;
// The number of bytes required to store the data of this Tensor. I.e.
// (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if
// type is kTfLiteFloat32 and dims = {3, 2} then
// bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
size_t bytes;
// An opaque pointer to a tflite::MMapAllocation
const void* allocation;
// Null-terminated name of this tensor.
const char* name;
// The delegate which knows how to handle `buffer_handle`.
// WARNING: This is an experimental interface that is subject to change.
struct TfLiteDelegate* delegate;
// An integer buffer handle that can be handled by `delegate`.
// The value is valid only when delegate is not null.
// WARNING: This is an experimental interface that is subject to change.
TfLiteBufferHandle buffer_handle;
// If the delegate uses its own buffer (e.g. GPU memory), the delegate is
// responsible to set data_is_stale to true.
// `delegate->CopyFromBufferHandle` can be called to copy the data from
// delegate buffer.
  // WARNING: This is an experimental interface that is subject to change.
bool data_is_stale;
// True if the tensor is a variable.
bool is_variable;
// Quantization information. Replaces params field above.
TfLiteQuantization quantization;
// Parameters used to encode a sparse tensor.
// This is optional. The field is NULL if a tensor is dense.
// WARNING: This is an experimental interface that is subject to change.
TfLiteSparsity* sparsity;
// Optional. Encodes shapes with unknown dimensions with -1. This field is
// only populated when unknown dimensions exist in a read-write tensor (i.e.
// an input or output tensor). (e.g. `dims` contains [1, 1, 1, 3] and
// `dims_signature` contains [1, -1, -1, 3]).
const TfLiteIntArray* dims_signature;
} TfLiteTensor;
// A structure representing an instance of a node.
// This structure only exhibits the inputs, outputs and user defined data, not
// other features like the type.
typedef struct TfLiteNode {
// Inputs to this node expressed as indices into the simulator's tensors.
TfLiteIntArray* inputs;
// Outputs to this node expressed as indices into the simulator's tensors.
TfLiteIntArray* outputs;
  // Intermediate tensors to this node expressed as indices into the simulator's
// tensors.
TfLiteIntArray* intermediates;
  // Temporary tensors used during the computation. This usually contains no
// tensors, but ops are allowed to change that if they need scratch space of
// any sort.
TfLiteIntArray* temporaries;
// Opaque data provided by the node implementer through `Registration.init`.
void* user_data;
// Opaque data provided to the node if the node is a builtin. This is usually
// a structure defined in builtin_op_data.h
void* builtin_data;
// Custom initial data. This is the opaque data provided in the flatbuffer.
// WARNING: This is an experimental interface that is subject to change.
const void* custom_initial_data;
int custom_initial_data_size;
// The pointer to the delegate. This is non-null only when the node is
// created by calling `interpreter.ModifyGraphWithDelegate`.
// WARNING: This is an experimental interface that is subject to change.
struct TfLiteDelegate* delegate;
} TfLiteNode;
#else // defined(TF_LITE_STATIC_MEMORY)?
// NOTE: This flag is opt-in only at compile time.
//
// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
// contains only the minimum fields required to initialize and prepare a micro
// inference graph. The fields in this struct have been ordered from
// largest-to-smallest for optimal struct sizeof.
//
// This struct does not use:
// - allocation
// - buffer_handle
// - data_is_stale
// - delegate
// - dims_signature
// - name
// - sparsity
typedef struct TfLiteTensor {
// TODO(b/155784997): Consider consolidating these quantization fields:
// Quantization information. Replaces params field above.
TfLiteQuantization quantization;
// Quantization information.
TfLiteQuantizationParams params;
// A union of data pointers. The appropriate type should be used for a typed
// tensor based on `type`.
TfLitePtrUnion data;
// A pointer to a structure representing the dimensionality interpretation
// that the buffer should have. NOTE: the product of elements of `dims`
// and the element datatype size should be equal to `bytes` below.
TfLiteIntArray* dims;
// The number of bytes required to store the data of this Tensor. I.e.
// (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if
// type is kTfLiteFloat32 and dims = {3, 2} then
// bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
size_t bytes;
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
TfLiteType type;
// How memory is mapped
// kTfLiteMmapRo: Memory mapped read only.
// i.e. weights
// kTfLiteArenaRw: Arena allocated read write memory
// (i.e. temporaries, outputs).
TfLiteAllocationType allocation_type;
// True if the tensor is a variable.
bool is_variable;
} TfLiteTensor;
// Specific reduced TfLiteNode struct for TF Micro runtime. This struct contains
// only the minimum fields required to represent a node.
//
// This struct does not use:
// - delegate
// - intermediates
// - temporaries
typedef struct TfLiteNode {
// Inputs to this node expressed as indices into the simulator's tensors.
TfLiteIntArray* inputs;
// Outputs to this node expressed as indices into the simulator's tensors.
TfLiteIntArray* outputs;
// Opaque data provided by the node implementer through `Registration.init`.
void* user_data;
// Opaque data provided to the node if the node is a builtin. This is usually
// a structure defined in builtin_op_data.h
void* builtin_data;
// Custom initial data. This is the opaque data provided in the flatbuffer.
// WARNING: This is an experimental interface that is subject to change.
const void* custom_initial_data;
int custom_initial_data_size;
} TfLiteNode;
#endif // TF_LITE_STATIC_MEMORY
// Light-weight tensor struct for TF Micro runtime. Provides the minimal amount
// of information required for a kernel to run during TfLiteRegistration::Eval.
// TODO(b/160955687): Move this field into TF_LITE_STATIC_MEMORY when TFLM
// builds with this flag by default internally.
typedef struct TfLiteEvalTensor {
// A union of data pointers. The appropriate type should be used for a typed
// tensor based on `type`.
TfLitePtrUnion data;
// A pointer to a structure representing the dimensionality interpretation
// that the buffer should have.
TfLiteIntArray* dims;
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
TfLiteType type;
} TfLiteEvalTensor;
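A short sketch (editor's example) of how a micro kernel's Eval stage typically reaches these: the indices stored in TfLiteNode are resolved through the GetEvalTensor hook declared on TfLiteContext further below, and the raw buffers are then read through `data.data`:

// Editor's sketch: an identity op reading and writing float eval tensors.
static TfLiteStatus IdentityEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      context->GetEvalTensor(context, node->inputs->data[0]);
  TfLiteEvalTensor* output =
      context->GetEvalTensor(context, node->outputs->data[0]);
  int count = 1;
  for (int i = 0; i < input->dims->size; ++i) count *= input->dims->data[i];
  const float* in = reinterpret_cast<const float*>(input->data.data);
  float* out = reinterpret_cast<float*>(output->data.data);
  for (int i = 0; i < count; ++i) out[i] = in[i];
  return kTfLiteOk;
}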
#ifndef TF_LITE_STATIC_MEMORY
// Free data memory of tensor `t`.
void TfLiteTensorDataFree(TfLiteTensor* t);
// Free quantization data.
void TfLiteQuantizationFree(TfLiteQuantization* quantization);
// Free sparsity parameters.
void TfLiteSparsityFree(TfLiteSparsity* sparsity);
// Free memory of tensor `t`.
void TfLiteTensorFree(TfLiteTensor* t);
// Set all of a tensor's fields (and free any previously allocated data).
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
TfLiteQuantizationParams quantization, char* buffer,
size_t size, TfLiteAllocationType allocation_type,
const void* allocation, bool is_variable,
TfLiteTensor* tensor);
// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
// types other than kTfLiteDynamic will be ignored.
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
#endif // TF_LITE_STATIC_MEMORY
// WARNING: This is an experimental interface that is subject to change.
//
// Currently, TfLiteDelegateParams has to be allocated in a way that it's
// trivially destructible. It will be stored in the `builtin_data` field of the
// `TfLiteNode` of the delegate node.
//
// See also the `CreateDelegateParams` function in `interpreter.cc` for details.
typedef struct TfLiteDelegateParams {
struct TfLiteDelegate* delegate;
TfLiteIntArray* nodes_to_replace;
TfLiteIntArray* input_tensors;
TfLiteIntArray* output_tensors;
} TfLiteDelegateParams;
typedef struct TfLiteContext {
// Number of tensors in the context.
size_t tensors_size;
// The execution plan contains a list of the node indices in execution
// order. execution_plan->size is the current number of nodes. And,
// execution_plan->data[0] is the first node that needs to be run.
// TfLiteDelegates can traverse the current execution plan by iterating
// through each member of this array and using GetNodeAndRegistration() to
// access details about a node. i.e.
// TfLiteIntArray* execution_plan;
// TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
// for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
// int node_index = execution_plan->data[exec_index];
// TfLiteNode* node;
// TfLiteRegistration* reg;
// context->GetNodeAndRegistration(context, node_index, &node, &reg);
// }
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext* context,
TfLiteIntArray** execution_plan);
// An array of tensors in the interpreter context (of length `tensors_size`)
TfLiteTensor* tensors;
// opaque full context ptr (an opaque c++ data structure)
void* impl_;
// Request memory pointer be resized. Updates dimensions on the tensor.
  // NOTE: ResizeTensor takes ownership of `new_size`.
TfLiteStatus (*ResizeTensor)(struct TfLiteContext*, TfLiteTensor* tensor,
TfLiteIntArray* new_size);
// Request that an error be reported with format string msg.
void (*ReportError)(struct TfLiteContext*, const char* msg, ...);
// Add `tensors_to_add` tensors, preserving pre-existing Tensor entries. If
// non-null, the value pointed to by `first_new_tensor_index` will be set to
// the index of the first new tensor.
TfLiteStatus (*AddTensors)(struct TfLiteContext*, int tensors_to_add,
int* first_new_tensor_index);
  // Get a node and its registration by node_index.
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*GetNodeAndRegistration)(
struct TfLiteContext*, int node_index, TfLiteNode** node,
struct TfLiteRegistration** registration);
// Replace ops with one or more stub delegate operations. This function
// does not take ownership of `nodes_to_replace`.
TfLiteStatus (*ReplaceNodeSubsetsWithDelegateKernels)(
struct TfLiteContext*, struct TfLiteRegistration registration,
const TfLiteIntArray* nodes_to_replace, struct TfLiteDelegate* delegate);
// Number of threads that are recommended to subsystems like gemmlowp and
// eigen.
int recommended_num_threads;
// Access external contexts by type.
// WARNING: This is an experimental interface that is subject to change.
TfLiteExternalContext* (*GetExternalContext)(struct TfLiteContext*,
TfLiteExternalContextType);
  // Set the value of an external context. Does not take ownership of the
// pointer.
// WARNING: This is an experimental interface that is subject to change.
void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType,
TfLiteExternalContext*);
// Flag for allowing float16 precision for FP32 calculation.
// default: false.
// WARNING: This is an experimental API and subject to change.
bool allow_fp32_relax_to_fp16;
// Pointer to the op-level profiler, if set; nullptr otherwise.
void* profiler;
  // Allocate a persistent buffer which has the same lifetime as the
  // interpreter. Returns nullptr on failure.
  // The memory is allocated from the heap in TFL, and from the arena tail in
  // TFLM.
// This method is only available in Init or Prepare stage.
// WARNING: This is an experimental interface that is subject to change.
void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes);
  // Allocate a buffer which will be deallocated right after the invoke phase.
  // The memory is allocated from the heap in TFL, and from the volatile arena
  // in TFLM.
  // This method is only available in the invoke stage.
// NOTE: If possible use RequestScratchBufferInArena method to avoid memory
// allocation during inference time.
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*AllocateBufferForEval)(struct TfLiteContext* ctx, size_t bytes,
void** ptr);
// Request a scratch buffer in the arena through static memory planning.
  // This method is only available in the Prepare stage; the buffer is allocated
  // by the interpreter between the Prepare and Eval stages. In the Eval stage,
  // the GetScratchBuffer API can be used to fetch the address.
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*RequestScratchBufferInArena)(struct TfLiteContext* ctx,
size_t bytes, int* buffer_idx);
// Get the scratch buffer pointer.
// This method is only available in Eval stage.
// WARNING: This is an experimental interface that is subject to change.
void* (*GetScratchBuffer)(struct TfLiteContext* ctx, int buffer_idx);
// Resize the memory pointer of the `tensor`. This method behaves the same as
// `ResizeTensor`, except that it makes a copy of the shape array internally
// so the shape array could be deallocated right afterwards.
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*ResizeTensorExplicit)(struct TfLiteContext* ctx,
TfLiteTensor* tensor, int dims,
const int* shape);
// This method provides a preview of post-delegation partitioning. Each
// TfLiteDelegateParams in the referenced array corresponds to one instance of
// the delegate kernel.
// Example usage:
//
// TfLiteIntArray* nodes_to_replace = ...;
// TfLiteDelegateParams* params_array;
// int num_partitions = 0;
// TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
// context, delegate, nodes_to_replace, &params_array, &num_partitions));
// for (int idx = 0; idx < num_partitions; idx++) {
// const auto& partition_params = params_array[idx];
// ...
// }
//
// NOTE: The context owns the memory referenced by partition_params_array. It
  // will be cleared with another call to PreviewDelegatePartitioning, or after
// TfLiteDelegateParams::Prepare returns.
//
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*PreviewDelegatePartitioning)(
struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
TfLiteDelegateParams** partition_params_array, int* num_partitions);
// Returns a TfLiteTensor struct for a given index.
// WARNING: This is an experimental interface that is subject to change.
// WARNING: This method may not be available on all platforms.
TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context,
int tensor_idx);
// Returns a TfLiteEvalTensor struct for a given index.
// WARNING: This is an experimental interface that is subject to change.
// WARNING: This method may not be available on all platforms.
TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
int tensor_idx);
} TfLiteContext;
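A sketch (editor's example; the ExampleOpData struct and the byte count are hypothetical) of the scratch-buffer flow described in the comments above: reserve an arena slot in Prepare, then resolve it to a pointer in Eval:

struct ExampleOpData {
  int scratch_index;
};

static TfLiteStatus ExampleScratchPrepare(TfLiteContext* context,
                                          TfLiteNode* node) {
  ExampleOpData* data = static_cast<ExampleOpData*>(node->user_data);
  // Only legal in the Prepare stage: ask the planner for arena space.
  TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
      context, /*bytes=*/1024, &data->scratch_index));
  return kTfLiteOk;
}

static TfLiteStatus ExampleScratchEval(TfLiteContext* context,
                                       TfLiteNode* node) {
  ExampleOpData* data = static_cast<ExampleOpData*>(node->user_data);
  // Only legal in the Eval stage: resolve the index to an address.
  void* scratch = context->GetScratchBuffer(context, data->scratch_index);
  (void)scratch;  // ...use as temporary working memory...
  return kTfLiteOk;
}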
typedef struct TfLiteRegistration {
// Initializes the op from serialized data.
// If a built-in op:
// `buffer` is the op's params data (TfLiteLSTMParams*).
// `length` is zero.
// If custom op:
// `buffer` is the op's `custom_options`.
// `length` is the size of the buffer.
//
// Returns a type-punned (i.e. void*) opaque data (e.g. a primitive pointer
// or an instance of a struct).
//
// The returned pointer will be stored with the node in the `user_data` field,
// accessible within prepare and invoke functions below.
// NOTE: if the data is already in the desired format, simply implement this
// function to return `nullptr` and implement the free function to be a no-op.
void* (*init)(TfLiteContext* context, const char* buffer, size_t length);
// The pointer `buffer` is the data previously returned by an init invocation.
void (*free)(TfLiteContext* context, void* buffer);
// prepare is called when the inputs this node depends on have been resized.
// context->ResizeTensor() can be called to request output tensors to be
// resized.
//
// Returns kTfLiteOk on success.
TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node);
// Execute the node (should read node->inputs and output to node->outputs).
// Returns kTfLiteOk on success.
TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node);
// profiling_string is called during summarization of profiling information
// in order to group executions together. Providing a value here will cause a
  // given op to appear multiple times in the profiling report. This is
  // particularly useful for custom ops that can perform significantly
  // different calculations depending on their `user_data`.
const char* (*profiling_string)(const TfLiteContext* context,
const TfLiteNode* node);
// Builtin codes. If this kernel refers to a builtin this is the code
// of the builtin. This is so we can do marshaling to other frameworks like
// NN API.
// Note: It is the responsibility of the registration binder to set this
// properly.
int32_t builtin_code;
// Custom op name. If the op is a builtin, this will be null.
// Note: It is the responsibility of the registration binder to set this
// properly.
// WARNING: This is an experimental interface that is subject to change.
const char* custom_name;
// The version of the op.
// Note: It is the responsibility of the registration binder to set this
// properly.
int version;
} TfLiteRegistration;
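A minimal sketch (editor's example, for a hypothetical custom op) of how these callbacks are bundled into a registration; leaving `init`/`free` null is permitted when no per-node state is needed, and the trailing fields default to zero:

static TfLiteStatus ExampleOpPrepare(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}
static TfLiteStatus ExampleOpInvoke(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

TfLiteRegistration* Register_EXAMPLE_OP() {
  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
                                 ExampleOpPrepare, ExampleOpInvoke};
  return &r;
}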
// The flags used in `TfLiteDelegate`. Note that this is a bitmask, so the
// values should be 1, 2, 4, 8, ...etc.
typedef enum TfLiteDelegateFlags {
kTfLiteDelegateFlagsNone = 0,
// The flag is set if the delegate can handle dynamic sized tensors.
// For example, the output shape of a `Resize` op with non-constant shape
// can only be inferred when the op is invoked.
// In this case, the Delegate is responsible for calling
// `SetTensorToDynamic` to mark the tensor as a dynamic tensor, and calling
// `ResizeTensor` when invoking the op.
//
  // If the delegate isn't capable of handling dynamic tensors, this flag needs
  // to be set to false.
kTfLiteDelegateFlagsAllowDynamicTensors = 1,
// This flag can be used by delegates (that allow dynamic tensors) to ensure
// applicable tensor shapes are automatically propagated in the case of tensor
// resizing.
// This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors
// of a delegate kernel will have correct shapes before its Prepare() method
// is called. The runtime leverages TFLite builtin ops in the original
// execution plan to propagate shapes.
//
// A few points to note:
// 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is
// false, this one is redundant since the delegate kernels are re-initialized
// every time tensors are resized.
// 2. Enabling this flag adds some overhead to AllocateTensors(), since extra
// work is required to prepare the original execution plan.
// 3. This flag requires that the original execution plan only have ops with
// valid registrations (and not 'dummy' custom ops like with Flex).
// WARNING: This feature is experimental and subject to change.
kTfLiteDelegateFlagsRequirePropagatedShapes = 2
} TfLiteDelegateFlags;
// WARNING: This is an experimental interface that is subject to change.
typedef struct TfLiteDelegate {
  // Data that the delegate needs to identify itself. This data is owned by the
  // delegate. The delegate is owned by the user code, so the delegate is
  // responsible for freeing this data when it is destroyed.
void* data_;
// Invoked by ModifyGraphWithDelegate. This prepare is called, giving the
// delegate a view of the current graph through TfLiteContext*. It typically
// will look at the nodes and call ReplaceNodeSubsetsWithDelegateKernels()
  // to ask the TensorFlow Lite runtime to create macro-nodes to represent
// delegated subgraphs of the original graph.
TfLiteStatus (*Prepare)(TfLiteContext* context,
struct TfLiteDelegate* delegate);
// Copy the data from delegate buffer handle into raw memory of the given
// 'tensor'. Note that the delegate is allowed to allocate the raw bytes as
// long as it follows the rules for kTfLiteDynamic tensors, in which case this
// cannot be null.
TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
struct TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* tensor);
// Copy the data from raw memory of the given 'tensor' to delegate buffer
// handle. This can be null if the delegate doesn't use its own buffer.
TfLiteStatus (*CopyToBufferHandle)(TfLiteContext* context,
struct TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* tensor);
// Free the Delegate Buffer Handle. Note: This only frees the handle, but
// this doesn't release the underlying resource (e.g. textures). The
// resources are either owned by application layer or the delegate.
// This can be null if the delegate doesn't use its own buffer.
void (*FreeBufferHandle)(TfLiteContext* context,
struct TfLiteDelegate* delegate,
TfLiteBufferHandle* handle);
// Bitmask flags. See the comments in `TfLiteDelegateFlags`.
int64_t flags;
} TfLiteDelegate;
// Build a 'null' delegate, with all the fields properly set to their default
// values.
TfLiteDelegate TfLiteDelegateCreate();
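A sketch (editor's example) of a do-nothing delegate built on this struct: start from the defaults produced by TfLiteDelegateCreate() and fill in Prepare, which a real delegate would use to claim nodes via ReplaceNodeSubsetsWithDelegateKernels:

static TfLiteStatus NoopDelegatePrepare(TfLiteContext* context,
                                        struct TfLiteDelegate* delegate) {
  // A real implementation would inspect the execution plan here and call
  // context->ReplaceNodeSubsetsWithDelegateKernels(...) for the nodes it wants.
  return kTfLiteOk;
}

TfLiteDelegate MakeNoopDelegate() {
  TfLiteDelegate d = TfLiteDelegateCreate();  // all fields set to defaults
  d.Prepare = NoopDelegatePrepare;
  d.flags = kTfLiteDelegateFlagsNone;
  return d;
}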
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // TENSORFLOW_LITE_C_COMMON_H_


@@ -0,0 +1,38 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/core/api/error_reporter.h"
#include <cstdarg>
namespace tflite {
int ErrorReporter::Report(const char* format, ...) {
va_list args;
va_start(args, format);
int code = Report(format, args);
va_end(args);
return code;
}
// TODO(aselle): Make the name of ReportError on context the same, so
// we can use the ensure functions w/o a context and w/ a reporter.
int ErrorReporter::ReportError(void*, const char* format, ...) {
va_list args;
va_start(args, format);
int code = Report(format, args);
va_end(args);
return code;
}
} // namespace tflite


@@ -0,0 +1,59 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
#include <cstdarg>
namespace tflite {
/// A functor that reports errors to the supporting system. Invoked similarly
/// to printf.
///
/// Usage:
/// ErrorReporter foo;
/// foo.Report("test %d", 5);
/// or
/// va_list args;
/// foo.Report("test %d", args); // where args is va_list
///
/// Subclass ErrorReporter to provide another reporting destination.
/// For example, if you have a GUI program, you might redirect to a buffer
/// that drives a GUI error log box.
class ErrorReporter {
public:
virtual ~ErrorReporter() {}
virtual int Report(const char* format, va_list args) = 0;
int Report(const char* format, ...);
int ReportError(void*, const char* format, ...);
};
} // namespace tflite
// You should not make bare calls to the error reporter, instead use the
// TF_LITE_REPORT_ERROR macro, since this allows message strings to be
// stripped when the binary size has to be optimized. If you are looking to
// reduce binary size, define TF_LITE_STRIP_ERROR_STRINGS when compiling and
// every call will be stubbed out, taking no memory.
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_REPORT_ERROR(reporter, ...) \
do { \
static_cast<tflite::ErrorReporter*>(reporter)->Report(__VA_ARGS__); \
} while (false)
#else // TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_REPORT_ERROR(reporter, ...)
#endif // TF_LITE_STRIP_ERROR_STRINGS
#endif // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
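As an editor's illustrative sketch of the subclassing pattern described above, a reporter that forwards everything to stderr might look like this (the class name is hypothetical):

#include <cstdarg>
#include <cstdio>
#include "tensorflow/lite/core/api/error_reporter.h"

class StderrReporter : public tflite::ErrorReporter {
 public:
  int Report(const char* format, va_list args) override {
    return vfprintf(stderr, format, args);
  }
};

// Usage:
//   StderrReporter reporter;
//   TF_LITE_REPORT_ERROR(&reporter, "bad tensor index %d", 7);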

File diff suppressed because it is too large


@@ -0,0 +1,253 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
#define TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
// These functions transform codes and data structures that are defined in the
// flatbuffer serialization format into in-memory values that are used by the
// runtime API and interpreter.
#include <cstddef>
#include <new>
#include <type_traits>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Interface class for builtin data allocations.
class BuiltinDataAllocator {
public:
virtual void* Allocate(size_t size, size_t alignment_hint) = 0;
virtual void Deallocate(void* data) = 0;
// Allocate a structure, but make sure it is a POD structure that doesn't
  // require constructors to run. The reason we do this is that the
  // Interpreter's C extension part will take ownership, so destructors will
  // not be run during deallocation.
template <typename T>
T* AllocatePOD() {
// TODO(b/154346074): Change this to is_trivially_destructible when all
// platform targets support that properly.
static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
void* allocated_memory = this->Allocate(sizeof(T), alignof(T));
return new (allocated_memory) T;
}
virtual ~BuiltinDataAllocator() {}
};
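An editor's sketch of the simplest allocator that satisfies this interface, backed by malloc/free; it only illustrates the contract and ignores the alignment hint (assuming malloc's default alignment suffices):

#include <cstdlib>
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"

class MallocBuiltinDataAllocator : public tflite::BuiltinDataAllocator {
 public:
  void* Allocate(size_t size, size_t alignment_hint) override {
    (void)alignment_hint;  // assumed: malloc's alignment is sufficient here
    return std::malloc(size);
  }
  void Deallocate(void* data) override { std::free(data); }
};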
// Parse the appropriate data out of the op.
//
// This handles builtin data explicitly as there are flatbuffer schemas.
// If it returns kTfLiteOk, it passes the data out with `builtin_data`. The
// calling function has to pass in an allocator object, and this allocator
// will be called to reserve space for the output data. If the calling
// function's allocator reserves memory on the heap, then it's the calling
// function's responsibility to free it.
// If it returns kTfLiteError, `builtin_data` will be `nullptr`.
TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
// Converts the tensor data type used in the flat buffer to the representation
// used by the runtime.
TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
ErrorReporter* error_reporter);
TfLiteStatus ParseAbs(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseConcatenation(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseCos(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseDepthwiseConv2D(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseDequantize(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseFloor(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseFullyConnected(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseGreater(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseGreaterEqual(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseL2Normalization(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLess(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseLessEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLog(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseLogicalAnd(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLogicalNot(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLogicalOr(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLogistic(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseMinimum(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseNeg(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseNotEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePad(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePadV2(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePrelu(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseRelu(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseRelu6(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseResizeNearestNeighbor(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSin(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseStridedSlice(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseTanh(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
} // namespace tflite
#endif // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_


@@ -0,0 +1,66 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/core/api/op_resolver.h"
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
namespace tflite {
TfLiteStatus GetRegistrationFromOpCode(
const OperatorCode* opcode, const OpResolver& op_resolver,
ErrorReporter* error_reporter, const TfLiteRegistration** registration) {
TfLiteStatus status = kTfLiteOk;
*registration = nullptr;
auto builtin_code = opcode->builtin_code();
int version = opcode->version();
if (builtin_code > BuiltinOperator_MAX ||
builtin_code < BuiltinOperator_MIN) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Op builtin_code out of range: %d. Are you using old TFLite binary "
"with newer model?",
builtin_code);
status = kTfLiteError;
} else if (builtin_code != BuiltinOperator_CUSTOM) {
*registration = op_resolver.FindOp(builtin_code, version);
if (*registration == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Didn't find op for builtin opcode '%s' version '%d'\n",
EnumNameBuiltinOperator(builtin_code), version);
status = kTfLiteError;
}
} else if (!opcode->custom_code()) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Operator with CUSTOM builtin_code has no custom_code.\n");
status = kTfLiteError;
} else {
const char* name = opcode->custom_code()->c_str();
*registration = op_resolver.FindOp(name, version);
if (*registration == nullptr) {
// Do not report error for unresolved custom op, we do the final check
// while preparing ops.
status = kTfLiteError;
}
}
return status;
}
} // namespace tflite


@@ -0,0 +1,48 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
/// Abstract interface that returns TfLiteRegistrations given op codes or custom
/// op names. This is the mechanism that ops being referenced in the flatbuffer
/// model are mapped to executable function pointers (TfLiteRegistrations).
class OpResolver {
public:
/// Finds the op registration for a builtin operator by enum code.
virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
int version) const = 0;
/// Finds the op registration of a custom operator by op name.
virtual const TfLiteRegistration* FindOp(const char* op,
int version) const = 0;
virtual ~OpResolver() {}
};
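A sketch (editor's example) of the smallest useful resolver: it maps exactly one builtin to a registration function that is assumed to be provided elsewhere by the kernel library, and resolves everything else to nullptr:

// Assumed to exist elsewhere in the kernel library (hypothetical declaration).
TfLiteRegistration* Register_FULLY_CONNECTED();

class SingleOpResolver : public tflite::OpResolver {
 public:
  const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
                                   int version) const override {
    return op == tflite::BuiltinOperator_FULLY_CONNECTED
               ? Register_FULLY_CONNECTED()
               : nullptr;
  }
  const TfLiteRegistration* FindOp(const char* op, int version) const override {
    return nullptr;  // no custom ops in this sketch
  }
};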
// Handles the logic for converting between an OperatorCode structure extracted
// from a flatbuffer and information about a registered operator
// implementation.
TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode,
const OpResolver& op_resolver,
ErrorReporter* error_reporter,
const TfLiteRegistration** registration);
} // namespace tflite
#endif // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_


@@ -0,0 +1,194 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_PROFILER_H_
#define TENSORFLOW_LITE_CORE_API_PROFILER_H_
#include <cstdint>
namespace tflite {
// A simple utility for enabling profiled event tracing in TensorFlow Lite.
class Profiler {
public:
  // As a given Profiler instance might only be interested in certain event
  // types, we define each event type value to allow a Profiler to use
  // bitmask operations to determine whether an event should be
  // recorded or not.
enum class EventType {
// Default event type, the metadata field has no special significance.
DEFAULT = 1,
// The event is an operator invocation and the event_metadata field is the
// index of operator node.
OPERATOR_INVOKE_EVENT = 2,
// The event is an invocation for an internal operator of a TFLite delegate.
// The event_metadata field is the index of operator node that's specific to
// the delegate.
DELEGATE_OPERATOR_INVOKE_EVENT = 4,
// The event is a recording of runtime instrumentation such as the overall
// TFLite runtime status, the TFLite delegate status (if a delegate
// is applied), and the overall model inference latency etc.
// Note, the delegate status and overall status are stored as separate
// event_metadata fields. In particular, the delegate status is encoded
// as DelegateStatus::full_status().
GENERAL_RUNTIME_INSTRUMENTATION_EVENT = 8,
};
virtual ~Profiler() {}
// Signals the beginning of an event and returns a handle to the profile
// event. The `event_metadata1` and `event_metadata2` have different
// interpretations based on the actual Profiler instance and the `event_type`.
// For example, as for the 'SubgraphAwareProfiler' defined in
// lite/core/subgraph.h, when the event_type is OPERATOR_INVOKE_EVENT,
// `event_metadata1` represents the index of a TFLite node, and
// `event_metadata2` represents the index of the subgraph that this event
// comes from.
virtual uint32_t BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) = 0;
  // Similar to the above, but `event_metadata2` defaults to 0.
uint32_t BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata) {
return BeginEvent(tag, event_type, event_metadata, /*event_metadata2*/ 0);
}
  // Signals an end to the specified profile event with 'event_metadata's. This
// is useful when 'event_metadata's are not available when the event begins
// or when one wants to overwrite the 'event_metadata's set at the beginning.
virtual void EndEvent(uint32_t event_handle, int64_t event_metadata1,
int64_t event_metadata2) {}
// Signals an end to the specified profile event.
virtual void EndEvent(uint32_t event_handle) = 0;
  // Appends an event of type 'event_type' with 'tag' and 'event_metadata'
  // which started at 'start' and ended at 'end'.
  // Note:
  // In cases where ProfileSummarizer and tensorflow::StatsCalculator are used,
  // they assume the value is in "usec"; if a subclass does not record times in
  // usec, the values are not meaningful.
  // TODO(karimnosseir): Revisit and make the function clearer.
void AddEvent(const char* tag, EventType event_type, uint64_t start,
uint64_t end, int64_t event_metadata) {
AddEvent(tag, event_type, start, end, event_metadata,
/*event_metadata2*/ 0);
}
virtual void AddEvent(const char* tag, EventType event_type, uint64_t start,
uint64_t end, int64_t event_metadata1,
int64_t event_metadata2) {}
protected:
friend class ScopedProfile;
};
// Adds a profile event to `profiler` that begins with the construction
// of the object and ends when the object goes out of scope.
// The lifetime of tag should be at least the lifetime of `profiler`.
// `profiler` may be null, in which case nothing is profiled.
class ScopedProfile {
public:
ScopedProfile(Profiler* profiler, const char* tag,
Profiler::EventType event_type = Profiler::EventType::DEFAULT,
int64_t event_metadata = 0)
: profiler_(profiler), event_handle_(0) {
if (profiler) {
event_handle_ = profiler_->BeginEvent(tag, event_type, event_metadata);
}
}
~ScopedProfile() {
if (profiler_) {
profiler_->EndEvent(event_handle_);
}
}
protected:
Profiler* profiler_;
uint32_t event_handle_;
};
class ScopedOperatorProfile : public ScopedProfile {
public:
ScopedOperatorProfile(Profiler* profiler, const char* tag, int node_index)
: ScopedProfile(profiler, tag, Profiler::EventType::OPERATOR_INVOKE_EVENT,
static_cast<uint32_t>(node_index)) {}
};
class ScopedDelegateOperatorProfile : public ScopedProfile {
public:
ScopedDelegateOperatorProfile(Profiler* profiler, const char* tag,
int node_index)
: ScopedProfile(profiler, tag,
Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT,
static_cast<uint32_t>(node_index)) {}
};
class ScopedRuntimeInstrumentationProfile : public ScopedProfile {
public:
ScopedRuntimeInstrumentationProfile(Profiler* profiler, const char* tag)
: ScopedProfile(
profiler, tag,
Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, -1) {}
void set_runtime_status(int64_t delegate_status, int64_t interpreter_status) {
if (profiler_) {
delegate_status_ = delegate_status;
interpreter_status_ = interpreter_status;
}
}
~ScopedRuntimeInstrumentationProfile() {
if (profiler_) {
profiler_->EndEvent(event_handle_, delegate_status_, interpreter_status_);
}
}
private:
int64_t delegate_status_;
int64_t interpreter_status_;
};
} // namespace tflite
#define TFLITE_VARNAME_UNIQ_IMPL(name, ctr) name##ctr
#define TFLITE_VARNAME_UNIQ(name, ctr) TFLITE_VARNAME_UNIQ_IMPL(name, ctr)
#define TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler, tag) \
tflite::ScopedProfile TFLITE_VARNAME_UNIQ(_profile_, __COUNTER__)( \
(profiler), (tag))
#define TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler, tag, node_index) \
tflite::ScopedOperatorProfile TFLITE_VARNAME_UNIQ(_profile_, __COUNTER__)( \
(profiler), (tag), (node_index))
#define TFLITE_SCOPED_DELEGATE_OPERATOR_PROFILE(profiler, tag, node_index) \
tflite::ScopedDelegateOperatorProfile TFLITE_VARNAME_UNIQ( \
_profile_, __COUNTER__)((profiler), (tag), (node_index))
#define TFLITE_ADD_RUNTIME_INSTRUMENTATION_EVENT( \
profiler, tag, delegate_status, interpreter_status) \
do { \
    if (profiler) {                                                           \
const auto handle = profiler->BeginEvent( \
tag, Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, \
delegate_status, interpreter_status); \
profiler->EndEvent(handle); \
} \
} while (false);
#endif // TENSORFLOW_LITE_CORE_API_PROFILER_H_
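An editor's sketch of the scoped macro in use; `profiler` can be any Profiler subclass, or nullptr, in which case nothing is recorded:

#include "tensorflow/lite/core/api/profiler.h"

void RunWithProfiling(tflite::Profiler* profiler) {
  TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler, "ExpensiveStep");
  // ... the work being measured runs here; the event ends when this scope
  // closes and ScopedProfile's destructor calls EndEvent().
}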


@@ -0,0 +1,50 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/core/api/tensor_utils.h"
#include <string.h>
#include "tensorflow/lite/c/common.h"
namespace tflite {
TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
if (!tensor->is_variable) {
return kTfLiteOk;
}
// TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it
// to the value of the buffer.
int value = 0;
if (tensor->type == kTfLiteInt8) {
value = tensor->params.zero_point;
}
// TODO(b/139446230): Provide a platform header to better handle these
// specific scenarios.
#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \
defined(__i386) || defined(__x86__) || defined(__X86__) || \
defined(_X86_) || defined(_M_IX86) || defined(_M_X64)
memset(tensor->data.raw, value, tensor->bytes);
#else
char* raw_ptr = tensor->data.raw;
for (size_t i = 0; i < tensor->bytes; ++i) {
*raw_ptr = value;
raw_ptr++;
}
#endif
return kTfLiteOk;
}
} // namespace tflite


@@ -0,0 +1,28 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Resets a variable tensor to the default value.
TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor);
} // namespace tflite
#endif // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_

View File

@@ -0,0 +1,956 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
#endif
#endif
#include <functional>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
constexpr int kReverseShift = -1;
inline void GetActivationMinMax(FusedActivationFunctionType ac,
float* output_activation_min,
float* output_activation_max) {
switch (ac) {
case FusedActivationFunctionType::kNone:
*output_activation_min = std::numeric_limits<float>::lowest();
*output_activation_max = std::numeric_limits<float>::max();
break;
case FusedActivationFunctionType::kRelu:
*output_activation_min = 0.f;
*output_activation_max = std::numeric_limits<float>::max();
break;
case FusedActivationFunctionType::kRelu1:
*output_activation_min = -1.f;
*output_activation_max = 1.f;
break;
case FusedActivationFunctionType::kRelu6:
*output_activation_min = 0.f;
*output_activation_max = 6.f;
break;
}
}
template <typename T>
inline T ActivationFunctionWithMinMax(T x, T output_activation_min,
T output_activation_max) {
using std::max;
using std::min;
return min(max(x, output_activation_min), output_activation_max);
}
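// Example (editor's sketch): clamping an activation to the ReLU6 range
// obtained from GetActivationMinMax above. Values are illustrative.
//
//   float act_min, act_max;
//   GetActivationMinMax(FusedActivationFunctionType::kRelu6, &act_min,
//                       &act_max);
//   float y = ActivationFunctionWithMinMax(7.5f, act_min, act_max);  // 6.0f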
// Legacy function, left for compatibility only.
template <FusedActivationFunctionType Ac>
float ActivationFunction(float x) {
float output_activation_min, output_activation_max;
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
return ActivationFunctionWithMinMax(x, output_activation_min,
output_activation_max);
}
inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
const float* bias_data, int array_size,
float* array_data) {
// Note: see b/132215220: in May 2019 we thought it would be OK to replace
// this with the Eigen one-liner:
// return (array.colwise() + bias).cwiseMax(clamp_min).cwiseMin(clamp_max).
// This turned out to severely regress performance: +4ms (i.e. 8%) on
// MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now.
TFLITE_DCHECK_EQ((array_size % bias_size), 0);
#ifdef USE_NEON
float* array_ptr = array_data;
float* array_end_ptr = array_ptr + array_size;
const auto clamp_min_vec = vdupq_n_f32(clamp_min);
const auto clamp_max_vec = vdupq_n_f32(clamp_max);
for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
int i = 0;
for (; i <= bias_size - 16; i += 16) {
auto b0 = vld1q_f32(bias_data + i);
auto b1 = vld1q_f32(bias_data + i + 4);
auto b2 = vld1q_f32(bias_data + i + 8);
auto b3 = vld1q_f32(bias_data + i + 12);
auto a0 = vld1q_f32(array_ptr + i);
auto a1 = vld1q_f32(array_ptr + i + 4);
auto a2 = vld1q_f32(array_ptr + i + 8);
auto a3 = vld1q_f32(array_ptr + i + 12);
auto x0 = vaddq_f32(a0, b0);
auto x1 = vaddq_f32(a1, b1);
auto x2 = vaddq_f32(a2, b2);
auto x3 = vaddq_f32(a3, b3);
x0 = vmaxq_f32(clamp_min_vec, x0);
x1 = vmaxq_f32(clamp_min_vec, x1);
x2 = vmaxq_f32(clamp_min_vec, x2);
x3 = vmaxq_f32(clamp_min_vec, x3);
x0 = vminq_f32(clamp_max_vec, x0);
x1 = vminq_f32(clamp_max_vec, x1);
x2 = vminq_f32(clamp_max_vec, x2);
x3 = vminq_f32(clamp_max_vec, x3);
vst1q_f32(array_ptr + i, x0);
vst1q_f32(array_ptr + i + 4, x1);
vst1q_f32(array_ptr + i + 8, x2);
vst1q_f32(array_ptr + i + 12, x3);
}
for (; i <= bias_size - 4; i += 4) {
auto b = vld1q_f32(bias_data + i);
auto a = vld1q_f32(array_ptr + i);
auto x = vaddq_f32(a, b);
x = vmaxq_f32(clamp_min_vec, x);
x = vminq_f32(clamp_max_vec, x);
vst1q_f32(array_ptr + i, x);
}
for (; i < bias_size; i++) {
array_ptr[i] = ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i],
clamp_min, clamp_max);
}
}
#else // not NEON
for (int array_offset = 0; array_offset < array_size;
array_offset += bias_size) {
for (int i = 0; i < bias_size; i++) {
array_data[array_offset + i] = ActivationFunctionWithMinMax(
array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
}
}
#endif
}
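// Example (editor's sketch): adding a 16-entry bias to an [8, 16] activation
// buffer and clamping to the ReLU6 range. `bias_data` and `activations` are
// hypothetical buffers, not names used elsewhere in this file.
//
//   BiasAndClamp(/*clamp_min=*/0.0f, /*clamp_max=*/6.0f, /*bias_size=*/16,
//                bias_data, /*array_size=*/8 * 16, activations);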
inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
int32_t x, int32_t quantized_multiplier, int left_shift) {
using gemmlowp::RoundingDivideByPOT;
using gemmlowp::SaturatingRoundingDoublingHighMul;
return RoundingDivideByPOT(
SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
}
inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
int32_t x, int32_t quantized_multiplier, int left_shift) {
using gemmlowp::SaturatingRoundingDoublingHighMul;
return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
quantized_multiplier);
}
inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
int32_t quantized_multiplier,
int shift) {
using gemmlowp::RoundingDivideByPOT;
using gemmlowp::SaturatingRoundingDoublingHighMul;
int left_shift = shift > 0 ? shift : 0;
int right_shift = shift > 0 ? 0 : -shift;
return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
x * (1 << left_shift), quantized_multiplier),
right_shift);
}
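// Worked example (editor's note): a real multiplier of 0.5 is represented as
// quantized_multiplier = 1 << 30 (0.5 in Q0.31) with shift = 0, so
// MultiplyByQuantizedMultiplier(100, 1 << 30, 0) evaluates to 50.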
inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
int32_t quantized_multiplier,
int shift) {
// Inputs:
// - quantized_multiplier has fixed point at bit 31
// - shift is -31 to +7 (negative for right shift)
//
// Assumptions: The following input ranges are assumed
// - quantized_multiplier >= 0 (the usual range is (1<<30) to (1<<31)-1)
// - scaling is chosen so final scaled result fits in int32_t
// - input x is in the range -(1<<47) <= x < (1<<47)
assert(quantized_multiplier >= 0);
assert(shift >= -31 && shift < 8);
int32_t reduced_multiplier = (quantized_multiplier + (1 << 15)) >> 16;
int total_shift = 15 - shift;
x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1));
int32_t result = x >> total_shift;
return result;
}
template <typename T>
int CountLeadingZeros(T integer_input) {
static_assert(std::is_unsigned<T>::value,
"Only unsigned integer types handled.");
#if defined(__GNUC__)
return integer_input ? __builtin_clz(integer_input)
: std::numeric_limits<T>::digits;
#else
if (integer_input == 0) {
return std::numeric_limits<T>::digits;
}
const T one_in_leading_positive = static_cast<T>(1)
<< (std::numeric_limits<T>::digits - 1);
int leading_zeros = 0;
while (integer_input < one_in_leading_positive) {
integer_input <<= 1;
++leading_zeros;
}
return leading_zeros;
#endif
}
template <typename T>
inline int CountLeadingSignBits(T integer_input) {
static_assert(std::is_signed<T>::value, "Only signed integer types handled.");
#if defined(__GNUC__) && !defined(__clang__)
return integer_input ? __builtin_clrsb(integer_input)
: std::numeric_limits<T>::digits;
#else
using U = typename std::make_unsigned<T>::type;
return integer_input >= 0
? CountLeadingZeros(static_cast<U>(integer_input)) - 1
: integer_input != std::numeric_limits<T>::min()
? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
: 0;
#endif
}
// Use "count leading zeros" helper functions to do a fast Floor(log_2(x)).
template <typename Integer>
inline Integer FloorLog2(Integer n) {
static_assert(std::is_integral<Integer>::value, "");
static_assert(std::is_signed<Integer>::value, "");
static_assert(sizeof(Integer) == 4 || sizeof(Integer) == 8, "");
TFLITE_CHECK_GT(n, 0);
if (sizeof(Integer) == 4) {
return 30 - CountLeadingSignBits(n);
} else {
return 62 - CountLeadingSignBits(n);
}
}
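// Worked example (editor's note): for 32-bit inputs, FloorLog2(1) == 0 and
// FloorLog2(20) == 4, matching floor(log_2(x)).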
// generate INT16 LUT for function(), e.g., table exp(x) and 1/(1+x) used in
// softmax
inline void gen_lut(const std::function<double(double)>& func, double min,
double max, int16_t* table, const int num) {
// The size of the table should equal num + 1;
// the last element is used only for slope calculation.
double step = (max - min) / (num - 1);
double half_step = step / 2.0;
for (int i = 0; i < num - 1; i++) {
double sample_val = TfLiteRound(func(min + i * step) * 32768.0);
double midpoint_interp_val =
TfLiteRound((func(min + (i + 1) * step) * 32768.0 +
TfLiteRound(func(min + i * step) * 32768.0)) /
2.0);
double midpoint_val =
TfLiteRound(func(min + i * step + half_step) * 32768.0);
double midpoint_err = midpoint_interp_val - midpoint_val;
double bias = TfLiteRound(midpoint_err / 2.0);
table[i] = std::min(std::max(sample_val - bias, -32768.0), 32767.0);
}
table[num - 1] =
std::min(std::max(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
}
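// Example (editor's sketch): filling a table for exp() over [-10, 0], the kind
// of LUT consumed by generic_int16_table_lookup below. The buffer name, range
// and size are illustrative; the table carries 512 base entries plus one extra
// entry used only for the final slope.
//
//   int16_t exp_lut[513];
//   gen_lut([](double x) { return std::exp(x); }, -10.0, 0.0, exp_lut, 513);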
// int16_t func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
// 512 base values; the extra final entry is used only to calculate the slope.
uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
assert(index < 512 && "LUT index out of range.");
int16_t offset = value & 0x7f;
// base and slope are Q0.15
int16_t base = lut[index];
int16_t slope = lut[index + 1] - lut[index];
// Q0.15 * Q0.7 = Q0.22
// Round and convert from Q0.22 to Q0.15
int32_t delta = (static_cast<int32_t>(slope) * offset + 64) >> 7;
// Q0.15 + Q0.15
return base + delta;
}
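// Worked example (editor's note): value = 0 maps to index 256 with offset 0,
// so the lookup returns lut[256] directly; value = 64 keeps index 256 with
// offset 64 and returns the rounded midpoint of lut[256] and lut[257].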
// Table of sigmoid(i/24) at 0.16 format - 256 elements.
// We use combined sigmoid and tanh look-up table, since
// tanh(x) = 2*sigmoid(2*x) -1.
// Both functions are symmetric, so the LUT table is only needed
// for the absolute value of the input.
static const uint16_t sigmoid_table_uint16[256] = {
32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498,
40149, 40794, 41432, 42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255,
46817, 47369, 47911, 48443, 48964, 49475, 49975, 50464, 50942, 51409, 51865,
52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485, 55834, 56174,
56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288,
59519, 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441,
61599, 61750, 61896, 62036, 62172, 62302, 62428, 62549, 62666, 62778, 62886,
62990, 63090, 63186, 63279, 63368, 63454, 63536, 63615, 63691, 63765, 63835,
63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405, 64450,
64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845,
64873, 64900, 64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097,
65115, 65132, 65149, 65164, 65179, 65194, 65208, 65221, 65234, 65246, 65258,
65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337, 65345, 65352, 65360,
65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425,
65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465,
65468, 65471, 65474, 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491,
65493, 65495, 65497, 65498, 65500, 65501, 65503, 65504, 65505, 65507, 65508,
65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65517, 65518,
65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525,
65525, 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529,
65529, 65529, 65530, 65530, 65530, 65530, 65531, 65531, 65531, 65531, 65531,
65532, 65532, 65532, 65532, 65532, 65532, 65533, 65533, 65533, 65533, 65533,
65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
65534, 65534, 65535};
// TODO(b/77858996): Add these to gemmlowp.
template <typename IntegerType>
IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) {
static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
return a;
}
template <>
inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) {
std::int64_t a64 = a;
std::int64_t b64 = b;
std::int64_t sum = a64 + b64;
return static_cast<std::int32_t>(std::min(
static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
std::max(
static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
sum)));
}
template <typename tRawType, int tIntegerBits>
gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp(
gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
SaturatingAddNonGemmlowp(a.raw(), b.raw()));
}
template <typename IntegerType>
IntegerType SaturatingSub(IntegerType a, IntegerType b) {
static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
return a;
}
template <>
inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) {
std::int32_t a32 = a;
std::int32_t b32 = b;
std::int32_t diff = a32 - b32;
return static_cast<std::int16_t>(
std::min(static_cast<int32_t>(32767),
std::max(static_cast<int32_t>(-32768), diff)));
}
template <>
inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) {
std::int64_t a64 = a;
std::int64_t b64 = b;
std::int64_t diff = a64 - b64;
return static_cast<std::int32_t>(std::min(
static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
std::max(
static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
diff)));
}
template <typename tRawType, int tIntegerBits>
gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
SaturatingSub(a.raw(), b.raw()));
}
// End section to be moved to gemmlowp.
template <typename IntegerType>
IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) {
if (exponent == 0) {
return x;
}
using ScalarIntegerType =
typename gemmlowp::FixedPointRawTypeTraits<IntegerType>::ScalarRawType;
const IntegerType min =
gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::min());
const IntegerType max =
gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::max());
const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType);
const std::int32_t threshold =
((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1);
const IntegerType positive_mask =
gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup<IntegerType>(threshold));
const IntegerType negative_mask =
gemmlowp::MaskIfLessThan(x, gemmlowp::Dup<IntegerType>(-threshold));
IntegerType result = gemmlowp::ShiftLeft(x, exponent);
result = gemmlowp::SelectUsingMask(positive_mask, max, result);
result = gemmlowp::SelectUsingMask(negative_mask, min, result);
return result;
}
// If we want to leave IntegerBits fixed, then multiplication
// by a power of two has to be saturating/rounding, not exact anymore.
template <typename tRawType, int tIntegerBits>
gemmlowp::FixedPoint<tRawType, tIntegerBits>
SaturatingRoundingMultiplyByPOTParam(
gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) {
return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
}
// Convert int32_t multiplier to int16_t with rounding.
inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
int16_t* multiplier_int16_t) {
TFLITE_DCHECK_GE(multiplier_int32_t, 0);
static constexpr int32_t kRoundingOffset = 1 << 15;
if (multiplier_int32_t >=
std::numeric_limits<int32_t>::max() - kRoundingOffset) {
*multiplier_int16_t = std::numeric_limits<int16_t>::max();
return;
}
const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16;
TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset);
TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset);
*multiplier_int16_t = result;
TFLITE_DCHECK_EQ(*multiplier_int16_t, result);
}
// Minimum output bits to accommodate log of maximum input range. It actually
// does not matter if one considers, say, [-64,64] or [-64,64).
//
// For example, run this through Octave:
// [0:127; ...
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
constexpr int min_log_x_output_bits(int input_bits) {
return input_bits > 90 ? 7
: input_bits > 44 ? 6
: input_bits > 21 ? 5
: input_bits > 10 ? 4
: input_bits > 4 ? 3
: input_bits > 1 ? 2
: 1;
}
// Although currently the name of this function says that it cannot handle
// values less than 1, in practice it can handle as low as 1/x_max, where
// x_max is the largest representable input. In other words, the output range
// is symmetric.
template <int OutputIntegerBits, int InputIntegerBits>
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
log_x_for_x_greater_than_or_equal_to_1_impl(
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
// assert(__builtin_clz(0u) >= std::numeric_limits<uint32_t>::digits - 1);
// assert(__builtin_clz(0u) <= std::numeric_limits<uint32_t>::digits);
using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
// The reason for accumulating the result with an extra bit of headroom is
// that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
// recip_denom will otherwise introduce an error.
static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumIntegerBits>;
const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
FixedPoint0, 1488522236, std::log(2.0));
const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5)));
const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
FixedPoint0, 1518500250, std::sqrt(0.5));
const FixedPoint0 one_quarter =
GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0);
const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0)));
const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0)));
const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
FixedPoint0, 1057819769,
2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0)));
const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0)));
const FixedPointAccum shifted_quarter =
gemmlowp::Rescale<kAccumIntegerBits>(one_quarter);
// Reinterpret the input value as Q0.31, because we will figure out the
// required shift "ourselves" instead of using, say, Rescale.
FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
// z_a_pow_2 = input_integer_bits - z_a_headroom;
int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32_t>(z_a.raw()));
FixedPoint0 r_a_tmp =
SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
const int32_t r_a_raw =
SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
// z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
// z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
// InputIntegerBits - z_b_headroom - 0.25);
const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)),
shifted_quarter);
// z_b is treated like z_a, but premultiplying by sqrt(0.5).
FixedPoint0 z_b = z_a * sqrt_half;
int z_b_headroom = CountLeadingZeros(static_cast<uint32_t>(z_b.raw())) - 1;
const int32_t r_b_raw =
SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)),
shifted_quarter);
const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw(
std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw()));
const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half);
FixedPoint0 q = r - sqrt_sqrt_half;
q = q + q;
const FixedPoint0 common_sq = q * q;
const FixedPoint0 num = q * r + q * common_sq * alpha_n;
const FixedPoint0 denom_minus_one_0 =
p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q;
const FixedPoint0 recip_denom =
one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0);
const FixedPointAccum num_scaled = gemmlowp::Rescale<kAccumIntegerBits>(num);
return gemmlowp::Rescale<OutputIntegerBits>(z_pow_2_adj * log_2 +
num_scaled * recip_denom);
}
template <int OutputIntegerBits, int InputIntegerBits>
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
log_x_for_x_greater_than_or_equal_to_1(
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
static_assert(
OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
"Output integer bits must be sufficient to accommodate logs of inputs.");
return log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
InputIntegerBits>(
input_val);
}
inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
int* num_bits_over_unit) {
int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(x));
// This is the number of bits to the left of the binary point above 1.0.
// Consider x=1.25. In that case shifted_scale=0.8 and
// no later adjustment will be needed.
*num_bits_over_unit = x_integer_digits - headroom_plus_one;
const int32_t shifted_sum_minus_one =
static_cast<int32_t>((static_cast<uint32_t>(x) << headroom_plus_one) -
(static_cast<uint32_t>(1) << 31));
gemmlowp::FixedPoint<int32_t, 0> shifted_scale =
gemmlowp::one_over_one_plus_x_for_x_in_0_1(
gemmlowp::FixedPoint<int32_t, 0>::FromRaw(shifted_sum_minus_one));
return shifted_scale.raw();
}
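// Worked example (editor's note): with x = 1280 and x_integer_digits = 31
// (i.e. x read as a plain integer), headroom_plus_one = 21, so
// *num_bits_over_unit = 10 and the returned Q0.31 value is 0.8; the caller
// recovers 1/1280 as 0.8 * 2^-10.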
inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
int32_t* output_inv_sqrt,
int* output_shift) {
TFLITE_DCHECK_GE(input, 0);
if (input <= 1) {
// Handle the input value 1 separately to avoid overflow in that case
// in the general computation below (b/143972021). Also handle 0 as if it
// were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
// but rare/unrealistic input value. We can expect both to occur in some
// incompletely trained models, but probably not in fully trained models.
*output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
*output_shift = 0;
return;
}
TFLITE_DCHECK_GT(input, 1);
*output_shift = 11;
while (input >= (1 << 29)) {
input /= 4;
++*output_shift;
}
const unsigned max_left_shift_bits =
CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
*output_shift -= left_shift_bit_pairs;
input <<= 2 * left_shift_bit_pairs;
TFLITE_DCHECK_GE(input, (1 << 27));
TFLITE_DCHECK_LT(input, (1 << 29));
using gemmlowp::FixedPoint;
using gemmlowp::Rescale;
using gemmlowp::SaturatingRoundingMultiplyByPOT;
// Using 3 integer bits gives us enough room for the internal arithmetic in
// this Newton-Raphson iteration.
using F3 = FixedPoint<int32_t, 3>;
using F0 = FixedPoint<int32_t, 0>;
const F3 fixedpoint_input = F3::FromRaw(input >> 1);
const F3 fixedpoint_half_input =
SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
const F3 fixedpoint_half_three =
GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
// Newton-Raphson iteration
// Naive unoptimized starting guess: x = 1
F3 x = F3::One();
// Naive unoptimized number of iterations: 5
for (int i = 0; i < 5; i++) {
const F3 x3 = Rescale<3>(x * x * x);
x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
}
const F0 fixedpoint_half_sqrt_2 =
GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
x = x * fixedpoint_half_sqrt_2;
*output_inv_sqrt = x.raw();
if (*output_shift < 0) {
*output_inv_sqrt <<= -*output_shift;
*output_shift = 0;
}
// Convert right shift (right is positive) to left shift.
*output_shift *= reverse_shift;
}
// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
// rectangular array of numbers.
//
// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
// However, as Dims<N> is to be deprecated, this class exists as an adaptor
// to enable simple unoptimized implementations of element-wise broadcasting
// operations.
template <int N>
struct NdArrayDesc {
// The "extent" of each dimension. Indices along dimension d must be in the
// half-open interval [0, extents[d]).
int extents[N];
// The number of *elements* (not bytes) between consecutive indices of each
// dimension.
int strides[N];
};
// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2,
int i3) {
TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]);
TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]);
TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]);
TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]);
return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] +
i3 * desc.strides[3];
}
inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) {
return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
indexes[4] * desc.strides[4];
}
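// Worked example (editor's note): a contiguous 4-D array of shape {1, 2, 3, 4}
// has extents = {1, 2, 3, 4} and strides = {24, 12, 4, 1}, so
// SubscriptToIndex(desc, 0, 1, 2, 3) == 1*12 + 2*4 + 3*1 == 23.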
// Given the dimensions of the operands for an element-wise binary broadcast,
// adjusts them so that they can be directly iterated over with simple loops.
// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and
// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr.
//
// This function assumes that the two input shapes are compatible up to
// broadcasting and the shorter one has already been prepended with 1s to be the
// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64),
// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that
// Dims<N> refer to shapes in reverse order. In this case, input0_dims will be
// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1).
//
// When two shapes are compatible up to broadcasting, for each dimension d,
// the input extents are either equal, or one of them is 1.
//
// This function performs the following for each dimension d:
// - If the extents are equal, then do nothing since the loop that walks over
// both of the input arrays is correct.
// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1
// and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows
// array0 to be referenced *at any index* in dimension d and still access the
// same slice.
template <int N>
inline void NdArrayDescsForElementwiseBroadcast(const Dims<N>& input0_dims,
const Dims<N>& input1_dims,
NdArrayDesc<N>* desc0_out,
NdArrayDesc<N>* desc1_out) {
TFLITE_DCHECK(desc0_out != nullptr);
TFLITE_DCHECK(desc1_out != nullptr);
// Copy dims to desc.
for (int i = 0; i < N; ++i) {
desc0_out->extents[i] = input0_dims.sizes[i];
desc0_out->strides[i] = input0_dims.strides[i];
desc1_out->extents[i] = input1_dims.sizes[i];
desc1_out->strides[i] = input1_dims.strides[i];
}
// Walk over each dimension. If the extents are equal do nothing.
// Otherwise, set the desc with extent 1 to have extent equal to the other and
// stride 0.
for (int i = 0; i < N; ++i) {
const int extent0 = ArraySize(input0_dims, i);
const int extent1 = ArraySize(input1_dims, i);
if (extent0 != extent1) {
if (extent0 == 1) {
desc0_out->strides[i] = 0;
desc0_out->extents[i] = extent1;
} else {
TFLITE_DCHECK_EQ(extent1, 1);
desc1_out->strides[i] = 0;
desc1_out->extents[i] = extent0;
}
}
}
}
// Copies dims to desc, calculating strides.
template <int N>
inline void CopyDimsToDesc(const RuntimeShape& input_shape,
NdArrayDesc<N>* desc_out) {
int desc_stride = 1;
for (int i = N - 1; i >= 0; --i) {
desc_out->extents[i] = input_shape.Dims(i);
desc_out->strides[i] = desc_stride;
desc_stride *= input_shape.Dims(i);
}
}
template <int N>
inline void NdArrayDescsForElementwiseBroadcast(
const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
NdArrayDesc<N>* desc0_out, NdArrayDesc<N>* desc1_out) {
TFLITE_DCHECK(desc0_out != nullptr);
TFLITE_DCHECK(desc1_out != nullptr);
auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
// Copy dims to desc, calculating strides.
CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
// Walk over each dimension. If the extents are equal do nothing.
// Otherwise, set the desc with extent 1 to have extent equal to the other and
// stride 0.
for (int i = 0; i < N; ++i) {
const int extent0 = extended_input0_shape.Dims(i);
const int extent1 = extended_input1_shape.Dims(i);
if (extent0 != extent1) {
if (extent0 == 1) {
desc0_out->strides[i] = 0;
desc0_out->extents[i] = extent1;
} else {
TFLITE_DCHECK_EQ(extent1, 1);
desc1_out->strides[i] = 0;
desc1_out->extents[i] = extent0;
}
}
}
}
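// Worked example (editor's note): broadcasting a per-channel bias. With
// input0_shape = {1, 16, 16, 64} and input1_shape = {1, 1, 1, 64}, desc1_out
// ends up with extents {1, 16, 16, 64} and stride 0 in the height and width
// dimensions, so the same 64 bias values are re-read at every (y, x) position.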
template <int N>
inline void NdArrayDescsForElementwiseBroadcast(
const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
const RuntimeShape& input2_shape, NdArrayDesc<N>* desc0_out,
NdArrayDesc<N>* desc1_out, NdArrayDesc<N>* desc2_out) {
TFLITE_DCHECK(desc0_out != nullptr);
TFLITE_DCHECK(desc1_out != nullptr);
TFLITE_DCHECK(desc2_out != nullptr);
auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
auto extended_input2_shape = RuntimeShape::ExtendedShape(N, input2_shape);
// Copy dims to desc, calculating strides.
CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
CopyDimsToDesc<N>(extended_input2_shape, desc2_out);
// Walk over each dimension. If the extents are equal do nothing.
// Otherwise, set the desc with extent 1 to have extent equal to the other and
// stride 0.
for (int i = 0; i < N; ++i) {
const int extent0 = extended_input0_shape.Dims(i);
const int extent1 = extended_input1_shape.Dims(i);
const int extent2 = extended_input2_shape.Dims(i);
int extent = extent0;
if (extent1 != 1) extent = extent1;
if (extent2 != 1) extent = extent2;
TFLITE_DCHECK(extent0 == 1 || extent0 == extent);
TFLITE_DCHECK(extent1 == 1 || extent1 == extent);
TFLITE_DCHECK(extent2 == 1 || extent2 == extent);
if (!(extent0 == extent1 && extent1 == extent2)) {
if (extent0 == 1) {
desc0_out->strides[i] = 0;
desc0_out->extents[i] = extent;
}
if (extent1 == 1) {
desc1_out->strides[i] = 0;
desc1_out->extents[i] = extent;
}
if (extent2 == 1) {
desc2_out->strides[i] = 0;
desc2_out->extents[i] = extent;
}
}
}
}
// Detailed implementation of NDOpsHelper, the indexes must be a zero array.
// This implementation is equivalent to N nested loops. Ex, if N=4, it can be
// re-written as:
// for (int b = 0; b < output.extents[0]; ++b) {
// for (int y = 0; y < output.extents[1]; ++y) {
// for (int x = 0; x < output.extents[2]; ++x) {
// for (int c = 0; c < output.extents[3]; ++c) {
// calc({b,y,x,c});
// }
// }
// }
// }
template <int N, int DIM, typename Calc>
typename std::enable_if<DIM != N - 1, void>::type NDOpsHelperImpl(
const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
}
}
template <int N, int DIM, typename Calc>
typename std::enable_if<DIM == N - 1, void>::type NDOpsHelperImpl(
const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
calc(indexes);
}
}
// Execute the calc function in the innermost iteration based on the shape of
// the output. The calc function should take a single argument of type int[N].
template <int N, typename Calc>
inline void NDOpsHelper(const NdArrayDesc<N>& output, const Calc& calc) {
int indexes[N] = {0};
NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
}
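// Example (editor's sketch): zeroing every element of a 4-D output.
// `output_desc` and `output_data` are hypothetical names.
//
//   NDOpsHelper<4>(output_desc, [&](int indexes[4]) {
//     output_data[SubscriptToIndex(output_desc, indexes[0], indexes[1],
//                                  indexes[2], indexes[3])] = 0.0f;
//   });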
// Copied from gemmlowp::RoundDown when we dropped direct dependency on
// gemmlowp.
//
// Returns the runtime argument rounded down to the nearest multiple of
// the fixed Modulus.
template <unsigned Modulus, typename Integer>
Integer RoundDown(Integer i) {
return i - (i % Modulus);
}
// Copied from gemmlowp::RoundUp when we dropped direct dependency on
// gemmlowp.
//
// Returns the runtime argument rounded up to the nearest multiple of
// the fixed Modulus.
template <unsigned Modulus, typename Integer>
Integer RoundUp(Integer i) {
return RoundDown<Modulus>(i + Modulus - 1);
}
// Copied from gemmlowp::CeilQuotient when we dropped direct dependency on
// gemmlowp.
//
// Returns the quotient a / b rounded up ('ceil') to the nearest integer.
template <typename Integer>
Integer CeilQuotient(Integer a, Integer b) {
return (a + b - 1) / b;
}
// This function is a copy of gemmlowp::HowManyThreads, copied when we dropped
// the direct dependency of internal/optimized/ on gemmlowp.
//
// It computes a reasonable number of threads to use for a GEMM of shape
// (rows, cols, depth).
//
// TODO(b/131910176): get rid of this function by switching each call site
// to its own more sensible logic for its own workload.
template <int KernelRows>
inline int LegacyHowManyThreads(int max_num_threads, int rows, int cols,
int depth) {
// Early-exit in the default case where multi-threading is disabled.
if (max_num_threads == 1) {
return 1;
}
// Ensure that each thread has KernelRows rows to process, if at all possible.
int thread_count = std::min(max_num_threads, rows / KernelRows);
// Limit the number of threads according to the overall size of the problem.
if (thread_count > 1) {
// Empirically determined value.
static constexpr std::uint64_t min_cubic_size_per_thread = 64 * 1024;
// We can only multiply two out of three sizes without risking overflow
const std::uint64_t cubic_size =
std::uint64_t(rows) * std::uint64_t(cols) * std::uint64_t(depth);
thread_count = std::min(
thread_count, static_cast<int>(cubic_size / min_cubic_size_per_thread));
}
if (thread_count < 1) {
thread_count = 1;
}
assert(thread_count > 0 && thread_count <= max_num_threads);
return thread_count;
}
template <typename T>
void optimized_ops_preload_l1_stream(const T* ptr) {
#ifdef __GNUC__
// builtin offered by GCC-compatible compilers including clang
__builtin_prefetch(ptr, /* 0 means read */ 0, /* 0 means no locality */ 0);
#else
(void)ptr;
#endif
}
template <typename T>
void optimized_ops_preload_l1_keep(const T* ptr) {
#ifdef __GNUC__
// builtin offered by GCC-compatible compilers including clang
__builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3);
#else
(void)ptr;
#endif
}
template <typename T>
void optimized_ops_prefetch_write_l1_keep(const T* ptr) {
#ifdef __GNUC__
// builtin offered by GCC-compatible compilers including clang
__builtin_prefetch(ptr, /* 1 means write */ 1, /* 3 means high locality */ 3);
#else
(void)ptr;
#endif
}
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_

View File

@@ -0,0 +1,112 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
#include <cstdint>
#include "tensorflow/lite/kernels/op_macros.h"
#ifndef TFLITE_DCHECK
#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_EQ
#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_NE
#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_GE
#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_GT
#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_LE
#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_LT
#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
// TODO(ahentz): Clean up: We should stick to the DCHECK versions.
#ifndef TFLITE_CHECK
#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_EQ
#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_NE
#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_GE
#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_GT
#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_LE
#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_LT
#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TF_LITE_STATIC_MEMORY
// TODO(b/162019032): Consider removing these type-aliases.
using int8 = std::int8_t;
using uint8 = std::uint8_t;
using int16 = std::int16_t;
using uint16 = std::uint16_t;
using int32 = std::int32_t;
using uint32 = std::uint32_t;
#endif // !defined(TF_LITE_STATIC_MEMORY)
// TFLITE_DEPRECATED()
//
// Duplicated from absl/base/macros.h to avoid pulling in that library.
// Marks deprecated class, struct, enum, function, method, and variable
// declarations. The macro argument is used as a custom diagnostic message (e.g.
// suggestion of a better alternative).
//
// Example:
//
// class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
// TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
//
// Every usage of a deprecated entity will trigger a warning when compiled with
// clang's `-Wdeprecated-declarations` option. This option is turned off by
// default, but the warnings will be reported by clang-tidy.
#if defined(__clang__) && __cplusplus >= 201103L
#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
#endif
#ifndef TFLITE_DEPRECATED
#define TFLITE_DEPRECATED(message)
#endif
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_

View File

@@ -0,0 +1,40 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
#include <cmath>
namespace tflite {
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \
defined(__ZEPHYR__)
#define TF_LITE_GLOBAL_STD_PREFIX
#else
#define TF_LITE_GLOBAL_STD_PREFIX std
#endif
#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name) \
template <class T> \
inline T tf_name(const T x) { \
return TF_LITE_GLOBAL_STD_PREFIX::std_name(x); \
}
DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round);
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_

View File

@@ -0,0 +1,35 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
#include <cmath>
namespace tflite {
#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__)
inline float TfLiteMax(const float& x, const float& y) {
return std::max(x, y);
}
#else
template <class T>
inline T TfLiteMax(const T& x, const T& y) {
return std::fmax(x, y);
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_

View File

@@ -0,0 +1,35 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
#include <cmath>
namespace tflite {
#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__)
inline float TfLiteMin(const float& x, const float& y) {
return std::min(x, y);
}
#else
template <class T>
inline T TfLiteMin(const T& x, const T& y) {
return std::fmin(x, y);
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_

View File

@@ -0,0 +1,40 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#define USE_NEON
#include <arm_neon.h>
#endif
#if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON
#define USE_NEON
#include "NEON_2_SSE.h"
#endif
// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is
// defined, PortableSomeFunc(args) otherwise.
#ifdef USE_NEON
// Always use Neon code
#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
#else
// No NEON available: Use Portable code
#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
#endif // defined(USE_NEON)
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_

View File

@@ -0,0 +1,395 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include <algorithm>
#include <cmath>
#include <limits>
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
namespace tflite {
namespace {
// These constants are used to manipulate the binary representation of doubles.
// Double-precision binary64 floating point format is:
// Bit | 63 | 62-52 | 51-0 |
// | Sign | Exponent | Fraction |
// To avoid 64-bit integers as much as possible, I break this into high and
// low 32-bit chunks. High is:
// Bit | 31 | 30-20 | 19-0 |
// | Sign | Exponent | High Fraction |
// Low is:
// Bit | 31-0 |
// | Low Fraction |
// We then access the components through logical bit-wise operations to
// extract the parts needed, with the positions and masks derived from the
// layout shown above.
constexpr uint64_t kSignMask = 0x8000000000000000LL;
constexpr uint64_t kExponentMask = 0x7ff0000000000000LL;
constexpr int32_t kExponentShift = 52;
constexpr int32_t kExponentBias = 1023;
constexpr uint32_t kExponentIsBadNum = 0x7ff;
constexpr uint64_t kFractionMask = 0x000fffffffc00000LL;
constexpr uint32_t kFractionShift = 22;
constexpr uint32_t kFractionRoundingMask = 0x003fffff;
constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
} // namespace
void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
int* shift) {
if (double_multiplier == 0.) {
*quantized_multiplier = 0;
*shift = 0;
return;
}
#ifdef TFLITE_EMULATE_FLOAT
// If we're trying to avoid the use of floating-point instructions (for
// example on microcontrollers) then use an alternative implementation
// that only requires integer and bitwise operations. To enable this, you
// need to set the define during the build process for your platform.
int64_t q_fixed = IntegerFrExp(double_multiplier, shift);
#else // TFLITE_EMULATE_FLOAT
const double q = std::frexp(double_multiplier, shift);
auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1ll << 31)));
#endif // TFLITE_EMULATE_FLOAT
TFLITE_CHECK(q_fixed <= (1ll << 31));
if (q_fixed == (1ll << 31)) {
q_fixed /= 2;
++*shift;
}
TFLITE_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
// A shift amount smaller than -31 would cause all bits to be shifted out
// and thus all results would be zero. We implement that instead with
// q_fixed==0, so as to avoid hitting issues with right-shift
// operations with shift amounts greater than 31. Note that this happens
// roughly when abs(double_multiplier) < 2^-31 and the present handling means
// that we're effectively flushing tiny double_multiplier's to zero.
// We could conceivably handle values in the range (roughly) [32, 63]
// as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
// the present handling is just doing 'flush denormals to zero'. We could
// reconsider and actually generate nonzero denormals if a need arises.
if (*shift < -31) {
*shift = 0;
q_fixed = 0;
}
*quantized_multiplier = static_cast<int32_t>(q_fixed);
}
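// Worked example (editor's note): QuantizeMultiplier(0.75, &q, &s) yields
// q = 0x60000000 (0.75 in Q0.31) and s = 0; 1.5 yields the same q with s = 1,
// and 0.375 yields the same q with s = -1.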
void QuantizeMultiplierGreaterThanOne(double double_multiplier,
int32_t* quantized_multiplier,
int* left_shift) {
TFLITE_CHECK_GT(double_multiplier, 1.);
QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift);
TFLITE_CHECK_GE(*left_shift, 0);
}
void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
int32_t* quantized_multiplier,
int* left_shift) {
TFLITE_CHECK_LT(double_multiplier, 1.);
TFLITE_CHECK_GT(double_multiplier, 0.);
int shift;
QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
TFLITE_CHECK_LE(shift, 0);
*left_shift = shift;
}
int64_t IntegerFrExp(double input, int* shift) {
// Make sure our assumptions about the double layout hold.
TFLITE_CHECK_EQ(8, sizeof(double));
// We want to access the bits of the input double value directly, which is
// tricky to do safely, so use a union to handle the casting.
union {
double double_value;
uint64_t double_as_uint;
} cast_union;
cast_union.double_value = input;
const uint64_t u = cast_union.double_as_uint;
// If the bitfield is all zeros apart from the sign bit, this is a normalized
// zero value, so return standard values for this special case.
if ((u & ~kSignMask) == 0) {
*shift = 0;
return 0;
}
// Deal with NaNs and Infs, which are always indicated with a fixed pattern in
// the exponent, and distinguished by whether the fractions are zero or
// non-zero.
const uint32_t exponent_part = ((u & kExponentMask) >> kExponentShift);
if (exponent_part == kExponentIsBadNum) {
*shift = std::numeric_limits<int>::max();
if (u & kFractionMask) {
// NaN, so just return zero (with the exponent set to INT_MAX).
return 0;
} else {
// Infinity, so return +/- INT_MAX.
if (u & kSignMask) {
return std::numeric_limits<int64_t>::min();
} else {
return std::numeric_limits<int64_t>::max();
}
}
}
// The shift is fairly easy to extract from the high bits of the double value,
// just by masking it out and applying a bias. The std::frexp() implementation
// always returns values between 0.5 and 1.0 though, whereas the exponent
// assumes 1.0 to 2.0 is the standard range, so I add on one to match that
// interface.
*shift = (exponent_part - kExponentBias) + 1;
// There's an implicit high bit in the double format definition, so make sure
// we include that at the top, and then reconstruct the rest of the fractional
// value from the remaining fragments.
int64_t fraction = 0x40000000 + ((u & kFractionMask) >> kFractionShift);
// We're cutting off some bits at the bottom, so to exactly match the standard
// frexp implementation here we'll apply rounding by adding one to the least
// significant bit of the result if the discarded portion is over half of the
// maximum.
if ((u & kFractionRoundingMask) > kFractionRoundingThreshold) {
fraction += 1;
}
// Negate the fraction if the sign bit was set.
if (u & kSignMask) {
fraction *= -1;
}
return fraction;
}
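// Worked example (editor's note): IntegerFrExp(1.5, &shift) returns
// 0x60000000 (0.75 scaled by 2^31) and sets shift = 1, mirroring
// std::frexp(1.5) == 0.75 * 2^1.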
double DoubleFromFractionAndShift(int64_t fraction, int shift) {
union {
double double_value;
uint64_t double_as_uint;
} result;
// Detect NaNs and infinities.
if (shift == std::numeric_limits<int>::max()) {
if (fraction == 0) {
return std::numeric_limits<double>::quiet_NaN();
} else if (fraction > 0) {
return std::numeric_limits<double>::infinity();
} else {
return -std::numeric_limits<double>::infinity();
}
}
// Return a normalized zero for a zero fraction.
if (fraction == 0) {
result.double_as_uint = 0;
return result.double_value;
}
bool is_negative = (fraction < 0);
int64_t encoded_fraction = is_negative ? -fraction : fraction;
int64_t encoded_shift = (shift - 1);
while (encoded_fraction < 0x40000000) {
encoded_fraction *= 2;
encoded_shift -= 1;
}
while (encoded_fraction > 0x80000000) {
encoded_fraction /= 2;
encoded_shift += 1;
}
encoded_fraction -= 0x40000000;
if (encoded_shift < -1022) {
encoded_shift = -1023;
} else if (encoded_shift > 1022) {
encoded_shift = 1023;
}
encoded_shift += kExponentBias;
uint64_t encoded_sign = is_negative ? kSignMask : 0;
result.double_as_uint = encoded_sign | (encoded_shift << kExponentShift) |
(encoded_fraction << kFractionShift);
return result.double_value;
}
double IntegerDoubleMultiply(double a, double b) {
int a_shift;
const int64_t a_fraction = IntegerFrExp(a, &a_shift);
int b_shift;
const int64_t b_fraction = IntegerFrExp(b, &b_shift);
// Detect NaNs and infinities.
if (a_shift == std::numeric_limits<int>::max() ||
(b_shift == std::numeric_limits<int>::max())) {
return std::numeric_limits<double>::quiet_NaN();
}
const int result_shift = a_shift + b_shift + 1;
const int64_t result_fraction = (a_fraction * b_fraction) >> 32;
return DoubleFromFractionAndShift(result_fraction, result_shift);
}
int IntegerDoubleCompare(double a, double b) {
int a_shift;
const int64_t a_fraction = IntegerFrExp(a, &a_shift);
int b_shift;
const int64_t b_fraction = IntegerFrExp(b, &b_shift);
// Detect NaNs and infinities.
if (a_shift == std::numeric_limits<int>::max() ||
(b_shift == std::numeric_limits<int>::max())) {
return 1;
}
if ((a_fraction == 0) && (b_fraction < 0)) {
return 1;
} else if ((a_fraction < 0) && (b_fraction == 0)) {
return -1;
} else if (a_shift < b_shift) {
return -1;
} else if (a_shift > b_shift) {
return 1;
} else if (a_fraction < b_fraction) {
return -1;
} else if (a_fraction > b_fraction) {
return 1;
} else {
return 0;
}
}
void PreprocessSoftmaxScaling(double beta, double input_scale,
int input_integer_bits,
int32_t* quantized_multiplier, int* left_shift) {
// If the overall multiplier (input and beta) is large, then exp() of an
// input difference of 1 scaled by this will be large. In other words, we
// can cap the multiplier and know that, when it is used, the output will be
// (round to) zero wherever the input is not at the maximum value.
// If the overall scale is less than one, and input_integer_bits=0, then the
// result is double equivalent of Q0.31 (actually with more precision). Thus
// this generates a Q(input_integer_bits).(31-input_integer_bits)
// representation.
#ifdef TFLITE_EMULATE_FLOAT
const double input_beta = IntegerDoubleMultiply(beta, input_scale);
int shift;
int64_t fraction = IntegerFrExp(input_beta, &shift);
shift += (31 - input_integer_bits);
double input_beta_real_multiplier =
DoubleFromFractionAndShift(fraction, shift);
if (IntegerDoubleCompare(input_beta_real_multiplier, (1ll << 31) - 1.0) > 0) {
input_beta_real_multiplier = (1ll << 31) - 1.0;
}
#else // TFLITE_EMULATE_FLOAT
const double input_beta_real_multiplier = std::min(
beta * input_scale * (1 << (31 - input_integer_bits)), (1ll << 31) - 1.0);
#endif // TFLITE_EMULATE_FLOAT
QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier,
quantized_multiplier, left_shift);
}
void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
int input_integer_bits,
int32_t* quantized_multiplier,
int* left_shift,
int32_t* reverse_scaling_divisor,
int* reverse_scaling_left_shift) {
PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits,
quantized_multiplier, left_shift);
// Also calculate what amounts to the inverse scaling factor for the input.
const double real_reverse_scaling_divisor =
(1 << (31 - *left_shift)) / static_cast<double>(*quantized_multiplier);
tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor,
reverse_scaling_divisor,
reverse_scaling_left_shift);
}
int CalculateInputRadius(int input_integer_bits, int input_left_shift,
int total_signed_bits) {
#ifdef TFLITE_EMULATE_FLOAT
int64_t result = (1 << input_integer_bits) - 1;
result <<= (total_signed_bits - input_integer_bits);
result >>= input_left_shift;
return result;
#else // TFLITE_EMULATE_FLOAT
const double max_input_rescaled =
1.0 * ((1 << input_integer_bits) - 1) *
(1ll << (total_signed_bits - input_integer_bits)) /
(1ll << input_left_shift);
// Tighten bound using floor. Suppose that we could use the exact value.
// After scaling the difference, the result would be at the maximum. Thus we
// must ensure that our value has lower magnitude.
return static_cast<int>(std::floor(max_input_rescaled));
#endif // TFLITE_EMULATE_FLOAT
}
void NudgeQuantizationRange(const float min, const float max,
const int quant_min, const int quant_max,
float* nudged_min, float* nudged_max,
float* nudged_scale) {
// This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
const float quant_min_float = static_cast<float>(quant_min);
const float quant_max_float = static_cast<float>(quant_max);
*nudged_scale = (max - min) / (quant_max_float - quant_min_float);
const float zero_point_from_min = quant_min_float - min / *nudged_scale;
uint16_t nudged_zero_point;
if (zero_point_from_min < quant_min_float) {
nudged_zero_point = static_cast<uint16_t>(quant_min);
} else if (zero_point_from_min > quant_max_float) {
nudged_zero_point = static_cast<uint16_t>(quant_max);
} else {
nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
}
*nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
*nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
}
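// Worked example (illustrative): min = -6.0f, max = 6.0f, quant_min = 0,
// quant_max = 255 give scale = 12 / 255 ~= 0.0471 and
// zero_point_from_min = 0 - (-6.0) / scale = 127.5, which rounds to a nudged
// zero point of 128. The nudged range becomes roughly [-6.02, 5.98], and the
// real value 0.0 now maps exactly to the quantized value 128.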
void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
const float nudged_max, const float* input_data,
float* output_data, const float size) {
// This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
const float inv_nudged_scale = 1.0f / nudged_scale;
for (int i = 0; i < size; i++) {
const float src_val = input_data[i];
const float clamped = std::min(nudged_max, std::max(nudged_min, src_val));
const float clamped_shifted = clamped - nudged_min;
const float dst_val =
TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale +
nudged_min;
output_data[i] = dst_val;
}
}
bool CheckedLog2(const float x, int* log2_result) {
// Using TfLiteRound instead of std::round and std::log instead of
// std::log2 to work around these functions being missing in a toolchain
// used in some TensorFlow tests as of May 2018.
const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
const float x_log2_rounded = TfLiteRound(x_log2);
const float x_log2_fracpart = x_log2 - x_log2_rounded;
*log2_result = static_cast<int>(x_log2_rounded);
return std::abs(x_log2_fracpart) < 1e-3f;
}
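// Worked examples (illustrative):
//   CheckedLog2(0.25f, &result);  // true, result = -2
//   CheckedLog2(8.0f, &result);   // true, result = 3
//   CheckedLog2(0.3f, &result);   // false (log2(0.3) ~= -1.74 is not integral)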
void QuantizeMultiplierArray(const double* effective_scales, size_t size,
int32_t* effective_scale_significand,
int* effective_shift) {
for (size_t i = 0; i < size; ++i) {
QuantizeMultiplier(effective_scales[i], &effective_scale_significand[i],
&effective_shift[i]);
}
}
} // namespace tflite
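// Usage sketch (illustrative; the scale identifiers below are hypothetical,
// not part of this file). A kernel typically folds its floating-point rescale
// factor into a fixed-point multiplier/shift pair once at prepare time:
//
//   const double effective_scale = input_scale * filter_scale / output_scale;
//   int32_t output_multiplier = 0;
//   int output_shift = 0;
//   tflite::QuantizeMultiplier(effective_scale, &output_multiplier,
//                              &output_shift);
//   // output_multiplier is a Q0.31 significand; output_shift is negative for
//   // scales below 1 (right shift) and positive for scales above 1.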

View File

@@ -0,0 +1,292 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
#include <cmath>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
// Given the min and max values of a float array, return
// reasonable quantization parameters to use for this array.
template <typename T>
QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
bool narrow_range) {
const T qmin = std::numeric_limits<T>::min() + (narrow_range ? 1 : 0);
const T qmax = std::numeric_limits<T>::max();
const double qmin_double = qmin;
const double qmax_double = qmax;
// 0 should always be a representable value. Let's assume that the initial
// min,max range contains 0.
TFLITE_CHECK_LE(rmin, 0.);
TFLITE_CHECK_GE(rmax, 0.);
if (rmin == rmax) {
// Special case where the min,max range is a point. Should be {0}.
TFLITE_CHECK_EQ(rmin, 0.);
TFLITE_CHECK_EQ(rmax, 0.);
QuantizationParams quantization_params;
quantization_params.zero_point = 0;
quantization_params.scale = 0.;
return quantization_params;
}
// General case.
//
// First determine the scale.
const double scale = (rmax - rmin) / (qmax_double - qmin_double);
// Zero-point computation.
// First the initial floating-point computation. The zero-point can be
// determined from solving an affine equation for any known pair
// (real value, corresponding quantized value).
// We know two such pairs: (rmin, qmin) and (rmax, qmax).
// The arithmetic error on the zero point computed from either pair
// will be roughly machine_epsilon * (sum of absolute values of terms)
// so we want to use the variant that adds the smaller terms.
const double zero_point_from_min = qmin_double - rmin / scale;
const double zero_point_from_max = qmax_double - rmax / scale;
const double zero_point_from_min_error =
std::abs(qmin_double) + std::abs(rmin / scale);
const double zero_point_from_max_error =
std::abs(qmax_double) + std::abs(rmax / scale);
const double zero_point_double =
zero_point_from_min_error < zero_point_from_max_error
? zero_point_from_min
: zero_point_from_max;
// Now we need to nudge the zero point to be an integer
// (our zero points are integer, and this is motivated by the requirement
// to be able to represent the real value "0" exactly as a quantized value,
// which is required in multiple places, for example in Im2col with SAME
// padding).
T nudged_zero_point = 0;
if (zero_point_double < qmin_double) {
nudged_zero_point = qmin;
} else if (zero_point_double > qmax_double) {
nudged_zero_point = qmax;
} else {
nudged_zero_point = static_cast<T>(round(zero_point_double));
}
// The zero point should always be in the range of quantized value,
// [qmin, qmax].
TFLITE_CHECK_GE(nudged_zero_point, qmin);
TFLITE_CHECK_LE(nudged_zero_point, qmax);
// Finally, store the result nudged quantization params.
QuantizationParams quantization_params;
quantization_params.zero_point = nudged_zero_point;
quantization_params.scale = scale;
return quantization_params;
}
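// Worked example (illustrative): for T = uint8_t, rmin = -1.0, rmax = 1.0 and
// narrow_range = false,
//   scale = 2.0 / 255 ~= 0.00784,
//   zero_point_from_min = 0 - (-1.0) / scale = 127.5 (the smaller error term),
// so the nudged zero point is 128 and the real value 0.0 maps exactly to the
// quantized value 128.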
template <typename T>
QuantizationParams ChooseQuantizationParams(double rmin, double rmax) {
return ChooseQuantizationParams<T>(rmin, rmax, false);
}
// Converts a floating-point number to an integer. For all inputs x where
// static_cast<IntOut>(x) is legal according to the C++ standard, the result
// is identical to that cast (i.e. the result is x with its fractional part
// truncated whenever that is representable as IntOut).
//
// static_cast would cause undefined behavior for the following cases, which
// have well-defined behavior for this function:
//
// 1. If x is NaN, the result is zero.
//
// 2. If the truncated form of x is above the representable range of IntOut,
// the result is std::numeric_limits<IntOut>::max().
//
// 3. If the truncated form of x is below the representable range of IntOut,
// the result is std::numeric_limits<IntOut>::min().
//
// Note that cases #2 and #3 cover infinities as well as finite numbers.
//
// The range of FloatIn must include the range of IntOut, otherwise
// the results are undefined.
// TODO(sfeuz): Replace by absl::SafeCast once available.
template <class IntOut, class FloatIn>
IntOut SafeCast(FloatIn x) {
static_assert(!std::numeric_limits<FloatIn>::is_integer,
"FloatIn is integer");
static_assert(std::numeric_limits<IntOut>::is_integer,
"IntOut is not integer");
static_assert(std::numeric_limits<IntOut>::radix == 2, "IntOut is base 2");
// Special case NaN, for which the logic below doesn't work.
if (std::isnan(x)) {
return 0;
}
// Negative values all clip to zero for unsigned results.
if (!std::numeric_limits<IntOut>::is_signed && x < 0) {
return 0;
}
// Handle infinities.
if (std::isinf(x)) {
return x < 0 ? std::numeric_limits<IntOut>::min()
: std::numeric_limits<IntOut>::max();
}
// Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0),
// unless x is zero in which case exp == 0. Note that this implies that the
// magnitude of x is strictly less than 2^exp.
int exp = 0;
std::frexp(x, &exp);
// Let N be the number of non-sign bits in the representation of IntOut. If
// the magnitude of x is strictly less than 2^N, the truncated version of x
// is representable as IntOut. The only representable integer for which this
// is not the case is kMin for signed types (i.e. -2^N), but that is covered
// by the fall-through below.
if (exp <= std::numeric_limits<IntOut>::digits) {
return x;
}
// Handle numbers with magnitude >= 2^N.
return x < 0 ? std::numeric_limits<IntOut>::min()
: std::numeric_limits<IntOut>::max();
}
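// Worked examples (illustrative):
//   SafeCast<int8_t>(300.7f)  -> 127 (clamped to numeric_limits<int8_t>::max())
//   SafeCast<uint8_t>(-5.0f)  -> 0   (negative value, unsigned target)
//   SafeCast<int32_t>(NAN)    -> 0   (NaN maps to zero)
//   SafeCast<int16_t>(12.9f)  -> 12  (fraction truncated, as static_cast would)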
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and a shift representation of the negative of its exponent;
// this is intended as a RIGHT-shift.
//
// Restricted to the case where the multiplier < 1 (and non-negative).
void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
int32_t* quantized_multiplier,
int* left_shift);
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
// Restricted to the case where the multiplier > 1.
void QuantizeMultiplierGreaterThanOne(double double_multiplier,
int32_t* quantized_multiplier,
int* left_shift);
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
// Handles an arbitrary positive multiplier. The 'shift' output-value is
// basically the 'floating-point exponent' of the multiplier:
// Negative for a right-shift (when the multiplier is <1), positive for a
// left-shift (when the multiplier is >1)
void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
int* shift);
// Splits a double input value into a returned fraction, and a shift value from
// the exponent, using only bitwise and integer operations to support
// microcontrollers and other environments without floating-point support.
//
// This is designed to be a replacement for how std::frexp() is used within the
// QuantizeMultiplier() function, and so has a different signature than the
// standard version, returning a 64-bit integer rather than a double. This
// result has a maximum value of 1<<31, with the fraction expressed as a
// proportion of that maximum.
//
// std::frexp() returns NaNs and infinities unmodified, but since we're
// returning integers that can't represent those values, instead we return
// a shift of std::numeric_limits<int>::max() for all bad numbers, with an int64
// result of 0 for NaNs, std::numeric_limits<int64_t>::max() for +INFINITY, and
// std::numeric_limits<int64_t>::min() for -INFINITY. Denormalized inputs will
// result in return values that end up truncating some bits at the end,
// reflecting the loss of precision inherent in denormalization.
int64_t IntegerFrExp(double input, int* shift);
// Converts an integer fraction in the format produced by IntegerFrExp (where
// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an
// IEEE binary64 double format result. The implementation uses only integer and
// bitwise operators, so no floating point hardware support or emulation is
// needed. This is here so quantized operations can run non-time-critical
// preparation calculations on microcontrollers and other platforms without
// float support.
double DoubleFromFractionAndShift(int64_t fraction, int shift);
// Performs a multiplication of two numbers in double format, using only integer
// and bitwise instructions. This is aimed at supporting housekeeping functions
// for quantized operations on microcontrollers without floating-point hardware.
double IntegerDoubleMultiply(double a, double b);
// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is
// greater than b. It is implemented using only integer and logical instructions
// so that it can be easily run on microcontrollers for quantized operations.
int IntegerDoubleCompare(double a, double b);
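// Worked example (illustrative): the integer-only helpers round-trip ordinary
// doubles. For an input of 1.5,
//   int shift;
//   int64_t fraction = IntegerFrExp(1.5, &shift);
//   // fraction == 0x60000000 (0.75 of 1 << 31), shift == 1
//   double restored = DoubleFromFractionAndShift(fraction, shift);  // ~1.5
// while IntegerDoubleMultiply and IntegerDoubleCompare multiply and compare
// plain doubles using only integer instructions.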
// This first creates a multiplier in a double equivalent of
// Q(input_integer_bits).(31-input_integer_bits) representation, with extra
// precision in the double's fractional bits. It then splits the result into
// significand and exponent.
void PreprocessSoftmaxScaling(double beta, double input_scale,
int input_integer_bits,
int32_t* quantized_multiplier, int* left_shift);
// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated.
void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
int input_integer_bits,
int32_t* quantized_multiplier,
int* left_shift,
int32_t* reverse_scaling_divisor,
int* reverse_scaling_left_shift);
// Calculate the largest input that will result in a within-bounds intermediate
// result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words,
// it must not overflow before we reduce the value by multiplication by the
// input multiplier. The negative radius is used as the minimum difference in
// Softmax.
int CalculateInputRadius(int input_integer_bits, int input_left_shift,
int total_signed_bits = 31);
// Nudges a min/max quantization range to ensure zero is zero.
// Gymnastics with nudged zero point is to ensure that real zero maps to
// an integer, which is required for e.g. zero-padding in convolutional layers.
// Outputs nudged_min, nudged_max, nudged_scale.
void NudgeQuantizationRange(const float min, const float max,
const int quant_min, const int quant_max,
float* nudged_min, float* nudged_max,
float* nudged_scale);
// Fake quantizes (quantizes and dequantizes) input_data using the scale,
// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code
// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor.
void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
const float nudged_max, const float* input_data,
float* output_data, const float size);
// If x is approximately a power of two (with any positive or negative
// exponent), stores that exponent (i.e. log2(x)) in *log2_result and returns
// true; otherwise returns false.
bool CheckedLog2(const float x, int* log2_result);
// Decomposes an array of double multipliers into a Q0.31 int32 representation
// of its significand, and shift representation of its exponent.
//
// Handles an arbitrary multiplier. The 'shift' output-value is
// basically the 'floating-point exponent' of the multiplier:
// Negative for a right-shift (when the multiplier is <1), positive for a
// left-shift (when the multiplier is >1)
void QuantizeMultiplierArray(const double* effective_scales, size_t size,
int32_t* effective_scale_significand,
int* effective_shift);
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_

View File

@@ -0,0 +1,454 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] + input2_data[i], params.quantized_activation_min,
params.quantized_activation_max);
}
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const float* input1_data,
const RuntimeShape& input2_shape, const float* input2_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
auto x = input1_data[i] + input2_data[i];
output_data[i] = ActivationFunctionWithMinMax(
x, params.float_activation_min, params.float_activation_max);
}
}
// Element-wise add that can often be used for inner loop of broadcast add as
// well as the non-broadcast add.
// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, respectively; see add.cc for more comments).
template <typename T>
inline void AddElementwise(int size, const ArithmeticParams& params,
const T* input1_data, const T* input2_data,
T* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
for (int i = 0; i < size; ++i) {
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<T>(clamped_output);
}
}
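// Per element this computes, in effect (illustrative summary):
//   y = clamp(((x1 + o1) * 2^ls * m1 + (x2 + o2) * 2^ls * m2) * m_out + o_out,
//             act_min, act_max)
// where o1/o2/o_out are the offsets, m1/m2/m_out the quantized multipliers
// (applied together with their shifts), ls = params.left_shift, and
// act_min/act_max the quantized activation bounds.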
// Scalar-broadcast add that can be used for inner loop of more general
// broadcast add, so that, for example, scalar-broadcast with batch will still
// be fast.
inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
uint8_t input1_data, const uint8_t* input2_data,
uint8_t* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -256);
TFLITE_DCHECK_GT(params.input2_offset, -256);
TFLITE_DCHECK_LT(params.input1_offset, 256);
TFLITE_DCHECK_LT(params.input2_offset, 256);
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
for (int i = 0; i < size; ++i) {
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
TFLITE_DCHECK_GT(params.input1_offset, -256);
TFLITE_DCHECK_GT(params.input2_offset, -256);
TFLITE_DCHECK_LT(params.input1_offset, 256);
TFLITE_DCHECK_LT(params.input2_offset, 256);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void AddGeneralParamScale(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int16_t* input1_data,
const RuntimeShape& input2_shape,
const int16_t* input2_data,
const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
int max_value = std::numeric_limits<int16_t>::max();
TFLITE_DCHECK_GT(params.input1_offset, -max_value);
TFLITE_DCHECK_GT(params.input2_offset, -max_value);
TFLITE_DCHECK_LT(params.input1_offset, max_value);
TFLITE_DCHECK_LT(params.input2_offset, max_value);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int16_t* input1_data,
const RuntimeShape& input2_shape, const int16_t* input2_data,
const RuntimeShape& output_shape, int16_t* output_data,
bool pot_scale = true) {
if (!pot_scale) {
AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
return;
}
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int input1_shift = params.input1_shift;
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
const int16_t output_activation_min = params.quantized_activation_min;
const int16_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
TFLITE_DCHECK_LE(input1_shift, 0);
TFLITE_DCHECK_LE(params.input2_shift, 0);
const int16_t* not_shift_input =
input1_shift == 0 ? input1_data : input2_data;
const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
const int input_right_shift =
input1_shift == 0 ? -params.input2_shift : -input1_shift;
for (int i = 0; i < flat_size; i++) {
// F0 uses 0 integer bits, range [-1, 1].
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
F0 scaled_input = F0::FromRaw(
gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
const int16_t raw_output = result.raw();
const int16_t clamped_output = std::min(
output_activation_max, std::max(output_activation_min, raw_output));
output_data[i] = clamped_output;
}
}
// TODO(jiawen): We can implement BroadcastAdd on buffers of arbitrary
// dimensionality if the runtime code does a single loop over one dimension
// that handles broadcasting as the base case. The code generator would then
// generate max(D1, D2) nested for loops.
// TODO(benoitjacob): BroadcastAdd is intentionally duplicated from
// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
// reference_ops.h.
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const float* input1_data,
const RuntimeShape& input2_shape,
const float* input2_data,
const RuntimeShape& output_shape,
float* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
output_data[Offset(extended_output_shape, b, y, x, c)] =
ActivationFunctionWithMinMax(
input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
input2_data[SubscriptToIndex(desc2, b, y, x, c)],
params.float_activation_min, params.float_activation_max);
}
}
}
}
}
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int32_t* input1_data,
const RuntimeShape& input2_shape,
const int32_t* input2_data,
const RuntimeShape& output_shape,
int32_t* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
output_data[Offset(extended_output_shape, b, y, x, c)] =
ActivationFunctionWithMinMax(
input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
input2_data[SubscriptToIndex(desc2, b, y, x, c)],
params.quantized_activation_min,
params.quantized_activation_max);
}
}
}
}
}
// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, respectively; see add.cc for more comments).
template <typename T>
inline void BroadcastAdd4DSlow(
const ArithmeticParams& params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32_t shifted_input1_val =
input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val =
input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier,
params.input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier,
params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<T>(clamped_output);
}
}
}
}
}
inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
const RuntimeShape& unswitched_input1_shape,
const uint8_t* unswitched_input1_data,
const RuntimeShape& unswitched_input2_shape,
const uint8_t* unswitched_input2_data,
const RuntimeShape& output_shape,
uint8_t* output_data) {
ArithmeticParams switched_params = unswitched_params;
switched_params.input1_offset = unswitched_params.input2_offset;
switched_params.input1_multiplier = unswitched_params.input2_multiplier;
switched_params.input1_shift = unswitched_params.input2_shift;
switched_params.input2_offset = unswitched_params.input1_offset;
switched_params.input2_multiplier = unswitched_params.input1_multiplier;
switched_params.input2_shift = unswitched_params.input1_shift;
const bool use_unswitched =
unswitched_params.broadcast_category ==
tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
const ArithmeticParams& params =
use_unswitched ? unswitched_params : switched_params;
const uint8_t* input1_data =
use_unswitched ? unswitched_input1_data : unswitched_input2_data;
const uint8_t* input2_data =
use_unswitched ? unswitched_input2_data : unswitched_input1_data;
// Fivefold nested loops. The second input resets its position for each
// iteration of the second loop. The first input resets its position at the
// beginning of the fourth loop. The innermost loop is an elementwise add of
// sections of the arrays.
uint8_t* output_data_ptr = output_data;
const uint8_t* input1_data_ptr = input1_data;
const uint8_t* input2_data_reset = input2_data;
// In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
// between input shapes. y3 for input 1 is always broadcast, and so the
// dimension there is 1, whereas optionally y1 might be broadcast for input 2.
// Put another way,
// input1.shape.FlatSize = y0 * y1 * y2 * y4,
// input2.shape.FlatSize = y0 * y2 * y3 * y4.
int y0 = params.broadcast_shape[0];
int y1 = params.broadcast_shape[1];
int y2 = params.broadcast_shape[2];
int y3 = params.broadcast_shape[3];
int y4 = params.broadcast_shape[4];
if (y4 > 1) {
// General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
// dimension.
for (int i0 = 0; i0 < y0; ++i0) {
const uint8_t* input2_data_ptr;
for (int i1 = 0; i1 < y1; ++i1) {
input2_data_ptr = input2_data_reset;
for (int i2 = 0; i2 < y2; ++i2) {
for (int i3 = 0; i3 < y3; ++i3) {
AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
output_data_ptr);
input2_data_ptr += y4;
output_data_ptr += y4;
}
// We have broadcast y4 of input1 data y3 times, and now move on.
input1_data_ptr += y4;
}
}
// We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
input2_data_reset = input2_data_ptr;
}
} else {
// Special case of y4 == 1, in which the innermost loop is a single element
// and can be combined with the next (y3) as an inner broadcast.
//
// Note that this handles the case of pure scalar broadcast when
// y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
// broadcast with batch (as y2 > 1).
//
// NOTE The process is the same as the above general case except simplified
// for y4 == 1 and the loop over y3 is contained within the
// AddScalarBroadcast function.
for (int i0 = 0; i0 < y0; ++i0) {
const uint8_t* input2_data_ptr;
for (int i1 = 0; i1 < y1; ++i1) {
input2_data_ptr = input2_data_reset;
for (int i2 = 0; i2 < y2; ++i2) {
AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
output_data_ptr);
input2_data_ptr += y3;
output_data_ptr += y3;
input1_data_ptr += 1;
}
}
input2_data_reset = input2_data_ptr;
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_

View File

@@ -0,0 +1,68 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T1, typename T2, typename T3, typename Cmp>
void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
const T3* input2_data, const RuntimeShape& output_shape,
T2* output_data, const Cmp& cmp) {
TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0);
TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1,
output_shape.DimensionsCount());
int axis = input2_data[0];
if (axis < 0) {
axis += input1_shape.DimensionsCount();
}
const int axis_size = input1_shape.Dims(axis);
int outer_size = 1;
for (int i = 0; i < axis; ++i) {
TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
outer_size *= input1_shape.Dims(i);
}
int inner_size = 1;
const int dims_count = input1_shape.DimensionsCount();
for (int i = axis + 1; i < dims_count; ++i) {
TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1));
inner_size *= input1_shape.Dims(i);
}
for (int outer = 0; outer < outer_size; ++outer) {
for (int inner = 0; inner < inner_size; ++inner) {
auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
T2 min_max_index = 0;
for (int i = 1; i < axis_size; ++i) {
const auto& curr_value =
input1_data[(outer * axis_size + i) * inner_size + inner];
if (cmp(curr_value, min_max_value)) {
min_max_value = curr_value;
min_max_index = static_cast<T2>(i);
}
}
output_data[outer * inner_size + inner] = min_max_index;
}
}
}
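// Worked example (illustrative): for a [2, 3] input {{1, 9, 4}, {7, 2, 2}},
// input2_data[0] = 1 (the axis) and cmp = std::greater<>() (i.e. ArgMax),
// outer_size = 2, inner_size = 1 and axis_size = 3, giving an output of
// {1, 0}: the index of the largest value along the last dimension of each row.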
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_

View File

@@ -0,0 +1,84 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// TODO(ycling): Refactoring. Remove BroadcastLogical and use the more
// generalized and efficient BroadcastBinaryFunction.
//
// Also appears to duplicate MinimumMaximum.
//
// R: Result type. T1: Input 1 type. T2: Input 2 type.
template <typename R, typename T1, typename T2>
inline void BroadcastBinaryFunction4DSlow(
const RuntimeShape& unextended_input1_shape, const T1* input1_data,
const RuntimeShape& unextended_input2_shape, const T2* input2_data,
const RuntimeShape& unextended_output_shape, R* output_data,
R (*func)(T1, T2)) {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
for (int b = 0; b < output_shape.Dims(0); ++b) {
for (int y = 0; y < output_shape.Dims(1); ++y) {
for (int x = 0; x < output_shape.Dims(2); ++x) {
for (int c = 0; c < output_shape.Dims(3); ++c) {
auto out_idx = Offset(output_shape, b, y, x, c);
auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
auto in1_val = input1_data[in1_idx];
auto in2_val = input2_data[in2_idx];
output_data[out_idx] = func(in1_val, in2_val);
}
}
}
}
}
// R: Result type. T1: Input 1 type. T2: Input 2 type.
// TODO(renjieliu): Refactor other binary functions to use this one.
template <typename R, typename T1, typename T2>
inline void BinaryFunction(const RuntimeShape& input1_shape,
const T1* input1_data,
const RuntimeShape& input2_shape,
const T2* input2_data,
const RuntimeShape& output_shape, R* output_data,
R (*func)(T1, T2)) {
const int flat_size =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = func(input1_data[i], input2_data[i]);
}
}
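// Usage sketch (illustrative; FloorMod is a hypothetical helper, not part of
// this file):
//   float FloorMod(float x, float y) { return x - y * std::floor(x / y); }
//   ...
//   BinaryFunction<float, float, float>(in1_shape, in1_data, in2_shape,
//                                       in2_data, out_shape, out_data,
//                                       FloorMod);
// The three shapes must have matching flat sizes; use
// BroadcastBinaryFunction4DSlow when broadcasting is required.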
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_

View File

@@ -0,0 +1,37 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
#include <cmath>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void Ceil(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = std::ceil(input_data[i]);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_

View File

@@ -0,0 +1,334 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/string_util.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline bool EqualFn(T lhs, T rhs) {
return lhs == rhs;
}
template <typename T>
inline bool NotEqualFn(T lhs, T rhs) {
return lhs != rhs;
}
template <typename T>
inline bool GreaterFn(T lhs, T rhs) {
return lhs > rhs;
}
template <typename T>
inline bool GreaterEqualFn(T lhs, T rhs) {
return lhs >= rhs;
}
template <typename T>
inline bool LessFn(T lhs, T rhs) {
return lhs < rhs;
}
template <typename T>
inline bool LessEqualFn(T lhs, T rhs) {
return lhs <= rhs;
}
inline bool StringRefEqualFn(const StringRef& lhs, const StringRef& rhs) {
if (lhs.len != rhs.len) return false;
for (int i = 0; i < lhs.len; ++i) {
if (lhs.str[i] != rhs.str[i]) return false;
}
return true;
}
inline bool StringRefNotEqualFn(const StringRef& lhs, const StringRef& rhs) {
return !StringRefEqualFn(lhs, rhs);
}
template <typename T>
using ComparisonFn = bool (*)(T, T);
template <typename T, ComparisonFn<T> F>
inline void ComparisonImpl(
const ComparisonParams& op_params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
output_data[i] = F(input1_data[i], input2_data[i]);
}
}
inline void ComparisonStringImpl(bool (*F)(const StringRef&, const StringRef&),
const RuntimeShape& input1_shape,
const TfLiteTensor* input1,
const RuntimeShape& input2_shape,
const TfLiteTensor* input2,
const RuntimeShape& output_shape,
bool* output_data) {
const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
const auto lhs = GetString(input1, i);
const auto rhs = GetString(input2, i);
output_data[i] = F(lhs, rhs);
}
}
template <ComparisonFn<float> F>
inline void Comparison(const ComparisonParams& op_params,
const RuntimeShape& input1_shape,
const float* input1_data,
const RuntimeShape& input2_shape,
const float* input2_data,
const RuntimeShape& output_shape, bool* output_data) {
ComparisonImpl<float, F>(op_params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
}
template <typename T, ComparisonFn<int32_t> F>
inline void ComparisonWithScaling(
const ComparisonParams& op_params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
int left_shift = op_params.left_shift;
int32_t input1_offset = op_params.input1_offset;
int32_t input1_multiplier = op_params.input1_multiplier;
int input1_shift = op_params.input1_shift;
int32_t input2_offset = op_params.input2_offset;
int32_t input2_multiplier = op_params.input2_multiplier;
int input2_shift = op_params.input2_shift;
const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
const int32_t input1_val = input1_offset + input1_data[i];
const int32_t input2_val = input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, input1_multiplier, input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, input2_multiplier, input2_shift);
output_data[i] = F(scaled_input1_val, scaled_input2_val);
}
}
struct BroadcastComparison4DSlowCommon {
const RuntimeShape output_shape;
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
};
inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess(
const RuntimeShape& unextended_input1_shape,
const RuntimeShape& unextended_input2_shape,
const RuntimeShape& unextended_output_shape) {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
return {RuntimeShape::ExtendedShape(4, unextended_output_shape), desc1,
desc2};
}
template <typename T, ComparisonFn<T> F>
inline void BroadcastComparison4DSlowImpl(
const ComparisonParams& op_params,
const RuntimeShape& unextended_input1_shape, const T* input1_data,
const RuntimeShape& unextended_input2_shape, const T* input2_data,
const RuntimeShape& unextended_output_shape, bool* output_data) {
const BroadcastComparison4DSlowCommon dims =
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
unextended_input2_shape,
unextended_output_shape);
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
output_data[Offset(dims.output_shape, b, y, x, c)] =
F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)],
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]);
}
}
}
}
}
inline void BroadcastComparison4DSlowStringImpl(
bool (*F)(const StringRef&, const StringRef&),
const RuntimeShape& unextended_input1_shape, const TfLiteTensor* input1,
const RuntimeShape& unextended_input2_shape, const TfLiteTensor* input2,
const RuntimeShape& unextended_output_shape, bool* output_data) {
const BroadcastComparison4DSlowCommon dims =
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
unextended_input2_shape,
unextended_output_shape);
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
const auto lhs =
GetString(input1, SubscriptToIndex(dims.desc1, b, y, x, c));
const auto rhs =
GetString(input2, SubscriptToIndex(dims.desc2, b, y, x, c));
output_data[Offset(dims.output_shape, b, y, x, c)] = F(lhs, rhs);
}
}
}
}
}
template <ComparisonFn<float> F>
inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
const RuntimeShape& input1_shape,
const float* input1_data,
const RuntimeShape& input2_shape,
const float* input2_data,
const RuntimeShape& output_shape,
bool* output_data) {
BroadcastComparison4DSlowImpl<float, F>(op_params, input1_shape, input1_data,
input2_shape, input2_data,
output_shape, output_data);
}
template <typename T, ComparisonFn<int32_t> F>
inline void BroadcastComparison4DSlowWithScaling(
const ComparisonParams& op_params,
const RuntimeShape& unextended_input1_shape, const T* input1_data,
const RuntimeShape& unextended_input2_shape, const T* input2_data,
const RuntimeShape& unextended_output_shape, bool* output_data) {
const BroadcastComparison4DSlowCommon dims =
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
unextended_input2_shape,
unextended_output_shape);
int left_shift = op_params.left_shift;
int32_t input1_offset = op_params.input1_offset;
int32_t input1_multiplier = op_params.input1_multiplier;
int input1_shift = op_params.input1_shift;
int32_t input2_offset = op_params.input2_offset;
int32_t input2_multiplier = op_params.input2_multiplier;
int input2_shift = op_params.input2_shift;
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
const int32_t input1_val =
input1_offset +
input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
const int32_t input2_val =
input2_offset +
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, input1_multiplier, input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, input2_multiplier, input2_shift);
output_data[Offset(dims.output_shape, b, y, x, c)] =
F(scaled_input1_val, scaled_input2_val);
}
}
}
}
}
#define TFLITE_COMPARISON_OP(name) \
inline void name(const ComparisonParams& op_params, \
const RuntimeShape& input1_shape, const float* input1_data, \
const RuntimeShape& input2_shape, const float* input2_data, \
const RuntimeShape& output_shape, bool* output_data) { \
Comparison<name##Fn>(op_params, input1_shape, input1_data, input2_shape, \
input2_data, output_shape, output_data); \
} \
template <typename T> \
inline void name##NoScaling( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const T* input1_data, const RuntimeShape& input2_shape, \
const T* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data, \
input2_shape, input2_data, output_shape, \
output_data); \
} \
template <typename T> \
inline void name##WithScaling( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const T* input1_data, const RuntimeShape& input2_shape, \
const T* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data, \
input2_shape, input2_data, \
output_shape, output_data); \
} \
template <typename T> \
inline void Broadcast4DSlow##name##NoScaling( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const T* input1_data, const RuntimeShape& input2_shape, \
const T* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
BroadcastComparison4DSlowImpl<T, name##Fn>( \
op_params, input1_shape, input1_data, input2_shape, input2_data, \
output_shape, output_data); \
} \
inline void Broadcast4DSlow##name( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const float* input1_data, const RuntimeShape& input2_shape, \
const float* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data, \
input2_shape, input2_data, \
output_shape, output_data); \
} \
template <typename T> \
inline void Broadcast4DSlow##name##WithScaling( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const T* input1_data, const RuntimeShape& input2_shape, \
const T* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
BroadcastComparison4DSlowWithScaling<T, name##Fn>( \
op_params, input1_shape, input1_data, input2_shape, input2_data, \
output_shape, output_data); \
}
TFLITE_COMPARISON_OP(Equal);
TFLITE_COMPARISON_OP(NotEqual);
TFLITE_COMPARISON_OP(Greater);
TFLITE_COMPARISON_OP(GreaterEqual);
TFLITE_COMPARISON_OP(Less);
TFLITE_COMPARISON_OP(LessEqual);
#undef TFLITE_COMPARISON_OP
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_

View File

@@ -0,0 +1,140 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename Scalar>
inline void Concatenation(const ConcatenationParams& params,
const RuntimeShape* const* input_shapes,
const Scalar* const* input_data,
const RuntimeShape& output_shape,
Scalar* output_data) {
int axis = params.axis;
int inputs_count = params.inputs_count;
const int concat_dimensions = output_shape.DimensionsCount();
TFLITE_DCHECK_LT(axis, concat_dimensions);
int64_t concat_size = 0;
for (int i = 0; i < inputs_count; i++) {
TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
for (int j = 0; j < concat_dimensions; j++) {
if (j != axis) {
MatchingDim(*input_shapes[i], j, output_shape, j);
}
}
concat_size += input_shapes[i]->Dims(axis);
}
TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
int64_t outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= output_shape.Dims(i);
}
// For all input arrays,
// FlatSize() = outer_size * Dims(axis) * base_inner_size;
int64_t base_inner_size = 1;
for (int i = axis + 1; i < concat_dimensions; ++i) {
base_inner_size *= output_shape.Dims(i);
}
Scalar* output_ptr = output_data;
for (int k = 0; k < outer_size; k++) {
for (int i = 0; i < inputs_count; ++i) {
const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
const Scalar* input_ptr = input_data[i] + k * copy_size;
memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
output_ptr += copy_size;
}
}
}
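// Worked example (illustrative): concatenating inputs of shape [2, 3, 4] and
// [2, 5, 4] along axis = 1 produces output shape [2, 8, 4], with
//   outer_size = 2, base_inner_size = 4,
//   copy_size = 3 * 4 = 12 for input 0 and 5 * 4 = 20 for input 1,
// so each outer slice of the output is 12 values copied from input 0 followed
// by 20 values copied from input 1.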
// TODO(prabhumk): This is the same as the optimized implementation.
// TODO(prabhumk): The quantized implementation of concatenation isn't fully
// quantized as it takes scale as a floating-point value. This should be fixed
// when optimizing this routine further.
inline void ConcatenationWithScaling(const ConcatenationParams& params,
const RuntimeShape* const* input_shapes,
const uint8_t* const* input_data,
const RuntimeShape& output_shape,
uint8_t* output_data) {
int axis = params.axis;
const int32_t* input_zeropoint = params.input_zeropoint;
const float* input_scale = params.input_scale;
int inputs_count = params.inputs_count;
const int32_t output_zeropoint = params.output_zeropoint;
const float output_scale = params.output_scale;
const int concat_dimensions = output_shape.DimensionsCount();
TFLITE_DCHECK_LT(axis, concat_dimensions);
int64_t concat_size = 0;
for (int i = 0; i < inputs_count; i++) {
TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
for (int j = 0; j < concat_dimensions; j++) {
if (j != axis) {
MatchingDim(*input_shapes[i], j, output_shape, j);
}
}
concat_size += input_shapes[i]->Dims(axis);
}
TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
int64_t outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= output_shape.Dims(i);
}
// For all input arrays,
// FlatSize() = outer_size * Dims(axis) * base_inner_size;
int64_t base_inner_size = 1;
for (int i = axis + 1; i < concat_dimensions; ++i) {
base_inner_size *= output_shape.Dims(i);
}
const float inverse_output_scale = 1.f / output_scale;
uint8_t* output_ptr = output_data;
for (int k = 0; k < outer_size; k++) {
for (int i = 0; i < inputs_count; ++i) {
const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
const uint8_t* input_ptr = input_data[i] + k * copy_size;
if (input_zeropoint[i] == output_zeropoint &&
input_scale[i] == output_scale) {
memcpy(output_ptr, input_ptr, copy_size);
} else {
const float scale = input_scale[i] * inverse_output_scale;
const float bias = -input_zeropoint[i] * scale;
for (int j = 0; j < copy_size; ++j) {
const int32_t value = static_cast<int32_t>(tflite::TfLiteRound(
input_ptr[j] * scale + bias)) +
output_zeropoint;
output_ptr[j] = static_cast<uint8_t>(
std::max<int32_t>(std::min<int32_t>(255, value), 0));
}
}
output_ptr += copy_size;
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_

View File

@@ -0,0 +1,262 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const float* input_data, const RuntimeShape& filter_shape,
const float* filter_data, const RuntimeShape& bias_shape,
const float* bias_data, const RuntimeShape& output_shape,
float* output_data, const RuntimeShape& im2col_shape,
float* im2col_data) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const float output_activation_min = params.float_activation_min;
const float output_activation_max = params.float_activation_max;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
float total = 0.f;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
float input_value = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
float filter_value =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
total += (input_value * filter_value);
}
}
}
}
float bias_value = 0.0f;
if (bias_data) {
bias_value = bias_data[out_channel];
}
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
ActivationFunctionWithMinMax(total + bias_value,
output_activation_min,
output_activation_max);
}
}
}
}
}
inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data, const RuntimeShape& im2col_shape,
uint8_t* im2col_data, void* cpu_backend_context) {
(void)cpu_backend_context; // only used in optimized code.
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
acc +=
(filter_val + filter_offset) * (input_val + input_offset);
}
}
}
}
if (bias_data) {
acc += bias_data[out_channel];
}
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
output_shift);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<uint8_t>(acc);
}
}
}
}
}
inline void HybridConvPerChannel(
const ConvParams& params, float* scaling_factors_ptr,
const RuntimeShape& input_shape, const int8_t* input_data,
const RuntimeShape& filter_shape, const int8_t* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const RuntimeShape& im2col_shape, int8_t* im2col_data,
const float* per_channel_scale, int32_t* input_offset) {
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const float output_activation_min = params.float_activation_min;
const float output_activation_max = params.float_activation_max;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
acc += filter_val * (input_val - input_offset[batch]);
}
}
}
}
float acc_float =
acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
if (bias_data) {
acc_float += bias_data[out_channel];
}
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
ActivationFunctionWithMinMax(acc_float, output_activation_min,
output_activation_max);
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
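The offset terms in the quantized Conv come straight from the affine quantization scheme real = scale * (q - zero_point): with input_offset = -input_zero_point and weights_offset = -filter_zero_point, each real-valued product becomes an integer product of offset-corrected codes, which is what the inner loop accumulates. A per-element sketch of that identity (illustrative only, not a TFLite helper):

#include <cstdint>

// real_input * real_filter
//   = (input_scale * (q_in - input_zp)) * (filter_scale * (q_w - filter_zp))
//   = input_scale * filter_scale *
//     (q_in + input_offset) * (q_w + filter_offset)
// when input_offset == -input_zp and filter_offset == -filter_zp.
inline int32_t OffsetCorrectedProduct(uint8_t q_in, uint8_t q_w,
                                      int32_t input_offset,
                                      int32_t filter_offset) {
  return (static_cast<int32_t>(q_w) + filter_offset) *
         (static_cast<int32_t>(q_in) + input_offset);
}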

View File

@@ -0,0 +1,100 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void DepthwiseConv(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const float* input_data, const RuntimeShape& filter_shape,
const float* filter_data, const RuntimeShape& bias_shape,
const float* bias_data, const RuntimeShape& output_shape,
float* output_data) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const float output_activation_min = params.float_activation_min;
const float output_activation_max = params.float_activation_max;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int b = 0; b < batches; ++b) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int ic = 0; ic < input_depth; ++ic) {
for (int m = 0; m < depth_multiplier; m++) {
const int oc = m + ic * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
float total = 0.f;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
float input_value =
input_data[Offset(input_shape, b, in_y, in_x, ic)];
float filter_value = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, oc)];
total += (input_value * filter_value);
}
}
}
float bias_value = 0.0f;
if (bias_data) {
bias_value = bias_data[oc];
}
output_data[Offset(output_shape, b, out_y, out_x, oc)] =
ActivationFunctionWithMinMax(total + bias_value,
output_activation_min,
output_activation_max);
}
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
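Depthwise convolution pairs each input channel with depth_multiplier filters, so output channels are laid out as oc = m + ic * depth_multiplier and output_depth = input_depth * depth_multiplier. A small helper sketch (hypothetical, not part of the kernel) that makes the inverse mapping explicit:

#include <utility>

// Returns {input_channel, multiplier_index} for a depthwise output channel.
inline std::pair<int, int> DepthwiseOutputToInputChannel(int output_channel,
                                                         int depth_multiplier) {
  return {output_channel / depth_multiplier, output_channel % depth_multiplier};
}
// Example: with depth_multiplier = 2, output channel 5 reads input channel 2
// and applies that channel's second (m = 1) filter.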

View File

@@ -0,0 +1,297 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
#include <algorithm>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
// Used in tests and template parameters to control which version of depthwise
// convolution is called. Primarily for reference code, and specializations
// forced in tests.
enum class DepthwiseConvImplementation {
// Run all tests against kUseStandardEntry even if also testing another
// kernel, since we need to be sure that the main DepthwiseConv() function in
// optimized_ops.h dispatches to a correctly-executing kernel.
kNone = 0, // The "default" option: use the normal
// DepthwiseConv kernel (entry) function.
kUseGenericKernel, // Forced use of generic kernel.
kUseNeon3x3, // 3x3 kernel that uses NEON when available.
kUseNeon3x3DotProduct, // 3x3 kernel that uses dot-product enabled NEON
// when available.
kUseCModel3x3DotProduct, // 3x3 kernel, reference C model that is intended
// to match overall design NEON code.
kUseUnwound3x3DotProduct, // 3x3 kernel, reference C model with unwound loops
// and some arrays.
kUseIntrinsics3x3DotProduct, // 3x3 kernel using NEON intrinsics.
};
// Category of depthwise convolution output rounding.
enum class DepthwiseConvOutputRounding {
kNone = 0, // Invalid: specific method must be specified.
kAwayFromZero, // Original method: exact halves rounded away from zero.
kUpward, // Halves towards +infinity: adds 0.5 before truncate.
// This is where a future kNearestEven would be placed.
};
// Category of depthwise convolution depth multiplication.
enum class DepthwiseConvDepthMultiplication {
kNoMultiplication = 0, // Depth multiplier = 1.
kUnitInputDepth, // Input depth = 1, output depth = depth multiplier.
};
namespace reference_ops {
namespace depthwise_conv {
template <DepthwiseConvOutputRounding output_rounding>
inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
int shift) {
TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}
template <>
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
int32_t x, int32_t quantized_multiplier, int shift) {
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}
template <>
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
int32_t x, int32_t quantized_multiplier, int shift) {
using gemmlowp::SaturatingRoundingDoublingHighMul;
const int left_shift = shift > 0 ? shift : 0;
const int right_shift = shift > 0 ? 0 : -shift;
const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
quantized_multiplier) +
rounding_offset) >>
right_shift;
}
template <DepthwiseConvOutputRounding output_rounding>
struct DepthwiseConvBasicKernel {
static inline void Run(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int b = 0; b < batches; ++b) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int ic = 0; ic < input_depth; ++ic) {
for (int m = 0; m < depth_multiplier; m++) {
const int oc = m + ic * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x =
in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32_t input_val =
input_data[Offset(input_shape, b, in_y, in_x, ic)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, oc)];
acc += (filter_val + filter_offset) *
(input_val + input_offset);
}
}
}
if (bias_data) {
acc += bias_data[oc];
}
acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
output_shift);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, b, out_y, out_x, oc)] =
static_cast<uint8_t>(acc);
}
}
}
}
}
}
// TODO(b/148596273): Reconcile reference versions, perhaps with common
// MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
static inline void RunPerChannel(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32_t input_offset = params.input_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
const int32_t* output_multiplier = params.output_multiplier_per_channel;
const int32_t* output_shift = params.output_shift_per_channel;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
for (int m = 0; m < depth_multiplier; ++m) {
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x =
in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
// Accumulate with 32 bits accumulator.
// In the nudging process during model quantization, we
// force the real value 0.0 to be represented by a quantized
// value. This guarantees that the input_offset is an int8_t,
// even though it is represented using an int32_t. The accumulation
// is int32_t += int8_t * (int8_t - int8_t), so the highest value
// we can get from
// each accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
// applied to a filter so the accumulation logic will hold
// as long as the filter size (filter_y * filter_x *
// in_channel) does not exceed 2^16, which is the case in
// all the models we have seen so far.
acc += filter_val * (input_val + input_offset);
}
}
}
if (bias_data) {
acc += bias_data[output_channel];
}
acc = DepthwiseConvRound<output_rounding>(
acc, output_multiplier[output_channel],
output_shift[output_channel]);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x,
output_channel)] = static_cast<int8_t>(acc);
}
}
}
}
}
}
};
} // namespace depthwise_conv
inline void DepthwiseConv(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
return depthwise_conv::DepthwiseConvBasicKernel<
DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
input_data, filter_shape,
filter_data, bias_shape,
bias_data, output_shape,
output_data);
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
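The accumulator-headroom argument in the RunPerChannel comment can be checked with plain arithmetic: each int8 product is bounded in magnitude by the 32512 figure quoted above, so an int32 accumulator absorbs comfortably more than 2^16 such products before it could overflow. A compile-time sketch of that bound (standalone arithmetic, not library code):

#include <cstdint>
#include <limits>

// Magnitude bound on one product, as quoted in the RunPerChannel comment.
constexpr int32_t kMaxAbsProduct = 32512;
constexpr int64_t kSafeAccumulations =
    static_cast<int64_t>(std::numeric_limits<int32_t>::max()) / kMaxAbsProduct;
static_assert(kSafeAccumulations > (1 << 16),
              "an int32 accumulator holds well over 2^16 int8 products");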

View File

@@ -0,0 +1,78 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
#include <limits.h>
#include <vector>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Dequantizes into a float without rounding.
template <typename InputT, typename OutputT>
inline void Dequantize(const tflite::DequantizationParams& op_params,
const RuntimeShape& input_shape,
const InputT* input_data,
const RuntimeShape& output_shape, OutputT* output_data) {
int32_t zero_point = op_params.zero_point;
const double scale = op_params.scale;
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
const int32_t val = input_data[i];
const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
output_data[i] = result;
}
}
// Dequantizes per-channel quantized tensor to float.
template <typename T>
inline void PerChannelDequantize(
const tflite::PerChannelDequantizationParams& op_params,
const RuntimeShape& input_shape, const T* input_data,
const RuntimeShape& output_shape, float* output_data) {
// Ensure the input and output flat sizes match.
MatchingFlatSize(input_shape, output_shape);
const int32_t* zero_point = op_params.zero_point;
const float* scale = op_params.scale;
const int32_t quantized_dimension = op_params.quantized_dimension;
const int32_t num_dims = input_shape.DimensionsCount();
const int32_t* dims_data = input_shape.DimsData();
std::vector<int> current_dim(num_dims, 0);
do {
size_t offset =
ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
current_dim.data(), 0, nullptr);
const int channel = current_dim[quantized_dimension];
const int32_t val = input_data[offset];
const float result =
static_cast<float>(scale[channel] * (val - zero_point[channel]));
output_data[offset] = result;
} while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
current_dim.data()));
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
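PerChannelDequantize applies the same affine map as the per-tensor path, but picks the scale and zero point by the coordinate along quantized_dimension. A minimal per-element sketch (hypothetical helper, not the TFLite API):

#include <cstdint>

// Dequantizes one int8 code using the scale/zero point of its channel.
inline float DequantizePerChannel(int8_t q, int channel, const float* scales,
                                  const int32_t* zero_points) {
  return scales[channel] * (static_cast<int32_t>(q) - zero_points[channel]);
}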

View File

@@ -0,0 +1,39 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
#include <cmath>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void Floor(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
int offset = i;
output_data[offset] = std::floor(input_data[offset]);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_

View File

@@ -0,0 +1,320 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const float* input_data, const RuntimeShape& weights_shape,
const float* weights_data, const RuntimeShape& bias_shape,
const float* bias_data, const RuntimeShape& output_shape,
float* output_data) {
const float output_activation_min = params.float_activation_min;
const float output_activation_max = params.float_activation_max;
// TODO(benoitjacob): This really should be:
// const int batches = ArraySize(output_dims, 1);
// but the current --variable_batch hack consists in overwriting the 3rd
// dimension with the runtime batch size, as we don't keep track for each
// array of which dimension is the batch dimension in it.
const int output_dims_count = output_shape.DimensionsCount();
const int weights_dims_count = weights_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
output_shape, output_dims_count - 1);
const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
float total = 0.f;
for (int d = 0; d < accum_depth; ++d) {
total += input_data[b * accum_depth + d] *
weights_data[out_c * accum_depth + d];
}
float bias_value = 0.0f;
if (bias_data) {
bias_value = bias_data[out_c];
}
output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
total + bias_value, output_activation_min, output_activation_max);
}
}
}
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
// TODO(benoitjacob): This really should be:
// const int batches = ArraySize(output_dims, 1);
// but the current --variable_batch hack consists in overwriting the 3rd
// dimension with the runtime batch size, as we don't keep track for each
// array of which dimension is the batch dimension in it.
const int output_dim_count = output_shape.DimensionsCount();
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
output_shape, output_dim_count - 1);
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
int32_t acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * (input_val + input_offset);
}
if (bias_data) {
acc += bias_data[out_c];
}
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
}
}
}
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(output_offset, 0);
// TODO(benoitjacob): This really should be:
// const int batches = ArraySize(output_dims, 1);
// but the current --variable_batch hack consists in overwriting the 3rd
// dimension with the runtime batch size, as we don't keep track for each
// array of which dimension is the batch dimension in it.
const int output_dim_count = output_shape.DimensionsCount();
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
output_shape, output_dim_count - 1);
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
// Internal accumulation.
// Initialize accumulator with the bias-value.
int32_t accum = bias_data[out_c];
// Accumulation loop.
for (int d = 0; d < accum_depth; ++d) {
int16_t input_val = input_data[b * accum_depth + d] + input_offset;
int16_t filter_val =
filter_data[out_c * accum_depth + d] + filter_offset;
accum += filter_val * input_val;
}
// Down-scale the final int32_t accumulator to the scale used by our
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
// multiplier and shift here have been pre-computed offline
// (e.g. by toco).
accum =
MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
// Saturate, cast to int16_t, and store to output array.
accum = std::max(accum, output_activation_min - output_offset);
accum = std::min(accum, output_activation_max - output_offset);
accum += output_offset;
output_data[out_c + output_depth * b] = accum;
}
}
}
inline void ShuffledFullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& weights_shape,
const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
// TODO(benoitjacob): This really should be:
// const int batches = ArraySize(output_dims, 1);
// but the current --variable_batch hack consists in overwriting the 3rd
// dimension with the runtime batch size, as we don't keep track for each
// array of which dimension is the batch dimension in it.
const int output_dim_count = output_shape.DimensionsCount();
const int weights_dim_count = weights_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
output_shape, output_dim_count - 1);
const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
TFLITE_DCHECK((accum_depth % 16) == 0);
TFLITE_DCHECK((output_depth % 4) == 0);
// Shuffling and xoring of input activations into the workspace buffer
uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
if (batches == 1) {
for (int i = 0; i < accum_depth; i++) {
shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
}
} else if (batches == 4) {
for (int c = 0; c < accum_depth; c += 16) {
for (int b = 0; b < 4; b++) {
const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
for (int j = 0; j < 16; j++) {
uint8_t src_val = *src_data_ptr++;
// Flip the sign bit, so that the kernel will only need to
// reinterpret these uint8_t values as int8_t, getting for free the
// subtraction of the zero_point value 128.
uint8_t dst_val = src_val ^ 0x80;
*shuffled_input_workspace_ptr++ = dst_val;
}
}
}
} else {
TFLITE_DCHECK(false);
return;
}
// Actual computation
if (batches == 1) {
int16_t* output_ptr = output_data;
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
// so that just reinterpreting them as int8_t values is equivalent to
// subtracting 128 from them, thus implementing for free the subtraction of
// the zero_point value 128.
const int8_t* shuffled_weights_ptr =
reinterpret_cast<const int8_t*>(shuffled_weights_data);
// Likewise, we preshuffled and pre-xored the input data above.
const int8_t* shuffled_input_data =
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
for (int c = 0; c < output_depth; c += 4) {
// Internal accumulation.
// Accumulators start at zero; the bias is added after the accumulation loop.
int32_t accum[4] = {0};
// Accumulation loop.
for (int d = 0; d < accum_depth; d += 16) {
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 16; j++) {
int8_t input_val = shuffled_input_data[d + j];
int8_t weights_val = *shuffled_weights_ptr++;
accum[i] += weights_val * input_val;
}
}
}
for (int i = 0; i < 4; i++) {
// Add bias value
int32_t acc = accum[i] + bias_data[c + i];
// Down-scale the final int32_t accumulator to the scale used by our
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
// multiplier and shift here have been pre-computed offline
// (e.g. by toco).
acc =
MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
// Saturate, cast to int16_t, and store to output array.
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_ptr[c + i] = acc;
}
}
} else if (batches == 4) {
int16_t* output_ptr = output_data;
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
// so that just reinterpreting them as int8_t values is equivalent to
// subtracting 128 from them, thus implementing for free the subtraction of
// the zero_point value 128.
const int8_t* shuffled_weights_ptr =
reinterpret_cast<const int8_t*>(shuffled_weights_data);
// Likewise, we preshuffled and pre-xored the input data above.
const int8_t* shuffled_input_data =
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
for (int c = 0; c < output_depth; c += 4) {
const int8_t* shuffled_input_ptr = shuffled_input_data;
// Internal accumulation. Accumulators start at zero; the bias is added
// after the accumulation loop.
int32_t accum[4][4];
for (int i = 0; i < 4; i++) {
for (int b = 0; b < 4; b++) {
accum[i][b] = 0;
}
}
for (int d = 0; d < accum_depth; d += 16) {
for (int i = 0; i < 4; i++) {
for (int b = 0; b < 4; b++) {
for (int j = 0; j < 16; j++) {
int8_t input_val = shuffled_input_ptr[16 * b + j];
int8_t weights_val = shuffled_weights_ptr[16 * i + j];
accum[i][b] += weights_val * input_val;
}
}
}
shuffled_input_ptr += 64;
shuffled_weights_ptr += 64;
}
for (int i = 0; i < 4; i++) {
for (int b = 0; b < 4; b++) {
// Add bias value
int32_t acc = accum[i][b] + bias_data[c + i];
// Down-scale the final int32_t accumulator to the scale used by our
// (16-bit, typically 3 integer bits) fixed-point format. The
// quantized multiplier and shift here have been pre-computed offline
// (e.g. by toco).
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
output_shift);
// Saturate, cast to int16_t, and store to output array.
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_ptr[b * output_depth + c + i] = acc;
}
}
}
} else {
TFLITE_DCHECK(false);
return;
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
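ShuffledFullyConnected leans on the fact that flipping the sign bit of a uint8 code and reinterpreting the byte as int8 subtracts the zero point 128 for free. A standalone sketch of that identity (illustrative; memcpy is used to reinterpret the byte portably):

#include <cstdint>
#include <cstring>

// For any uint8 code q: reinterpret(q ^ 0x80) as int8 == int(q) - 128.
inline int8_t FlipSignBit(uint8_t q) {
  const uint8_t flipped = q ^ 0x80;
  int8_t result;
  std::memcpy(&result, &flipped, 1);
  return result;
}
// Example: FlipSignBit(200) == 72 and FlipSignBit(5) == -123, i.e. q - 128.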

View File

@@ -0,0 +1,166 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline int16_t SaturatingLeftShift(int16_t value, int amount) {
int32_t result = static_cast<int32_t>(value) * (1 << amount);
result = std::min<int32_t>(result, std::numeric_limits<int16_t>::max());
result = std::max<int32_t>(result, std::numeric_limits<int16_t>::min());
return result;
}
// Similar to ARM instruction SQDMULH.
// Similar to gemmlowp::SaturatingRoundingDoublingHighMul except
// rounding to zero instead of to nearest (SQRDMULH).
inline std::int16_t SaturatingDoublingHighMul(std::int16_t a, std::int16_t b) {
bool overflow = a == b && a == std::numeric_limits<std::int16_t>::min();
std::int32_t a_32(a);
std::int32_t b_32(b);
std::int32_t ab_32 = a_32 * b_32;
std::int16_t ab_x2_high16 = static_cast<std::int16_t>((ab_32) / (1 << 15));
return overflow ? std::numeric_limits<std::int16_t>::max() : ab_x2_high16;
}
template <typename T>
inline void HardSwish(const RuntimeShape& input_shape, const T* input_data,
const RuntimeShape& output_shape, T* output_data) {
ruy::profiler::ScopeLabel label("ReferenceHardSwish/Float");
auto matching_size = MatchingFlatSize(input_shape, output_shape);
const T* in_end = input_data + matching_size;
for (; input_data < in_end; input_data++, output_data++) {
const float in = *input_data;
*output_data =
in * std::min(static_cast<T>(6), std::max(static_cast<T>(0), in + 3)) /
6;
}
}
template <typename T>
inline void HardSwish(const HardSwishParams& params,
const RuntimeShape& input_shape, const T* input_data,
const RuntimeShape& output_shape, T* output_data) {
ruy::profiler::ScopeLabel label("ReferenceHardSwish/Quantized");
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
const int16_t input_value = input_data[i] - params.input_zero_point;
// Left-shift as much as we can without overflow/saturation to put
// significant bits in the high bits of our 16-bit fixedpoint values, so
// that fixed-point approximate computations below are as accurate as
// possible.
const int16_t input_value_on_hires_input_scale = input_value * (1 << 7);
// Compute the input value on essentially the output scale, just not
// right-shifted yet. This is the value that we'll use in the (x >= +3)
// case, and that in the general case we'll multiply against the "relu-ish"
// fixed-point multiplier in [0, 1].
const int16_t input_value_on_preshift_output_scale =
gemmlowp::SaturatingRoundingDoublingHighMul(
input_value_on_hires_input_scale,
params.output_multiplier_fixedpoint_int16);
// Now compute the "relu-ish multiplier". In the (-3 <= x <= +3) case, that
// is just an affine rescaling of x from [-3, 3] to [0, 1]. In the general
// case, it is just that plus saturation at the boundaries of [-3, 3].
// First, we rescale from [-3, 3] to [-1, 1], saturating.
// That is done by rescaling the input value with a fixed-point multiplier
// (reluish_multiplier_fixedpoint) and bit-shift such that we represent
// that input value on the scale where the real value 3.0f is represented
// by the quantized value 32768. (+32768 is actually not representable as
// int16_t, so this saturates at +32767, and that is seen empirically to be
// a negligible contribution to numerical error/bias).
//
// This code is careful to correctly implement any magnitude of multiplier,
// involving either a right shift or a left shift, with correct saturation
// behavior in the left-shift case. This forces this code to be more
// complicated, but is necessary for real applications: a partially
// trained quantized MobileNet v3-small model that motivated this code
// exhibits some large [min, max] range boundaries, of the order of
// magnitude of 10 or 100 depending on layers.
//
// The next few lines are basically just an ordinary
// MultiplyByQuantizedMultiplier, except that we are more careful here
// about the fine details of saturation when left-shifting, because here
// overflow in left-shift is a common case, not an anomaly as
// MultiplyByQuantizedMultiplier assumes.
int16_t reluish_value = input_value_on_hires_input_scale;
// Shift left, saturating, as much as we can while ensuring that this
// saturation will not contribute to the result. That is, left shift amount
// reduced by 1.
if (params.reluish_multiplier_exponent > 0) {
reluish_value = SaturatingLeftShift(
reluish_value, params.reluish_multiplier_exponent - 1);
}
// Apply the fixed-point multiplier, dividing the value by a divisor
// ranging in [1, 2].
reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul(
reluish_value, params.reluish_multiplier_fixedpoint_int16);
// Apply the last bit of left-shift. Thus, in the left-shifting case, if
// any saturation affects the result, it is happening here --- any
// saturation having occurred above is overwritten here, not affecting the
// result.
if (params.reluish_multiplier_exponent > 0) {
reluish_value = SaturatingLeftShift(reluish_value, 1);
}
// Shift right, in the right-shifting case.
if (params.reluish_multiplier_exponent < 0) {
reluish_value = gemmlowp::RoundingDivideByPOT(
reluish_value, -params.reluish_multiplier_exponent);
}
// At this point we have rescaled the value into a 16bit fixedpoint
// reluish_value in [-1, 1].
// We now convert that to a 16bit fixedpoint value in [0, 1].
reluish_value = (reluish_value + (1 << 15)) >> 1;
// Use of SaturatingDoublingHighMul here is important to cancel the biases
// from the above SaturatingRoundingDoublingHighMul.
//
// On a partially trained MobileNet-v3-small,
//
// | bias on | ImageNet
// | quantized | Top-1
// Operation used here | values | accuracy (50k)
// --------------------------------------+------------+-----------
// SaturatingDoublingHighMul | -0.0024 | 58.920
// SaturatingRoundingDoublingHighMul | -0.0067 | 58.064
//
// In activations_test, this is covered by this testcase:
// QuantizedActivationsOpTest.HardSwishBias
//
const int16_t preshift_output_value = SaturatingDoublingHighMul(
reluish_value, input_value_on_preshift_output_scale);
// We were so far operating on the pre-shift output scale. Now we finally
// apply that output shift, arriving at the final output scale.
int16_t output_value = gemmlowp::RoundingDivideByPOT(
preshift_output_value, -params.output_multiplier_exponent);
output_value += params.output_zero_point;
output_value =
std::min<int16_t>(output_value, std::numeric_limits<T>::max());
output_value =
std::max<int16_t>(output_value, std::numeric_limits<T>::min());
output_data[i] = output_value;
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
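Both HardSwish overloads implement the same real-valued function, hard_swish(x) = x * relu6(x + 3) / 6; the quantized path just evaluates it in 16-bit fixed point. A plain float sketch with a few spot values, useful as a mental model for the fixed-point code:

#include <algorithm>

inline float HardSwishReference(float x) {
  const float relu6 = std::min(6.f, std::max(0.f, x + 3.f));
  return x * relu6 / 6.f;
}
// HardSwishReference(-4.f) == 0.f, HardSwishReference(0.f) == 0.f,
// HardSwishReference(1.f) == 2.f / 3.f, HardSwishReference(4.f) == 4.f.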

View File

@@ -0,0 +1,145 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_integer_ops {
inline void CheckArithmeticParams(const ArithmeticParams& params) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is negative input zero point. Activation tensors are
// asymmetric quantized so they span the full int8 range.
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
}
// Element-wise add that can often be used for inner loop of broadcast add as
// well as the non-broadcast add.
inline void AddElementwise(int size, const ArithmeticParams& params,
const int8_t* input1_data, const int8_t* input2_data,
int8_t* output_data) {
CheckArithmeticParams(params);
for (int i = 0; i < size; ++i) {
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<int8_t>(clamped_output);
}
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int8_t* input1_data,
const RuntimeShape& input2_shape, const int8_t* input2_data,
const RuntimeShape& output_shape, int8_t* output_data) {
CheckArithmeticParams(params);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int8_t* input1_data,
const RuntimeShape& input2_shape,
const int8_t* input2_data,
const RuntimeShape& output_shape,
int8_t* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32_t shifted_input1_val =
input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val =
input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier,
params.input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier,
params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<int8_t>(clamped_output);
}
}
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
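AddElementwise computes, in fixed point, the real-valued sum (s1 * (q1 - z1) + s2 * (q2 - z2)) / s_out + z_out; the left_shift applied to both inputs only adds headroom before the per-input multipliers. A float sketch of the same mapping (hypothetical helper, ignoring the fixed-point details):

#include <algorithm>
#include <cmath>
#include <cstdint>

inline int8_t AddQuantizedReference(int8_t q1, float s1, int32_t z1,
                                    int8_t q2, float s2, int32_t z2,
                                    float s_out, int32_t z_out) {
  const float real_sum = s1 * (q1 - z1) + s2 * (q2 - z2);
  const int32_t q = static_cast<int32_t>(std::round(real_sum / s_out)) + z_out;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}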

View File

@@ -0,0 +1,217 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
// Fixed-point per-channel-quantization convolution reference kernel.
inline void ConvPerChannel(
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
const int32_t input_offset = params.input_offset; // r = s(q - Z)
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int32_t output_offset = params.output_offset;
// Set min and max value of the output.
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Consistency check.
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
// Check dimensions of the tensors.
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
// Accumulate with a 32-bit accumulator.
// In the nudging process during model quantization, we force the
// real value of 0.0 to be represented by a quantized value. This
// guarantees that the input_offset is an int8_t, even though
// it is represented using int32_t. int32_t += int8_t *
// (int8_t - int8_t) so the highest value we can get from each
// accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
// applied to a filter so the accumulation logic will hold as
// long as the filter size (filter_y * filter_x * in_channel)
// does not exceed 2^16, which is the case in all the models
// we have seen so far.
// TODO(jianlijianli): Add a check to make sure the
// accumulator depth is smaller than 2^16.
acc += filter_val * (input_val + input_offset);
}
}
}
}
if (bias_data) {
acc += bias_data[out_channel];
}
acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[out_channel], output_shift[out_channel]);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<int8_t>(acc);
}
}
}
}
}
// Fixed-point per-channel-quantization convolution reference kernel.
// 16-bit data and 8-bit filter
inline void ConvPerChannel(
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const std::int64_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
// Get parameters.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
// Set min and max value of the output.
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Consistency check.
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
// Check dimensions of the tensors.
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
std::int64_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
// Accumulate with a 64-bit accumulator.
// int64_t += int8_t * int16_t, so the highest value we can get from
// each accumulation is [-127, 127] * ([-32768, 32767] - [-32768, 32767]),
// which is [-8322945, 8322945]. log2(8322945) = 22.99.
acc += filter_val * input_val;
}
}
}
}
if (bias_data) {
acc += bias_data[out_channel];
}
int32_t scaled_acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[out_channel], output_shift[out_channel]);
scaled_acc = std::max(scaled_acc, output_activation_min);
scaled_acc = std::min(scaled_acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<int16_t>(scaled_acc);
}
}
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
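The per-channel requantization step above folds input_scale * filter_scale[channel] / output_scale into the output_multiplier/output_shift pair consumed by MultiplyByQuantizedMultiplier. A small float sketch of that step, with the fixed-point pair replaced by an explicit effective scale and all values assumed for illustration:

// Sketch only: per-channel requantization of a convolution accumulator,
// written with a float scale standing in for the multiplier/shift pair.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int8_t RequantizePerChannel(int32_t acc, float input_scale,
                            float filter_scale_ch, float output_scale,
                            int32_t output_offset) {
  const float effective_scale = input_scale * filter_scale_ch / output_scale;
  int32_t out = static_cast<int32_t>(std::lround(acc * effective_scale));
  out += output_offset;
  return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, out)));
}

int main() {
  // Assumed accumulator value and quantization parameters.
  std::printf("%d\n", RequantizePerChannel(4000, 0.5f, 0.004f, 0.05f, -10));
  return 0;
}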

View File

@@ -0,0 +1,289 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline void DepthwiseConvPerChannel(
const DepthwiseParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32_t input_offset = params.input_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
for (int m = 0; m < depth_multiplier; ++m) {
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
// Accumulate with a 32-bit accumulator.
// In the nudging process during model quantization, we force the
// real value of 0.0 to be represented by a quantized value. This
// guarantees that the input_offset is an int8_t, even though
// it is represented using int32_t. int32_t += int8_t *
// (int8_t - int8_t) so the highest value we can get from each
// accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
// applied to a filter so the accumulation logic will hold as
// long as the filter size (filter_y * filter_x * in_channel)
// does not exceed 2^16, which is the case in all the models
// we have seen so far.
// TODO(jianlijianli): Add a check to make sure the
// accumulator depth is smaller than 2^16.
acc += filter_val * (input_val + input_offset);
}
}
}
if (bias_data) {
acc += bias_data[output_channel];
}
acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[output_channel],
output_shift[output_channel]);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x,
output_channel)] = static_cast<int8_t>(acc);
}
}
}
}
}
}
inline void DepthwiseConvPerChannel(
const DepthwiseParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const std::int64_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
// Get parameters.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
for (int m = 0; m < depth_multiplier; ++m) {
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
std::int64_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
// Accumulate with a 64-bit accumulator.
// We assume a maximum of 2^16 accumulations as in the 8-bit case,
// so the value in the accumulator should not exceed 40 bits.
acc += static_cast<int64_t>(filter_val) *
static_cast<int64_t>(input_val);
}
}
}
if (bias_data) {
acc += bias_data[output_channel];
}
int32_t scaled_acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[output_channel],
output_shift[output_channel]);
scaled_acc = std::max(scaled_acc, output_activation_min);
scaled_acc = std::min(scaled_acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x,
output_channel)] =
static_cast<int16_t>(scaled_acc);
}
}
}
}
}
}
inline void DepthwiseConvHybridPerChannel(
const DepthwiseParams& params, float* scaling_factors_ptr,
const RuntimeShape& input_shape, const int8_t* input_data,
const RuntimeShape& filter_shape, const int8_t* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const float* per_channel_scale, int32_t* input_offset) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const float output_activation_min = params.float_activation_min;
const float output_activation_max = params.float_activation_max;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int bias_depth = bias_shape.FlatSize();
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_depth, output_depth);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
for (int m = 0; m < depth_multiplier; ++m) {
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
acc += filter_val * (input_val - input_offset[batch]);
}
}
}
float acc_float = static_cast<float>(acc);
acc_float *=
per_channel_scale[output_channel] * scaling_factors_ptr[batch];
if (bias_data && output_channel < bias_depth) {
acc_float += bias_data[output_channel];
}
output_data[Offset(output_shape, batch, out_y, out_x,
output_channel)] =
ActivationFunctionWithMinMax(acc_float, output_activation_min,
output_activation_max);
}
}
}
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
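The depthwise loops above fan each input channel out to depth_multiplier output channels via output_channel = m + in_channel * depth_multiplier. A tiny standalone sketch of that mapping, with illustrative sizes only:

// Sketch only: how depth_multiplier maps input channels to output channels.
#include <cstdio>

int main() {
  const int input_depth = 3;
  const int depth_multiplier = 2;  // output_depth = 6
  for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
    for (int m = 0; m < depth_multiplier; ++m) {
      const int output_channel = m + in_channel * depth_multiplier;
      std::printf("in %d, m %d -> out %d\n", in_channel, m, output_channel);
    }
  }
  return 0;
}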

View File

@@ -0,0 +1,108 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = output_shape.Dims(0);
const int output_depth = output_shape.Dims(1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
int32_t acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * (input_val + input_offset);
}
if (bias_data) {
acc += bias_data[out_c];
}
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
}
}
}
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int64_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
const int32_t filter_offset = params.weights_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = output_shape.Dims(0);
const int output_depth = output_shape.Dims(1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
int64_t acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * input_val;
}
if (bias_data) {
acc += bias_data[out_c];
}
int32_t acc_scaled =
MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
acc_scaled = std::max(acc_scaled, output_activation_min);
acc_scaled = std::min(acc_scaled, output_activation_max);
output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
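The int8_t FullyConnected above is an offset-corrected inner product per output unit, followed by requantization. A small standalone sketch of the accumulation step on made-up data (the requantization step is omitted here):

// Sketch only: the int8_t FullyConnected inner loop on a tiny example
// (1 batch, accum_depth = 3, output_depth = 2). Values are illustrative.
#include <cstdint>
#include <cstdio>

int main() {
  const int accum_depth = 3, output_depth = 2;
  const int8_t input[3] = {10, -4, 7};
  const int8_t filter[2][3] = {{1, 2, 3}, {-1, 0, 5}};
  const int32_t bias[2] = {100, -50};
  const int32_t input_offset = 2, filter_offset = 0;  // assumed offsets
  for (int out_c = 0; out_c < output_depth; ++out_c) {
    int32_t acc = 0;
    for (int d = 0; d < accum_depth; ++d) {
      acc += (filter[out_c][d] + filter_offset) * (input[d] + input_offset);
    }
    acc += bias[out_c];
    std::printf("raw acc[%d] = %d\n", out_c, static_cast<int>(acc));
  }
  return 0;
}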

View File

@@ -0,0 +1,65 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
int32_t depth, const int8_t* input_data,
int8_t* output_data) {
static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
// The output scale must be in sync with Prepare().
// Output is in 1/128 scale so the actual output range is nudged from [-1, 1]
// to [-1, 127/128].
static constexpr int32_t kOutputScale = 7;
for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
// int32_t = (int8_t - int8_t) ^ 2.
// ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
// safe from overflowing in at least 2^16 steps.
int32_t acc = 0;
for (int inner_index = 0; inner_index < depth; ++inner_index) {
int32_t input =
input_data[depth * outer_index + inner_index] - input_zero_point;
acc += input * input;
}
int32_t inv_l2norm_multiplier;
int inv_l2norm_shift;
GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
&inv_l2norm_shift);
for (int inner_index = 0; inner_index < depth; ++inner_index) {
int32_t input =
input_data[depth * outer_index + inner_index] - input_zero_point;
// Rescale and downcast. Rescale is folded into the division.
int32_t output_in_q24 = MultiplyByQuantizedMultiplier(
input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale);
output_in_q24 =
std::min(static_cast<int32_t>(kMaxInt8),
std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
output_data[depth * outer_index + inner_index] =
static_cast<int8_t>(output_in_q24);
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
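The int8_t L2Normalization above computes an inverse square root in fixed point and emits outputs with scale 1/128 (kOutputScale = 7). A float sketch of the same computation; the epsilon guard is added here only to keep the sketch safe for all-zero input, it is not part of the kernel above:

// Sketch only: float counterpart of the int8_t L2Normalization, with the
// output quantized as round(v * 128) and clamped to int8_t.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

void L2NormalizeReference(const int8_t* input, int depth,
                          int32_t input_zero_point, int8_t* output) {
  float sum_sq = 0.f;
  for (int i = 0; i < depth; ++i) {
    const float v = static_cast<float>(input[i] - input_zero_point);
    sum_sq += v * v;
  }
  const float inv_norm = 1.f / std::sqrt(std::max(sum_sq, 1e-6f));
  for (int i = 0; i < depth; ++i) {
    const float v = static_cast<float>(input[i] - input_zero_point);
    const int32_t q = static_cast<int32_t>(std::lround(v * inv_norm * 128.f));
    output[i] = static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, q)));
  }
}

int main() {
  const int8_t in[4] = {10, -20, 30, -40};
  int8_t out[4];
  L2NormalizeReference(in, 4, 0, out);
  std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
  return 0;
}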

View File

@@ -0,0 +1,99 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
int32_t input_multiplier, int32_t input_left_shift,
int32_t input_size, const int8_t* input_data,
int8_t* output_data) {
// Integer bits must be in sync with Prepare() function.
static constexpr int32_t kInputIntegerBits = 4;
static constexpr int32_t kOutputIntegerBits = 8;
static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
static constexpr int32_t kOutputZeroPoint = -128;
for (int i = 0; i < input_size; ++i) {
const int32_t input =
static_cast<int32_t>(input_data[i]) - input_zero_point;
if (input <= -input_range_radius) {
output_data[i] = kMinInt8;
} else if (input >= input_range_radius) {
output_data[i] = kMaxInt8;
} else {
const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
input, input_multiplier, input_left_shift);
using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
const int32_t output_in_q0 =
gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();
// Rescale and downcast.
using gemmlowp::RoundingDivideByPOT;
int32_t output_in_q23 =
RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits);
output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint,
static_cast<int32_t>(kMinInt8)),
static_cast<int32_t>(kMaxInt8));
output_data[i] = static_cast<int8_t>(output_in_q23);
}
}
}
inline void Logistic(int32_t input_multiplier, int32_t input_size,
const int16_t* ptr_input_data, int16_t* ptr_output_data) {
// We use the LUT for sigmoid and take into account that
// tanh(x) = 2*sigmoid(2*x) - 1.
int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
int32_t input_data = (*ptr_input_data) * input_data_mul;
// Scale by 3/4 to expand the range [-8,8] -> [-10.7,10.7];
// interpolation is done on unsigned values.
uint32_t abs_input_data = 3 * abs(input_data);
// We divide by 2^9 because we need to divide by 2^7 for the
// input conversion, plus 1/4 from the scaling above.
uint8_t uh = abs_input_data >> 9;
uint32_t ua = sigmoid_table_uint16[uh];
uint32_t ub = sigmoid_table_uint16[uh + 1];
uint32_t ut = abs_input_data & 0x1ff;
// Interpolation is done using the fractional bit.
uint32_t result = (ua << 9) + ut * (ub - ua);
result = (input_data >= 0) ? (result + (1 << 9))
: ((1 << (16 + 9)) - result + (1 << 9) - 1);
// Back to 16-bit.
result >>= 10;
*ptr_output_data = result;
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
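The int8_t Logistic above emits outputs quantized with scale 1/256 and zero point -128 (kOutputZeroPoint), so sigmoid(x) in [0, 1) spans the full int8_t range. A float sketch of that mapping, assuming an illustrative input scale and zero point:

// Sketch only: the quantized mapping the int8_t Logistic approximates.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int8_t LogisticInt8Reference(int8_t q_in, float input_scale,
                             int32_t input_zero_point) {
  const float x = (q_in - input_zero_point) * input_scale;
  const float y = 1.f / (1.f + std::exp(-x));
  const int32_t q = static_cast<int32_t>(std::lround(y * 256.f)) - 128;
  return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, q)));
}

int main() {
  std::printf("%d\n", LogisticInt8Reference(16, 0.25f, 0));  // x = 4.0
  return 0;
}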

View File

@@ -0,0 +1,131 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
#include "fixedpoint/fixedpoint.h"
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
template <typename T>
inline void MulElementwise(int size, const ArithmeticParams& params,
const T* input1_data, const T* input2_data,
T* output_data) {
for (int i = 0; i < size; ++i) {
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<T>(clamped_output);
}
}
template <typename T>
inline void Mul(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
ruy::profiler::ScopeLabel label("Mul/8bit");
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
MulElementwise(flat_size, params, input1_data, input2_data, output_data);
}
// Mul with 16-bit inputs and int8_t outputs.
inline void Mul(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int16_t* input1_data,
const RuntimeShape& input2_shape, const int16_t* input2_data,
const RuntimeShape& output_shape, int8_t* output_data) {
ruy::profiler::ScopeLabel label("Mul/Int16Int8");
int32_t output_offset = params.output_offset;
int32_t output_activation_min = params.quantized_activation_min;
int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
// F0 uses 0 integer bits, range [-1, 1].
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
F0 unclamped_result =
F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
int16_t rescaled_result =
gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
int16_t clamped_result = std::min<int16_t>(
output_activation_max - output_offset, rescaled_result);
clamped_result = std::max<int16_t>(output_activation_min - output_offset,
clamped_result);
output_data[i] = output_offset + clamped_result;
}
}
template <typename T>
inline void BroadcastMul4DSlow(
const ArithmeticParams& params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
ruy::profiler::ScopeLabel label("BroadcastMul4DSlow");
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
// The input shapes are extended as part of NdArrayDesc initialization.
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32_t clamped_output = std::min(
params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<T>(clamped_output);
}
}
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
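MulElementwise/BroadcastMul4DSlow above realize the product s1*(q1 - z1) * s2*(q2 - z2) requantized to the output scale; the combined scale is carried by output_multiplier/output_shift, and the offsets added to the inputs play the role of negated zero points. A float sketch with assumed scales and zero points:

// Sketch only: float reference of what the quantized Mul above approximates.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int8_t QuantizedMulReference(int8_t q1, int8_t q2) {
  const float s1 = 0.02f, s2 = 0.03f, s_out = 0.01f;  // assumed scales
  const int32_t z1 = 1, z2 = -2, z_out = 3;           // assumed zero points
  const float real_product = s1 * (q1 - z1) * s2 * (q2 - z2);
  const int32_t q =
      static_cast<int32_t>(std::lround(real_product / s_out)) + z_out;
  return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, q)));
}

int main() {
  std::printf("%d\n", QuantizedMulReference(50, -30));
  return 0;
}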

View File

@@ -0,0 +1,258 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline void AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const int8_t* input_data,
const RuntimeShape& output_shape, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int32_t acc = 0;
int filter_count = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
acc +=
input_data[Offset(input_shape, batch, in_y, in_x, channel)];
filter_count++;
}
}
// Round to the closest integer value.
acc = acc > 0 ? (acc + filter_count / 2) / filter_count
: (acc - filter_count / 2) / filter_count;
acc = std::max(acc, params.quantized_activation_min);
acc = std::min(acc, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<int8_t>(acc);
}
}
}
}
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_GE(params.quantized_activation_min,
std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(params.quantized_activation_max,
std::numeric_limits<int8_t>::max());
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int8_t max = std::numeric_limits<int8_t>::lowest();
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
max = std::max(
max,
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
}
}
max = std::max<int8_t>(max, params.quantized_activation_min);
max = std::min<int8_t>(max, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<int8_t>(max);
}
}
}
}
}
inline void AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const int16_t* input_data,
const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int32_t acc = 0;
int filter_count = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
acc +=
input_data[Offset(input_shape, batch, in_y, in_x, channel)];
filter_count++;
}
}
// Round to the closest integer value.
acc = acc > 0 ? (acc + filter_count / 2) / filter_count
: (acc - filter_count / 2) / filter_count;
acc = std::max(acc, params.quantized_activation_min);
acc = std::min(acc, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<int16_t>(acc);
}
}
}
}
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_GE(params.quantized_activation_min,
std::numeric_limits<int16_t>::min());
TFLITE_DCHECK_LE(params.quantized_activation_max,
std::numeric_limits<int16_t>::max());
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int16_t max = std::numeric_limits<int16_t>::lowest();
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
max = std::max(
max,
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
}
}
max = std::max<int16_t>(max, params.quantized_activation_min);
max = std::min<int16_t>(max, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<int16_t>(max);
}
}
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
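AveragePool above rounds the integer average half away from zero by adding or subtracting filter_count / 2 before the truncating division. A tiny standalone check of that rounding rule:

// Sketch only: symmetric rounding used by the AveragePool kernels above.
// Integer division truncates toward zero, so biasing by half the count
// first rounds to the nearest integer for both signs.
#include <cstdio>

int RoundedAverage(int acc, int filter_count) {
  return acc > 0 ? (acc + filter_count / 2) / filter_count
                 : (acc - filter_count / 2) / filter_count;
}

int main() {
  std::printf("%d %d\n", RoundedAverage(7, 4), RoundedAverage(-7, 4));  // 2 -2
  return 0;
}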

View File

@@ -0,0 +1,106 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
#include <limits>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
int32_t input_multiplier, int32_t input_shift,
int32_t input_size, const int8_t* input_data,
int8_t* output_data) {
// Integer bits must be in sync with Prepare() function.
static constexpr int32_t kInputIntegerBits = 4;
static constexpr int32_t kOutputScale = 7;
static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
for (int i = 0; i < input_size; ++i) {
const int32_t input =
static_cast<int32_t>(input_data[i]) - input_zero_point;
if (input <= -input_range_radius) {
output_data[i] = kMinInt8;
} else if (input >= input_range_radius) {
output_data[i] = kMaxInt8;
} else {
const int32_t input_in_q4 =
MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
const int32_t output_in_q0 =
gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw();
// Rescale and downcast.
using gemmlowp::RoundingDivideByPOT;
int32_t output_in_q24 =
RoundingDivideByPOT(output_in_q0, 31 - kOutputScale);
output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
output_data[i] = static_cast<int8_t>(output_in_q24);
}
}
}
inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
int32_t input_size, const int16_t* ptr_input_data,
int16_t* ptr_output_data) {
// We use the LUT for sigmoid and take into account that
// tanh(x) = 2*sigmoid(2*x) - 1.
int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
int32_t input_data = (*ptr_input_data) * input_data_mul;
if (input_left_shift == 1) {
input_data <<= 1;
}
// Scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
uint32_t abs_input_data = 3 * abs(input_data);
uint32_t uh = abs_input_data >> 8;
int32_t result;
if (uh >= 255) {
// Saturate to maximum.
result = 0xFFFF << 8;
} else {
uint32_t ua = sigmoid_table_uint16[uh];
uint32_t ub = sigmoid_table_uint16[uh + 1];
uint8_t ut = abs_input_data & 0xFF;
result = (ua << 8) + ut * (ub - ua);
}
result = (input_data >= 0)
? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
: (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
// Convert back to 16-bit.
result >>= (9 - 1);
*ptr_output_data = result;
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
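The 16-bit Tanh above reuses the sigmoid lookup table through the identity tanh(x) = 2*sigmoid(2*x) - 1. A short float check of that identity:

// Sketch only: numeric check of tanh(x) = 2*sigmoid(2*x) - 1.
#include <cmath>
#include <cstdio>

int main() {
  const float xs[] = {-2.f, -0.5f, 0.f, 1.f};
  for (float x : xs) {
    const float via_sigmoid = 2.f / (1.f + std::exp(-2.f * x)) - 1.f;
    std::printf("x=%5.2f  tanh=%8.5f  2*sigmoid(2x)-1=%8.5f\n", x,
                std::tanh(x), via_sigmoid);
  }
  return 0;
}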

View File

@@ -0,0 +1,90 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
#include <algorithm>
#include <cmath>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
const RuntimeShape& input_shape,
const float* input_data,
const RuntimeShape& output_shape,
float* output_data, float epsilon = 1e-6) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
for (int i = 0; i < outer_size; ++i) {
float squared_l2_norm = 0;
for (int c = 0; c < depth; ++c) {
const float val = input_data[depth * i + c];
squared_l2_norm += val * val;
}
float l2_norm = std::sqrt(squared_l2_norm);
l2_norm = std::max(l2_norm, epsilon);
for (int c = 0; c < depth; ++c) {
output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
}
}
}
inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
const RuntimeShape& input_shape,
const uint8_t* input_data,
const RuntimeShape& output_shape,
uint8_t* output_data) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int32_t input_zero_point = op_params.input_zero_point;
for (int i = 0; i < outer_size; ++i) {
int32_t square_l2_norm = 0;
for (int c = 0; c < depth; c++) {
int32_t diff = input_data[depth * i + c] - input_zero_point;
square_l2_norm += diff * diff;
}
int32_t inv_l2norm_multiplier;
int inv_l2norm_shift;
GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
&inv_l2norm_multiplier, &inv_l2norm_shift);
for (int c = 0; c < depth; c++) {
int32_t diff = input_data[depth * i + c] - input_zero_point;
int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
int32_t unclamped_output_val = 128 + rescaled_diff;
int32_t output_val =
std::min(static_cast<int32_t>(255),
std::max(static_cast<int32_t>(0), unclamped_output_val));
output_data[depth * i + c] = static_cast<uint8_t>(output_val);
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
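The uint8_t overload above maps the normalized value in [-1, 1] onto [0, 255] with zero point 128 and scale 1/128 (the 128 * diff factor). A float sketch of that path; the epsilon guard is added here only to keep the sketch safe for all-zero input:

// Sketch only: float counterpart of the uint8_t L2Normalization above,
// quantized as clamp(128 + 128 * diff / l2_norm, 0, 255).
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

void L2NormalizeUint8Reference(const uint8_t* input, int depth,
                               int32_t input_zero_point, uint8_t* output) {
  float sum_sq = 0.f;
  for (int i = 0; i < depth; ++i) {
    const float diff = static_cast<float>(input[i] - input_zero_point);
    sum_sq += diff * diff;
  }
  const float inv_norm = 1.f / std::sqrt(std::max(sum_sq, 1e-6f));
  for (int i = 0; i < depth; ++i) {
    const float diff = static_cast<float>(input[i] - input_zero_point);
    const int32_t q =
        128 + static_cast<int32_t>(std::lround(128.f * diff * inv_norm));
    output[i] = static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, q)));
  }
}

int main() {
  const uint8_t in[3] = {140, 100, 160};
  uint8_t out[3];
  L2NormalizeUint8Reference(in, 3, 128, out);
  std::printf("%d %d %d\n", out[0], out[1], out[2]);
  return 0;
}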

View File

@@ -0,0 +1,132 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
#include <cmath>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace reference_ops {
inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const float cutoff_upper = 16.619047164916992188f;
const float cutoff_lower = -9.f;
const int flat_size = MatchingFlatSize(input_shape, output_shape);
// Rationale for using an approximation in the reference kernel:
// 0. This approximation gives enough precision for float.
// 1. This works around an issue on an embedded chipset where exp() does not
// return correctly as expected - exp(x) should return inf when it overflows,
// not 1.701417 (IEEE 754 defines a representation for inf).
// 2. This speeds up the calculation and matches the behavior of the
// optimized kernels (see the definition of scalar_logistic_op<float>).
for (int i = 0; i < flat_size; i++) {
float val = input_data[i];
float result;
if (val > cutoff_upper) {
result = 1.0f;
} else if (val < cutoff_lower) {
result = std::exp(val);
} else {
result = 1.f / (1.f + std::exp(-val));
}
output_data[i] = result;
}
}
// Convenience version that allows, for example, generated-code calls to be
// uniform between data types.
inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
const float* input_data, const RuntimeShape& output_shape,
float* output_data) {
// Drop params: not needed.
Logistic(input_shape, input_data, output_shape, output_data);
}
inline void Logistic(const LogisticParams& params,
const RuntimeShape& input_shape, const int16_t* input_data,
const RuntimeShape& output_shape, int16_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
// F0 uses 0 integer bits, range [-1, 1].
// This is the return type of math functions such as tanh, logistic,
// whose range is in [-1, 1].
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
// F3 uses 3 integer bits, range [-8, 8], the input range expected here.
using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
const F3 input = F3::FromRaw(input_data[i]);
F0 output = gemmlowp::logistic(input);
output_data[i] = output.raw();
}
}
// Quantized int8_t logistic activation. Cheats by dequantizing and
// requantizing around the floating point logistic method. This implementation
// is slow on platforms without a floating point unit.
// TODO(b/141211002): Delete this int8_t implementation once we can reuse the
// approach used in TFLite for int8_t Logistic.
inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
float input_scale, int input_zero_point,
const RuntimeShape& output_shape, int8_t* output_data,
float output_scale, int output_zero_point) {
const float cutoff_upper = 16.619047164916992188f;
const float cutoff_lower = -9.f;
const int flat_size = MatchingFlatSize(input_shape, output_shape);
// Rationale for using an approximation in the reference kernel:
// 0. The approximation gives enough precision for float.
// 1. It works around an issue on an embedded chipset where exp() does not
// return the expected result: exp(x) should return inf when it overflows,
// not 1.701417 (IEEE 754 defines a representation for inf).
// 2. It speeds up the calculation and matches the behavior of the optimized
// kernels (see the definition of scalar_logistic_op<float>).
for (int i = 0; i < flat_size; i++) {
// Dequantize.
float val =
static_cast<float>((input_data[i] - input_zero_point) * input_scale);
float result;
if (val > cutoff_upper) {
result = 1.0f;
} else if (val < cutoff_lower) {
result = std::exp(val);
} else {
result = 1.f / (1.f + std::exp(-val));
}
// Requantize
int8_t output =
static_cast<int8_t>(result / output_scale + output_zero_point);
output_data[i] = output;
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
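A minimal usage sketch for the float Logistic kernel above (illustrative only, not part of the commit; the include path and the example values are assumptions):

#include "tensorflow/lite/kernels/internal/reference/logistic.h"

#include <cstdio>

int main() {
  // Four elements laid out as a 1x1x1x4 tensor.
  const tflite::RuntimeShape shape({1, 1, 1, 4});
  const float input[4] = {-20.f, -1.f, 0.f, 20.f};
  float output[4];
  tflite::reference_ops::Logistic(shape, input, shape, output);
  for (int i = 0; i < 4; ++i) {
    std::printf("logistic(%.1f) = %.6f\n", input[i], output[i]);
  }
  // Expected: ~0 (below the lower cutoff), ~0.268941, 0.5, 1 (saturated).
  return 0;
}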

View File

@@ -0,0 +1,64 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T, typename Op, int N = 5>
void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape,
const T* input1_data,
const RuntimeShape& unextended_input2_shape,
const T* input2_data,
const RuntimeShape& unextended_output_shape,
T* output_data, Op op) {
// Uses element-wise calculation if broadcast is not required.
if (unextended_input1_shape == unextended_input2_shape) {
const int flat_size =
MatchingElementsSize(unextended_input1_shape, unextended_input2_shape,
unextended_output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = op(input1_data[i], input2_data[i]);
}
} else {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
NdArrayDesc<N> desc1;
NdArrayDesc<N> desc2;
NdArrayDesc<N> output_desc;
NdArrayDescsForElementwiseBroadcast(
unextended_input1_shape, unextended_input2_shape, &desc1, &desc2);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
auto maxmin_func = [&](int indexes[N]) {
output_data[SubscriptToIndex(output_desc, indexes)] =
op(input1_data[SubscriptToIndex(desc1, indexes)],
input2_data[SubscriptToIndex(desc2, indexes)]);
};
NDOpsHelper<N>(output_desc, maxmin_func);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
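A minimal usage sketch for MaximumMinimumBroadcastSlow (illustrative only; the include path and example values are assumptions), taking the element-wise maximum of a 1x2x2x1 tensor and a single broadcast scalar:

#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"

#include <algorithm>
#include <cstdio>

int main() {
  const tflite::RuntimeShape in1_shape({1, 2, 2, 1});
  const tflite::RuntimeShape in2_shape({1, 1, 1, 1});  // Broadcast operand.
  const float in1[4] = {-3.f, 0.5f, 2.f, 7.f};
  const float in2[1] = {1.f};
  float out[4];
  tflite::reference_ops::MaximumMinimumBroadcastSlow(
      in1_shape, in1, in2_shape, in2, in1_shape, out,
      [](float a, float b) { return std::max(a, b); });
  for (int i = 0; i < 4; ++i) {
    std::printf("%f\n", out[i]);  // 1, 1, 2, 7
  }
  return 0;
}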

View File

@@ -0,0 +1,166 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
// Element-wise mul that can often be used for inner loop of broadcast Mul as
// well as the non-broadcast Mul.
inline void MulElementwise(int size, const ArithmeticParams& params,
const uint8_t* input1_data,
const uint8_t* input2_data, uint8_t* output_data) {
for (int i = 0; i < size; ++i) {
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
template <typename T>
inline void Mul(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
T output_activation_min;
T output_activation_max;
GetActivationParams(params, &output_activation_min, &output_activation_max);
const int flat_size =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] * input2_data[i], output_activation_min,
output_activation_max);
}
}
inline void Mul(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
MulElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void BroadcastMul4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const uint8_t* input1_data,
const RuntimeShape& input2_shape,
const uint8_t* input2_data,
const RuntimeShape& output_shape,
uint8_t* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32_t clamped_output = std::min(
params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<uint8_t>(clamped_output);
}
}
}
}
}
template <typename T>
void BroadcastMul4DSlow(const ArithmeticParams& params,
const RuntimeShape& unextended_input1_shape,
const T* input1_data,
const RuntimeShape& unextended_input2_shape,
const T* input2_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
T output_activation_min;
T output_activation_max;
GetActivationParams(params, &output_activation_min, &output_activation_max);
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
for (int b = 0; b < output_shape.Dims(0); ++b) {
for (int y = 0; y < output_shape.Dims(1); ++y) {
for (int x = 0; x < output_shape.Dims(2); ++x) {
for (int c = 0; c < output_shape.Dims(3); ++c) {
output_data[Offset(output_shape, b, y, x, c)] =
ActivationFunctionWithMinMax(
input1_data[SubscriptToIndex(desc1, b, y, x, c)] *
input2_data[SubscriptToIndex(desc2, b, y, x, c)],
output_activation_min, output_activation_max);
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
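A minimal usage sketch for the float Mul template (illustrative only; the include path, shapes, and parameter values are assumptions). Only the float activation bounds in ArithmeticParams are consumed by this overload:

#include "tensorflow/lite/kernels/internal/reference/mul.h"

#include <cstdio>
#include <limits>

int main() {
  tflite::ArithmeticParams params;
  params.float_activation_min = std::numeric_limits<float>::lowest();
  params.float_activation_max = std::numeric_limits<float>::max();
  const tflite::RuntimeShape shape({1, 1, 2, 2});
  const float in1[4] = {1.f, 2.f, 3.f, 4.f};
  const float in2[4] = {0.5f, 0.5f, -1.f, 2.f};
  float out[4];
  tflite::reference_ops::Mul(params, shape, in1, shape, in2, shape, out);
  for (int i = 0; i < 4; ++i) {
    std::printf("%f\n", out[i]);  // 0.5, 1, -3, 8
  }
  return 0;
}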

View File

@@ -0,0 +1,37 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void Negate(const RuntimeShape& input_shape, const T* input_data,
const RuntimeShape& output_shape, T* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = -input_data[i];
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
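A minimal usage sketch for Negate (illustrative only; the include path and values are assumptions):

#include "tensorflow/lite/kernels/internal/reference/neg.h"

#include <cstdint>
#include <cstdio>

int main() {
  const tflite::RuntimeShape shape({4});
  const int32_t input[4] = {1, -2, 0, 7};
  int32_t output[4];
  tflite::reference_ops::Negate(shape, input, shape, output);
  for (int i = 0; i < 4; ++i) {
    std::printf("%d\n", static_cast<int>(output[i]));  // -1, 2, 0, -7
  }
  return 0;
}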

View File

@@ -0,0 +1,162 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
#include <vector>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// TFLite Pad supports activation tensors with up to 4 dimensions.
constexpr int PadKernelMaxDimensionCount() { return 4; }
// There are two versions of pad: Pad and PadV2. In PadV2 there is a second
// scalar input that provides the padding value, so pad_value_ptr can simply
// point at that input's data. For Pad, it should point to a zero value.
//
// Note that two typenames are required, so that T=P=int32_t is considered a
// specialization distinct from P=int32_t.
template <typename T, typename P>
inline void PadImpl(const tflite::PadParams& op_params,
const RuntimeShape& input_shape, const T* input_data,
const P* pad_value_ptr, const RuntimeShape& output_shape,
T* output_data) {
const RuntimeShape ext_input_shape =
RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape);
const RuntimeShape ext_output_shape =
RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape);
TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());
// Runtime calls are currently fixed at 4 dimensions. Copy inputs so we can
// pad them to 4 dims (yes, we are "padding the padding").
int left_padding_copy[PadKernelMaxDimensionCount()];
for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
left_padding_copy[i] = 0;
}
for (int i = 0; i < op_params.left_padding_count; ++i) {
left_padding_copy[i + PadKernelMaxDimensionCount() -
op_params.left_padding_count] = op_params.left_padding[i];
}
int right_padding_copy[PadKernelMaxDimensionCount()];
for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
right_padding_copy[i] = 0;
}
for (int i = 0; i < op_params.right_padding_count; ++i) {
right_padding_copy[i + PadKernelMaxDimensionCount() -
op_params.right_padding_count] =
op_params.right_padding[i];
}
const int output_batch = ext_output_shape.Dims(0);
const int output_height = ext_output_shape.Dims(1);
const int output_width = ext_output_shape.Dims(2);
const int output_depth = ext_output_shape.Dims(3);
const int left_b_padding = left_padding_copy[0];
const int left_h_padding = left_padding_copy[1];
const int left_w_padding = left_padding_copy[2];
const int left_d_padding = left_padding_copy[3];
const int right_b_padding = right_padding_copy[0];
const int right_h_padding = right_padding_copy[1];
const int right_w_padding = right_padding_copy[2];
const int right_d_padding = right_padding_copy[3];
const T pad_value = *pad_value_ptr;
const T* in_ptr = input_data;
T* out_ptr = output_data;
for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_h = 0; out_h < output_height; ++out_h) {
for (int out_w = 0; out_w < output_width; ++out_w) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
if (out_b < left_b_padding ||
out_b >= output_batch - right_b_padding ||
out_h < left_h_padding ||
out_h >= output_height - right_h_padding ||
out_w < left_w_padding ||
out_w >= output_width - right_w_padding ||
out_d < left_d_padding ||
out_d >= output_depth - right_d_padding) {
*out_ptr++ = pad_value;
} else {
*out_ptr++ = *in_ptr++;
}
}
}
}
}
}
template <typename T, typename P>
inline void Pad(const tflite::PadParams& op_params,
const RuntimeShape& input_shape, const T* input_data,
const P* pad_value_ptr, const RuntimeShape& output_shape,
T* output_data) {
PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
output_data);
}
// The second (pad-value) input can be int32_t when, say, the first is uint8_t.
template <typename T>
inline void Pad(const tflite::PadParams& op_params,
const RuntimeShape& input_shape, const T* input_data,
const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
T* output_data) {
const T converted_pad_value = static_cast<T>(*pad_value_ptr);
PadImpl(op_params, input_shape, input_data, &converted_pad_value,
output_shape, output_data);
}
// This version avoids conflicting template matching.
template <>
inline void Pad(const tflite::PadParams& op_params,
const RuntimeShape& input_shape, const int32_t* input_data,
const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
int32_t* output_data) {
PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
output_data);
}
template <typename T, typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
const RuntimeShape& input_shape, const T* input_data,
const P* pad_value_ptr,
const RuntimeShape& output_shape, T* output_data) {
Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
output_data);
}
template <typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
const RuntimeShape& input_shape,
const float* input_data, const P* pad_value_ptr,
const RuntimeShape& output_shape,
float* output_data) {
Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
output_data);
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
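A minimal usage sketch for Pad (illustrative only; the include path, padding layout, and shapes are assumptions), adding one row and column of zeros around a 2x2 single-channel image:

#include "tensorflow/lite/kernels/internal/reference/pad.h"

#include <cstdint>
#include <cstdio>

int main() {
  tflite::PadParams op_params;
  op_params.left_padding_count = 4;
  op_params.right_padding_count = 4;
  // Padding per dimension in (batch, height, width, depth) order.
  const int32_t pads[4] = {0, 1, 1, 0};
  for (int i = 0; i < 4; ++i) {
    op_params.left_padding[i] = pads[i];
    op_params.right_padding[i] = pads[i];
  }
  const tflite::RuntimeShape input_shape({1, 2, 2, 1});
  const tflite::RuntimeShape output_shape({1, 4, 4, 1});
  const float input[4] = {1.f, 2.f, 3.f, 4.f};
  const float pad_value = 0.f;
  float output[16];
  tflite::reference_ops::Pad(op_params, input_shape, input, &pad_value,
                             output_shape, output);
  for (int h = 0; h < 4; ++h) {
    for (int w = 0; w < 4; ++w) {
      std::printf("%4.1f ", output[h * 4 + w]);
    }
    std::printf("\n");  // Zero border with the 2x2 block in the center.
  }
  return 0;
}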

View File

@@ -0,0 +1,297 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
float total = 0.f;
float filter_count = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
total +=
input_data[Offset(input_shape, batch, in_y, in_x, channel)];
filter_count++;
}
}
const float average = total / filter_count;
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
ActivationFunctionWithMinMax(average, params.float_activation_min,
params.float_activation_max);
}
}
}
}
}
inline void AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const uint8_t* input_data,
const RuntimeShape& output_shape,
uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int32_t acc = 0;
int filter_count = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
acc +=
input_data[Offset(input_shape, batch, in_y, in_x, channel)];
filter_count++;
}
}
acc = (acc + filter_count / 2) / filter_count;
acc = std::max(acc, params.quantized_activation_min);
acc = std::min(acc, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<uint8_t>(acc);
}
}
}
}
}
inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,
const float* input_data, const RuntimeShape& output_shape,
float* output_data) {
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
float sum_squares = 0.f;
int filter_count = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
const float val =
input_data[Offset(input_shape, batch, in_y, in_x, channel)];
sum_squares += val * val;
filter_count++;
}
}
const float l2pool_result = std::sqrt(sum_squares / filter_count);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
ActivationFunctionWithMinMax(l2pool_result,
params.float_activation_min,
params.float_activation_max);
}
}
}
}
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
const float* input_data, const RuntimeShape& output_shape,
float* output_data) {
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
float max = std::numeric_limits<float>::lowest();
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
max = std::max(
max,
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
}
}
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
ActivationFunctionWithMinMax(max, params.float_activation_min,
params.float_activation_max);
}
}
}
}
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_GE(params.quantized_activation_min, 0);
TFLITE_DCHECK_LE(params.quantized_activation_max, 255);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
uint8_t max = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
max = std::max(
max,
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
}
}
max = std::max<uint8_t>(max, params.quantized_activation_min);
max = std::min<uint8_t>(max, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<uint8_t>(max);
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
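A minimal usage sketch for the float MaxPool kernel (illustrative only; the include path and PoolParams values are assumptions), pooling a 4x4 single-channel image with a 2x2 window and stride 2:

#include "tensorflow/lite/kernels/internal/reference/pooling.h"

#include <cstdio>
#include <limits>

int main() {
  tflite::PoolParams params;
  params.stride_height = 2;
  params.stride_width = 2;
  params.filter_height = 2;
  params.filter_width = 2;
  params.padding_values.height = 0;
  params.padding_values.width = 0;
  params.float_activation_min = std::numeric_limits<float>::lowest();
  params.float_activation_max = std::numeric_limits<float>::max();
  const tflite::RuntimeShape input_shape({1, 4, 4, 1});
  const tflite::RuntimeShape output_shape({1, 2, 2, 1});
  const float input[16] = {1, 2,  5,  6,  3,  4,  7,  8,
                           9, 10, 13, 14, 11, 12, 15, 16};
  float output[4];
  tflite::reference_ops::MaxPool(params, input_shape, input, output_shape,
                                 output);
  for (int i = 0; i < 4; ++i) {
    std::printf("%.1f\n", output[i]);  // 4, 8, 12, 16
  }
  return 0;
}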

View File

@@ -0,0 +1,109 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Broadcast prelu to output_shape for quantized uint8_t/int8_t data.
template <typename T>
inline void BroadcastPrelu4DSlow(
const PreluParams& params, const RuntimeShape& input_shape,
const T* input_data, const RuntimeShape& alpha_shape, const T* alpha_data,
const RuntimeShape& output_shape, T* output_data) {
TFLITE_DCHECK_LE(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(alpha_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input_shape, alpha_shape, &desc1, &desc2);
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
int output_index = Offset(extended_output_shape, b, y, x, c);
int input_index = SubscriptToIndex(desc1, b, y, x, c);
const int32_t input_value =
params.input_offset + input_data[input_index];
int32_t output_value;
if (input_value >= 0) {
output_value = MultiplyByQuantizedMultiplier(
input_value, params.output_multiplier_1, params.output_shift_1);
} else {
auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
const int32_t alpha_value =
params.alpha_offset + alpha_data[alpha_index];
output_value = MultiplyByQuantizedMultiplier(
input_value * alpha_value, params.output_multiplier_2,
params.output_shift_2);
}
output_value += params.output_offset;
const int32_t quantized_min = std::numeric_limits<T>::min();
const int32_t quantized_max = std::numeric_limits<T>::max();
const int32_t clamped_output =
std::min(quantized_max, std::max(quantized_min, output_value));
output_data[output_index] = static_cast<T>(clamped_output);
}
}
}
}
}
template <typename T>
inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape,
const T* input_data, const RuntimeShape& alpha_shape,
const T* alpha_data, const RuntimeShape& output_shape,
T* output_data) {
const int32_t quantized_min = std::numeric_limits<T>::min();
const int32_t quantized_max = std::numeric_limits<T>::max();
const int flat_size =
MatchingElementsSize(input_shape, alpha_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t input_value = params.input_offset + input_data[i];
int32_t output_value;
if (input_value >= 0) {
output_value = MultiplyByQuantizedMultiplier(
input_value, params.output_multiplier_1, params.output_shift_1);
} else {
const int32_t alpha_value = params.alpha_offset + alpha_data[i];
output_value = MultiplyByQuantizedMultiplier(input_value * alpha_value,
params.output_multiplier_2,
params.output_shift_2);
}
output_value += params.output_offset;
const int32_t clamped_output =
std::min(quantized_max, std::max(quantized_min, output_value));
output_data[i] = static_cast<T>(clamped_output);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
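A minimal usage sketch for the quantized Prelu kernel (illustrative only; the include path, the quantization parameters, and the use of QuantizeMultiplier to derive the fixed-point multipliers are all assumptions). All tensors are assumed to use scale 0.1 and zero point 0, so the positive branch has a real multiplier of 1.0 and the negative branch 0.1:

#include "tensorflow/lite/kernels/internal/reference/prelu.h"

#include <cstdint>
#include <cstdio>

#include "tensorflow/lite/kernels/internal/quantization_util.h"

int main() {
  tflite::PreluParams params;
  params.input_offset = 0;   // Negated input zero point.
  params.alpha_offset = 0;   // Negated alpha zero point.
  params.output_offset = 0;  // Output zero point.
  int shift_1 = 0;
  int shift_2 = 0;
  // Positive branch: input_scale / output_scale = 1.0.
  tflite::QuantizeMultiplier(1.0, &params.output_multiplier_1, &shift_1);
  // Negative branch: input_scale * alpha_scale / output_scale = 0.1.
  tflite::QuantizeMultiplier(0.1, &params.output_multiplier_2, &shift_2);
  params.output_shift_1 = shift_1;
  params.output_shift_2 = shift_2;
  const tflite::RuntimeShape shape({4});
  const int8_t input[4] = {-50, -10, 0, 40};  // Real values: -5, -1, 0, 4.
  const int8_t alpha[4] = {2, 2, 2, 2};       // Real slope: 0.2.
  int8_t output[4];
  tflite::reference_ops::Prelu(params, shape, input, shape, alpha, shape,
                               output);
  for (int i = 0; i < 4; ++i) {
    // Real outputs -1, -0.2, 0, 4 requantize to roughly -10, -2, 0, 40.
    std::printf("%d\n", static_cast<int>(output[i]));
  }
  return 0;
}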

View File

@@ -0,0 +1,138 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Consolidates dimensions in broadcast inputs, checks for five-fold pattern.
//
// For example, if sequence of dimensions of one input is
// ..., 1, 3, 1, 7, 9, 5,... and the other is ..., 2, 3, 1, 7, 1, 1, ...
// we can consolidate these as
// ..., 1, 3*7, 9*5, ... and 2, 3*7, 1.
//
// The category is updated in the less-frequent case of shapes that are
// not suited to a fivefold-loop broadcast.
//
// Falls back to generic pattern when it does not know how to process properly.
//
// Returns true iff there is some sort of broadcast, which includes five-fold
// patterns and falling back to generic broadcast.
inline bool ProcessBroadcastShapes(const RuntimeShape& shape0,
const RuntimeShape& shape1,
tflite::ArithmeticParams* params) {
const int dims_count =
std::max(shape0.DimensionsCount(), shape1.DimensionsCount());
params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
RuntimeShape scalar_shape(dims_count, 1);
auto extended_shape0 = RuntimeShape::ExtendedShape(dims_count, shape0);
auto extended_shape1 = RuntimeShape::ExtendedShape(dims_count, shape1);
// Check for "exact" match, implicitly accepting any scalar shapes.
if (extended_shape0 == extended_shape1) {
params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
return false;
}
for (int i = dims_count - 1; i >= 0; --i) {
if (extended_shape0.Dims(i) == extended_shape1.Dims(i)) {
continue;
} else if (extended_shape0.Dims(i) == 1) {
params->broadcast_category =
BroadcastableOpCategory::kFirstInputBroadcastsFast;
break;
} else if (extended_shape1.Dims(i) == 1) {
params->broadcast_category =
BroadcastableOpCategory::kSecondInputBroadcastsFast;
break;
} else {
// This case is erroneous: there is a dimension that does not match and
// is not a broadcast from one shape to the other.
params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
return true;
}
}
if (params->broadcast_category !=
BroadcastableOpCategory::kFirstInputBroadcastsFast &&
params->broadcast_category !=
BroadcastableOpCategory::kSecondInputBroadcastsFast) {
// This is unreachable because at least one else clause in the above loop
// must be reached.
TFLITE_DCHECK(false);
params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
return false;
}
// From this point it is assumed contractually that corresponding dimensions
// in shape0 and shape1 are either (a) equal or (b) one or other equals 1.
const bool swap_inputs = params->broadcast_category ==
BroadcastableOpCategory::kSecondInputBroadcastsFast;
const RuntimeShape* shape_a =
swap_inputs ? &extended_shape1 : &extended_shape0;
const RuntimeShape* shape_b =
swap_inputs ? &extended_shape0 : &extended_shape1;
int i = dims_count - 1;
params->broadcast_shape[0] = 1;
params->broadcast_shape[1] = 1;
params->broadcast_shape[2] = 1;
params->broadcast_shape[3] = 1;
params->broadcast_shape[4] = 1;
// y_0 is greedy: include dims if both or neither equal 1: in other words,
// test for equality rather than (shape_a->Dims(i) != 1).
while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
params->broadcast_shape[4] *= shape_b->Dims(i);
--i;
}
// Here either input_a or input_b has dim of 1 (if i >= 0). If it is input_b
// that has the unit dimension, the next two loops are not entered.
while (i >= 0 && shape_a->Dims(i) == 1) {
params->broadcast_shape[3] *= shape_b->Dims(i);
--i;
}
while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
params->broadcast_shape[2] *= shape_a->Dims(i);
--i;
}
// Here either input_a or input_b has dim of 1 (if i >= 0).
while (i >= 0 && shape_b->Dims(i) == 1) {
params->broadcast_shape[1] *= shape_a->Dims(i);
--i;
}
while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
params->broadcast_shape[0] *= shape_b->Dims(i);
--i;
}
// Rarer case is when the broadcast dimensions cannot be handled by a fivefold
// loop.
if (i >= 0) {
params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
}
return true;
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
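A minimal usage sketch for ProcessBroadcastShapes (illustrative only; the shapes are assumptions). With shapes {8, 1, 6, 5} and {8, 7, 6, 5}, the trailing run 6*5 consolidates into one span, the broadcast dimension 7 into another, and the leading 8 into a third, so the op can be driven by a fivefold loop:

#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"

#include <cstdio>

int main() {
  tflite::ArithmeticParams params;
  const tflite::RuntimeShape shape0({8, 1, 6, 5});
  const tflite::RuntimeShape shape1({8, 7, 6, 5});
  const bool needs_broadcast =
      tflite::reference_ops::ProcessBroadcastShapes(shape0, shape1, &params);
  std::printf("needs_broadcast=%d category=%d\n", needs_broadcast,
              static_cast<int>(params.broadcast_category));
  for (int i = 0; i < 5; ++i) {
    // Expected consolidation: 1, 1, 8, 7, 30.
    std::printf("broadcast_shape[%d]=%d\n", i, params.broadcast_shape[i]);
  }
  return 0;
}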

View File

@@ -0,0 +1,55 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename InputT, typename OutputT>
inline void AffineQuantize(const tflite::QuantizationParams& op_params,
const RuntimeShape& input_shape,
const InputT* input_data,
const RuntimeShape& output_shape,
OutputT* output_data) {
const int32_t zero_point = op_params.zero_point;
const double scale = op_params.scale;
const int flat_size = MatchingFlatSize(input_shape, output_shape);
static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
for (int i = 0; i < flat_size; i++) {
const InputT val = input_data[i];
int32_t unclamped =
static_cast<int32_t>(TfLiteRound(val / static_cast<float>(scale))) +
zero_point;
int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
output_data[i] = clamped;
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
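A minimal usage sketch for AffineQuantize (illustrative only; scale and zero point are assumptions), quantizing floats to int8 with scale 0.5 and zero point -128:

#include "tensorflow/lite/kernels/internal/reference/quantize.h"

#include <cstdint>
#include <cstdio>

int main() {
  tflite::QuantizationParams op_params;
  op_params.scale = 0.5;        // real_value = scale * (quantized - zero_point)
  op_params.zero_point = -128;
  const tflite::RuntimeShape shape({4});
  const float input[4] = {0.f, 0.5f, 64.f, 200.f};
  int8_t output[4];
  tflite::reference_ops::AffineQuantize(op_params, shape, input, shape, output);
  for (int i = 0; i < 4; ++i) {
    std::printf("%d\n", static_cast<int>(output[i]));  // -128, -127, 0, 127
  }
  return 0;
}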

View File

@@ -0,0 +1,405 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
// This method iterates through input data and reduces elements along the
// dimensions given in axis.
template <typename In, typename Out>
inline bool Reduce(const In* input_data, const int* input_dims,
const int* output_dims, const int input_num_dims,
const int output_num_dims, const int* axis,
const int num_axis, int* input_iter,
Out reducer(const Out current, const In in),
Out* output_data) {
// Reset input iterator.
for (int idx = 0; idx < input_num_dims; ++idx) {
input_iter[idx] = 0;
}
// Iterate through input_data.
do {
size_t input_offset =
ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
input_iter, num_axis, axis);
output_data[output_offset] =
reducer(output_data[output_offset], input_data[input_offset]);
} while (NextIndex(input_num_dims, input_dims, input_iter));
return true;
}
// This method parses the input 'axis' to remove duplicates and handle negative
// values, and returns a valid 'out_axis'.
inline bool ResolveAxis(const int num_dims, const int* axis,
const int64_t num_axis, int* out_axis,
int* out_num_axis) {
*out_num_axis = 0; // Just in case.
// Short-circuit axis resolution for scalars; the axis will go unused.
if (num_dims == 0) {
return true;
}
// O(n^2) is fine since out_num_axis should be really small, mostly <= 4.
for (int64_t idx = 0; idx < num_axis; ++idx) {
// Handle negative index. A positive index 'p_idx' can be represented as a
// negative index 'n_idx' as: n_idx = p_idx - num_dims.
// E.g. for num_dims=3, [0, 1, 2] is the same as [-3, -2, -1].
int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
TFLITE_DCHECK(current >= 0 && current < num_dims);
bool is_dup = false;
for (int j = 0; j < *out_num_axis; ++j) {
if (out_axis[j] == current) {
is_dup = true;
break;
}
}
if (!is_dup) {
out_axis[*out_num_axis] = current;
*out_num_axis += 1;
}
}
return true;
}
// This method expects that output_data has been initialized.
template <typename In, typename Out>
inline bool ReduceSumImpl(const In* input_data, const int* input_dims,
const int* output_dims, const int input_num_dims,
const int output_num_dims, const int* axis,
const int num_axis, int* input_iter,
Out* output_data) {
auto reducer = [](const Out current, const In in) -> Out {
const Out actual_in = static_cast<Out>(in);
return current + actual_in;
};
return Reduce<In, Out>(input_data, input_dims, output_dims, input_num_dims,
output_num_dims, axis, num_axis, input_iter, reducer,
output_data);
}
template <typename T>
inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
const T init_value, T* data) {
size_t num_elements = 1;
for (int idx = 0; idx < num_dims; ++idx) {
size_t current = static_cast<size_t>(dims[idx]);
// Overflow prevention.
if (num_elements > std::numeric_limits<size_t>::max() / current) {
return false;
}
num_elements *= current;
}
for (size_t idx = 0; idx < num_elements; ++idx) {
data[idx] = init_value;
}
return true;
}
// Computes the generic value (i.e., sum/max/min/prod) of elements across
// dimensions given in axis. It needs to pass in init_value and reducer.
template <typename T>
inline bool ReduceGeneric(const T* input_data, const int* input_dims,
const int input_num_dims, T* output_data,
const int* output_dims, const int output_num_dims,
const int* axis, const int64_t num_axis_dimensions,
bool keep_dims, int* temp_index, int* resolved_axis,
T init_value,
T reducer(const T current, const T in)) {
// Reset output data.
if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
output_data)) {
return false;
}
// Resolve axis.
int num_resolved_axis = 0;
if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
&num_resolved_axis)) {
return false;
}
return Reduce<T, T>(input_data, input_dims, output_dims, input_num_dims,
output_num_dims, resolved_axis, num_resolved_axis,
temp_index, reducer, output_data);
}
// Computes the mean of elements across dimensions given in axis.
// It does so in two stages: it first computes the sum of the elements along
// the axis and then divides that sum by the number of elements in the axis.
template <typename T, typename U>
inline bool Mean(const T* input_data, const int* input_dims,
const int input_num_dims, T* output_data,
const int* output_dims, const int output_num_dims,
const int* axis, const int num_axis_dimensions, bool keep_dims,
int* temp_index, int* resolved_axis, U* temp_sum) {
ruy::profiler::ScopeLabel label("Mean");
// Reset output data.
size_t num_outputs = 1;
for (int idx = 0; idx < output_num_dims; ++idx) {
size_t current = static_cast<size_t>(output_dims[idx]);
// Overflow prevention.
if (num_outputs > std::numeric_limits<size_t>::max() / current) {
return false;
}
num_outputs *= current;
}
for (size_t idx = 0; idx < num_outputs; ++idx) {
output_data[idx] = T();
temp_sum[idx] = U();
}
// Resolve axis.
int num_resolved_axis = 0;
if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
&num_resolved_axis)) {
return false;
}
if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
output_num_dims, resolved_axis, num_resolved_axis,
temp_index, temp_sum)) {
return false;
}
// Calculate mean by dividing output_data by num of aggregated element.
U num_elements_in_axis = 1;
for (int idx = 0; idx < num_resolved_axis; ++idx) {
size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
// Overflow prevention.
if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
return false;
}
num_elements_in_axis *= current;
}
if (num_elements_in_axis > 0) {
for (size_t idx = 0; idx < num_outputs; ++idx) {
output_data[idx] =
static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
}
}
return true;
}
template <typename T>
inline void Mean(const tflite::MeanParams& op_params,
const RuntimeShape& unextended_input_shape,
const T* input_data,
const RuntimeShape& unextended_output_shape, T* output_data) {
ruy::profiler::ScopeLabel label("Mean4D");
// Current implementation only supports dimension equals 4 and simultaneous
// reduction over width and height.
TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int output_batch = output_shape.Dims(0);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int output_depth = output_shape.Dims(3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
TFLITE_CHECK_EQ(op_params.axis_count, 2);
TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1));
TFLITE_CHECK_EQ(output_height, 1);
TFLITE_CHECK_EQ(output_width, 1);
for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
float value = 0;
for (int in_h = 0; in_h < input_height; ++in_h) {
for (int in_w = 0; in_w < input_width; ++in_w) {
value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
}
}
output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
value / (input_width * input_height);
}
}
}
inline void Mean(const tflite::MeanParams& op_params,
const RuntimeShape& unextended_input_shape,
const uint8_t* input_data, int32_t input_zero_point,
float input_scale, const RuntimeShape& unextended_output_shape,
uint8_t* output_data, int32_t output_zero_point,
float output_scale) {
ruy::profiler::ScopeLabel label("Mean4D/Uint8");
// Current implementation only supports dimension equals 4 and simultaneous
// reduction over width and height.
TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int output_batch = output_shape.Dims(0);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int output_depth = output_shape.Dims(3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const float num_elements_in_axis = input_width * input_height;
TFLITE_CHECK_EQ(op_params.axis_count, 2);
TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1));
TFLITE_CHECK_EQ(output_height, 1);
TFLITE_CHECK_EQ(output_width, 1);
constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
int32_t bias =
output_zero_point -
static_cast<int32_t>(input_zero_point * input_scale / output_scale);
double real_scale =
static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
int32_t multiplier;
int shift;
QuantizeMultiplier(real_scale, &multiplier, &shift);
for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
int32_t acc = 0;
for (int in_h = 0; in_h < input_height; ++in_h) {
for (int in_w = 0; in_w < input_width; ++in_w) {
acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
}
}
acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
acc += bias;
acc = std::min(std::max(acc, kMinValue), kMaxValue);
output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
static_cast<uint8_t>(acc);
}
}
}
// Computes the mean of elements across dimensions given in axis.
// It does so in two stages: it first computes the sum of the elements along
// the axis and then divides that sum by the number of elements in the axis,
// working on quantized values.
template <typename T, typename U>
inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
float input_scale, const int* input_dims,
const int input_num_dims, T* output_data,
int32_t output_zero_point, float output_scale,
const int* output_dims,
const int output_num_dims, const int* axis,
const int num_axis_dimensions, bool keep_dims,
int* temp_index, int* resolved_axis, U* temp_sum,
bool compute_sum) {
const bool uint8_case = std::is_same<T, uint8_t>::value;
const bool int16_case = std::is_same<T, int16_t>::value;
if (uint8_case) {
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8");
} else if (int16_case) {
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16");
} else {
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8");
}
// Reset output data.
size_t num_outputs = 1;
for (int idx = 0; idx < output_num_dims; ++idx) {
size_t current = static_cast<size_t>(output_dims[idx]);
// Overflow prevention.
if (num_outputs > std::numeric_limits<size_t>::max() / current) {
return false;
}
num_outputs *= current;
}
for (size_t idx = 0; idx < num_outputs; ++idx) {
output_data[idx] = T();
temp_sum[idx] = U();
}
// Resolve axis.
int num_resolved_axis = 0;
if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
&num_resolved_axis)) {
return false;
}
if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
output_num_dims, resolved_axis, num_resolved_axis,
temp_index, temp_sum)) {
return false;
}
// Calculate mean by dividing output_data by num of aggregated element.
U num_elements_in_axis = 1;
for (int idx = 0; idx < num_resolved_axis; ++idx) {
size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
// Overflow prevention.
if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
return false;
}
num_elements_in_axis *= current;
}
if (num_elements_in_axis > 0) {
const float scale = input_scale / output_scale;
if (compute_sum) {
// TODO(b/116341117): Eliminate float and do this completely in 8bit.
const float bias =
-input_zero_point * scale * num_elements_in_axis + 0.5f;
for (size_t idx = 0; idx < num_outputs; ++idx) {
const U value =
static_cast<U>(TfLiteRound(temp_sum[idx] * scale + bias)) +
output_zero_point;
output_data[idx] = static_cast<T>(value);
}
} else {
const float bias = -input_zero_point * scale + 0.5f;
for (size_t idx = 0; idx < num_outputs; ++idx) {
float float_mean = static_cast<float>(temp_sum[idx]) /
static_cast<float>(num_elements_in_axis);
float result = TfLiteMin(
TfLiteRound(float_mean * scale + bias) + output_zero_point,
static_cast<float>(std::numeric_limits<T>::max()));
result = TfLiteMax(result,
static_cast<float>(std::numeric_limits<T>::min()));
output_data[idx] = static_cast<T>(result);
}
}
}
return true;
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
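A minimal usage sketch for the 4-D float Mean overload (illustrative only; the include path, the availability of ruy's profiler header, and the example values are assumptions), reducing over height and width simultaneously as this overload requires:

#include "tensorflow/lite/kernels/internal/reference/reduce.h"

#include <cstdio>

int main() {
  tflite::MeanParams op_params;
  op_params.axis_count = 2;
  op_params.axis[0] = 1;  // Height.
  op_params.axis[1] = 2;  // Width.
  const tflite::RuntimeShape input_shape({1, 2, 2, 2});
  const tflite::RuntimeShape output_shape({1, 1, 1, 2});
  // Two channels interleaved over a 2x2 spatial grid.
  const float input[8] = {1, 10, 2, 20, 3, 30, 4, 40};
  float output[2];
  tflite::reference_ops::Mean(op_params, input_shape, input, output_shape,
                              output);
  std::printf("%f %f\n", output[0], output[1]);  // 2.5, 25.0
  return 0;
}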

View File

@@ -0,0 +1,67 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename input_type, typename output_type>
inline void Requantize(const input_type* input_data, int32_t size,
int32_t effective_scale_multiplier,
int32_t effective_scale_shift, int32_t input_zeropoint,
int32_t output_zeropoint, output_type* output_data) {
ruy::profiler::ScopeLabel label("Requantize");
const bool same_scale =
(effective_scale_multiplier == 1 << 30 && effective_scale_shift == 1);
if (same_scale) {
const bool mixed_type_int8_uint8 =
std::is_same<input_type, int8_t>::value &&
std::is_same<output_type, uint8_t>::value;
const bool mixed_type_uint8_int8 =
std::is_same<input_type, uint8_t>::value &&
std::is_same<output_type, int8_t>::value;
const int32_t zero_point_diff = input_zeropoint - output_zeropoint;
// Fast path to do requantization for the case when just a shift of 128 is
// needed.
if ((mixed_type_int8_uint8 && zero_point_diff == -128) ||
(mixed_type_uint8_int8 && zero_point_diff == 128)) {
for (int i = 0; i < size; ++i) {
output_data[i] = input_data[i] ^ 0x80;
}
}
}
static constexpr int32_t kMinOutput = std::numeric_limits<output_type>::min();
static constexpr int32_t kMaxOutput = std::numeric_limits<output_type>::max();
for (int i = 0; i < size; ++i) {
const int32_t input = input_data[i] - input_zeropoint;
const int32_t output =
MultiplyByQuantizedMultiplier(input, effective_scale_multiplier,
effective_scale_shift) +
output_zeropoint;
const int32_t clamped_output =
std::max(std::min(output, kMaxOutput), kMinOutput);
output_data[i] = static_cast<output_type>(clamped_output);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
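A minimal usage sketch for Requantize (illustrative only; the scales, zero points, and use of QuantizeMultiplier are assumptions), re-expressing int8 values quantized with scale 0.5 and zero point 0 on a uint8 scale of 1.0 with zero point 128:

#include "tensorflow/lite/kernels/internal/reference/requantize.h"

#include <cstdint>
#include <cstdio>

#include "tensorflow/lite/kernels/internal/quantization_util.h"

int main() {
  int32_t effective_multiplier = 0;
  int effective_shift = 0;
  // Effective scale = input_scale / output_scale = 0.5 / 1.0.
  tflite::QuantizeMultiplier(0.5, &effective_multiplier, &effective_shift);
  const int8_t input[4] = {-128, -2, 0, 127};
  uint8_t output[4];
  tflite::reference_ops::Requantize(input, /*size=*/4, effective_multiplier,
                                    effective_shift, /*input_zeropoint=*/0,
                                    /*output_zeropoint=*/128, output);
  for (int i = 0; i < 4; ++i) {
    std::printf("%d\n", static_cast<int>(output[i]));  // About 64, 127, 128, 192.
  }
  return 0;
}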

Some files were not shown because too many files have changed in this diff