add nnom pack and example

dkk0918
2021-09-08 23:47:15 +08:00
parent b06e1f1386
commit fa4395d3f8
121 changed files with 53527 additions and 9093 deletions

View File

@@ -0,0 +1,77 @@
// File: STM32L4x5_4x6.dbgconf
// Version: 1.0.0
// Note: refer to STM32L4x5 and STM32L4x6 Reference manual (RM0351)
// refer to STM32L475xx STM32L476xx STM32L486xx STM32L496xx STM32L4A6xx datasheets
// <<< Use Configuration Wizard in Context Menu >>>
// <h> Debug MCU configuration register (DBGMCU_CR)
// <o.2> DBG_STANDBY <i> Debug Standby mode
// <o.1> DBG_STOP <i> Debug Stop mode
// <o.0> DBG_SLEEP <i> Debug Sleep mode
// </h>
DbgMCU_CR = 0x00000007;
// <h> Debug MCU APB1 freeze register 1 (DBGMCU_APB1FZR1)
// <i> Reserved bits must be kept at reset value
// <o.31> DBG_LPTIM1_STOP <i> LPTIM1 counter stopped when core is halted
// <o.26> DBG_CAN2_STOP <i> bxCAN2 stopped when core is halted
// <o.25> DBG_CAN1_STOP <i> bxCAN1 stopped when core is halted
// <o.23> DBG_I2C3_STOP <i> I2C3 SMBUS timeout counter stopped when core is halted
// <o.22> DBG_I2C2_STOP <i> I2C2 SMBUS timeout counter stopped when core is halted
// <o.21> DBG_I2C1_STOP <i> I2C1 SMBUS timeout counter stopped when core is halted
// <o.12> DBG_IWDG_STOP <i> Independent watchdog counter stopped when core is halted
// <o.11> DBG_WWDG_STOP <i> Window watchdog counter stopped when core is halted
// <o.10> DBG_RTC_STOP <i> RTC counter stopped when core is halted
// <o.5> DBG_TIM7_STOP <i> TIM7 counter stopped when core is halted
// <o.4> DBG_TIM6_STOP <i> TIM6 counter stopped when core is halted
// <o.3> DBG_TIM5_STOP <i> TIM5 counter stopped when core is halted
// <o.2> DBG_TIM4_STOP <i> TIM4 counter stopped when core is halted
// <o.1> DBG_TIM3_STOP <i> TIM3 counter stopped when core is halted
// <o.0> DBG_TIM2_STOP <i> TIM2 counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz1 = 0x00000000;
// <h> Debug MCU APB1 freeze register 2 (DBGMCU_APB1FZR2)
// <i> Reserved bits must be kept at reset value
// <o.5> DBG_LPTIM2_STOP <i> LPTIM2 counter stopped when core is halted
// <o.1> DBG_I2C4_STOP <i> I2C4 SMBUS timeout counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz2 = 0x00000000;
// <h> Debug MCU APB2 freeze register (DBGMCU_APB2FZR)
// <i> Reserved bits must be kept at reset value
// <o.18> DBG_TIM17_STOP <i> TIM17 counter stopped when core is halted
// <o.17> DBG_TIM16_STOP <i> TIM16 counter stopped when core is halted
// <o.16> DBG_TIM15_STOP <i> TIM15 counter stopped when core is halted
// <o.13> DBG_TIM8_STOP <i> TIM8 counter stopped when core is halted
// <o.11> DBG_TIM1_STOP <i> TIM1 counter stopped when core is halted
// </h>
DbgMCU_APB2_Fz = 0x00000000;
// <h> TPIU Pin Routing (TRACECLK fixed on Pin PE2)
// <i> TRACECLK: Pin PE2
// <o1> TRACED0
// <i> ETM Trace Data 0
// <0x00040003=> Pin PE3
// <0x00020001=> Pin PC1
// <o2> TRACED1
// <i> ETM Trace Data 1
// <0x00040004=> Pin PE4
// <0x0002000A=> Pin PC10
// <o3> TRACED2
// <i> ETM Trace Data 2
// <0x00040005=> Pin PE5
// <0x00030002=> Pin PD2
// <o4> TRACED3
// <i> ETM Trace Data 3
// <0x00040006=> Pin PE6
// <0x0002000C=> Pin PC12
// </h>
TraceClk_Pin = 0x00040002;
TraceD0_Pin = 0x00040003;
TraceD1_Pin = 0x00040004;
TraceD2_Pin = 0x00040005;
TraceD3_Pin = 0x00040006;
// <<< end of configuration section >>>
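
The wizard values above decode as follows: DbgMCU_CR = 0x00000007 sets bits 0-2 (DBG_SLEEP, DBG_STOP, DBG_STANDBY) so the debug connection survives the low-power modes, the freeze registers are left at 0 (no peripheral counters are stopped while the core is halted), and each trace-pin word packs the GPIO port index in the upper half-word and the pin number in the lower one (0x00040002 = port E, pin 2). A minimal run-time equivalent, sketched with the CMSIS register names from the STM32L4 device header (normally the debugger applies this .dbgconf itself, so the code is illustrative only):

#include "stm32l4xx.h"

/* Mirror DbgMCU_CR = 0x00000007: keep debug active in Sleep/Stop/Standby. */
static void debug_low_power_enable(void)
{
    DBGMCU->CR |= DBGMCU_CR_DBG_SLEEP     /* bit 0 */
                | DBGMCU_CR_DBG_STOP      /* bit 1 */
                | DBGMCU_CR_DBG_STANDBY;  /* bit 2 */

    /* APB1FZR1/APB1FZR2/APB2FZR stay at their reset value of 0,
       matching DbgMCU_APB1_Fz1/Fz2 and DbgMCU_APB2_Fz above. */
}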

View File

@@ -0,0 +1,21 @@
/*
* Auto generated Run-Time-Environment Configuration File
* *** Do not modify ! ***
*
* Project: 'TencentOS_tiny'
* Target: 'TencentOS_tiny'
*/
#ifndef RTE_COMPONENTS_H
#define RTE_COMPONENTS_H
/*
* Define the Device Header File:
*/
#define CMSIS_device_header "stm32l4xx.h"
#endif /* RTE_COMPONENTS_H */
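
RTE_Components.h only records which device header the RTE selected; CMSIS-style sources then include it through the macro instead of hard-coding the name. A minimal usage sketch, assuming the project's STM32L496xx define is active (the helper below is hypothetical and only shows that the device definitions become visible):

#include "RTE_Components.h"
#include CMSIS_device_header      /* expands to "stm32l4xx.h" for this target */

/* hypothetical helper: shows NVIC/IRQ symbols from the device header resolve */
static inline void usart1_irq_enable(void)
{
    NVIC_EnableIRQ(USART1_IRQn);
}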

File diff suppressed because one or more lines are too long

View File

@@ -12,7 +12,7 @@
<lExt>*.lib</lExt>
<tExt>*.txt; *.h; *.inc</tExt>
<pExt>*.plm</pExt>
<CppX>*.cpp</CppX>
<CppX>*.cpp;*.cc</CppX>
<nMigrate>0</nMigrate>
</Extensions>
@@ -26,7 +26,7 @@
<ToolsetNumber>0x4</ToolsetNumber>
<ToolsetName>ARM-ADS</ToolsetName>
<TargetOption>
<CLKADS>80000000</CLKADS>
<CLKADS>8000000</CLKADS>
<OPTTT>
<gFlags>1</gFlags>
<BeepAtEnd>1</BeepAtEnd>
@@ -93,7 +93,7 @@
<tRbreak>1</tRbreak>
<tRwatch>1</tRwatch>
<tRmem>1</tRmem>
<tRfunc>1</tRfunc>
<tRfunc>0</tRfunc>
<tRbox>1</tRbox>
<tRtrace>1</tRtrace>
<sRSysVw>1</sRSysVw>
@@ -119,72 +119,28 @@
<TargetDriverDllRegistry>
<SetRegEntry>
<Number>0</Number>
<Key>ARMRTXEVENTFLAGS</Key>
<Name>-L70 -Z18 -C0 -M0 -T1</Name>
</SetRegEntry>
<SetRegEntry>
<Number>0</Number>
<Key>DLGTARM</Key>
<Name>(1010=-1,-1,-1,-1,0)(1007=-1,-1,-1,-1,0)(1008=-1,-1,-1,-1,0)(1009=-1,-1,-1,-1,0)(1012=-1,-1,-1,-1,0)</Name>
</SetRegEntry>
<SetRegEntry>
<Number>0</Number>
<Key>ARMDBGFLAGS</Key>
<Name></Name>
</SetRegEntry>
<SetRegEntry>
<Number>0</Number>
<Key>DLGUARM</Key>
<Name>(105=-1,-1,-1,-1,0)</Name>
<Key>ST-LINKIII-KEIL_SWO</Key>
<Name>-U0667FF343339415043053749 -O206 -SF10000 -C0 -A0 -I0 -HNlocalhost -HP7184 -P1 -N00("ARM CoreSight SW-DP (ARM Core") -D00(2BA01477) -L00(0) -TO131090 -TC10000000 -TT10000000 -TP21 -TDS8007 -TDT0 -TDC1F -TIEFFFFFFFF -TIP8 -FO15 -FD20000000 -FC1000 -FN1 -FF0STM32L4xx_1024.FLM -FS08000000 -FL0100000 -FP0($$Device:STM32L496ZGTx$CMSIS\Flash\STM32L4xx_1024.FLM)</Name>
</SetRegEntry>
<SetRegEntry>
<Number>0</Number>
<Key>UL2CM3</Key>
<Name>UL2CM3(-S0 -C0 -P0 -FD20000000 -FC1000 -FN1 -FF0STM32L4xx_1024 -FS08000000 -FL0100000 -FP0($$Device:STM32L496ZGTx$CMSIS\Flash\STM32L4xx_1024.FLM))</Name>
</SetRegEntry>
<SetRegEntry>
<Number>0</Number>
<Key>ST-LINKIII-KEIL_SWO</Key>
<Name>-U0674FF525750877267153432 -O2254 -SF10000 -C0 -A0 -I0 -HNlocalhost -HP7184 -P1 -N00("ARM CoreSight SW-DP") -D00(2BA01477) -L00(0) -TO18 -TC10000000 -TP21 -TDS8007 -TDT0 -TDC1F -TIEFFFFFFFF -TIP8 -FO15 -FD20000000 -FC1000 -FN1 -FF0STM32L4xx_1024.FLM -FS08000000 -FL0100000 -FP0($$Device:STM32L496ZGTx$CMSIS\Flash\STM32L4xx_1024.FLM)</Name>
</SetRegEntry>
</TargetDriverDllRegistry>
<Breakpoint>
<Bp>
<Number>0</Number>
<Type>0</Type>
<LineNumber>573</LineNumber>
<EnabledFlag>1</EnabledFlag>
<Address>134226432</Address>
<ByteObject>0</ByteObject>
<HtxType>0</HtxType>
<ManyObjects>0</ManyObjects>
<SizeOfObject>0</SizeOfObject>
<BreakByAccess>0</BreakByAccess>
<BreakIfRCount>1</BreakIfRCount>
<Filename>..\..\..\..\kernel\core\tos_mmheap.c</Filename>
<ExecCommand></ExecCommand>
<Expression>\\TencentOS_tiny\../../../../kernel/core/tos_mmheap.c\573</Expression>
</Bp>
</Breakpoint>
<WatchWindow1>
<Ww>
<count>0</count>
<WinNumber>1</WinNumber>
<ItemText>k_mmheap_default_pool</ItemText>
</Ww>
</WatchWindow1>
<Breakpoint/>
<Tracepoint>
<THDelay>0</THDelay>
</Tracepoint>
<DebugFlag>
<trace>0</trace>
<periodic>1</periodic>
<aLwin>1</aLwin>
<periodic>0</periodic>
<aLwin>0</aLwin>
<aCover>0</aCover>
<aSer1>0</aSer1>
<aSer2>0</aSer2>
<aPa>0</aPa>
<viewmode>1</viewmode>
<viewmode>0</viewmode>
<vrSel>0</vrSel>
<aSym>0</aSym>
<aTbox>0</aTbox>
@@ -247,7 +203,7 @@
<Group>
<GroupName>Application/User</GroupName>
<tvExp>1</tvExp>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<cbSel>0</cbSel>
<RteFlg>0</RteFlg>
@@ -258,8 +214,8 @@
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Src\gpio.c</PathWithFileName>
<FilenameWithoutPath>gpio.c</FilenameWithoutPath>
<PathWithFileName>..\..\BSP\Src\main.c</PathWithFileName>
<FilenameWithoutPath>main.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
@@ -270,8 +226,8 @@
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Src\main.c</PathWithFileName>
<FilenameWithoutPath>main.c</FilenameWithoutPath>
<PathWithFileName>..\..\BSP\Src\gpio.c</PathWithFileName>
<FilenameWithoutPath>gpio.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
@@ -335,6 +291,66 @@
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>2</GroupNumber>
<FileNumber>9</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Src\dcmi.c</PathWithFileName>
<FilenameWithoutPath>dcmi.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>2</GroupNumber>
<FileNumber>10</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Src\dma.c</PathWithFileName>
<FilenameWithoutPath>dma.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>2</GroupNumber>
<FileNumber>11</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Src\i2c.c</PathWithFileName>
<FilenameWithoutPath>i2c.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>2</GroupNumber>
<FileNumber>12</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Src\spi.c</PathWithFileName>
<FilenameWithoutPath>spi.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>2</GroupNumber>
<FileNumber>13</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Src\tim.c</PathWithFileName>
<FilenameWithoutPath>tim.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
</Group>
<Group>
@@ -345,7 +361,7 @@
<RteFlg>0</RteFlg>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>9</FileNumber>
<FileNumber>14</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -357,7 +373,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>10</FileNumber>
<FileNumber>15</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -369,7 +385,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>11</FileNumber>
<FileNumber>16</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -381,7 +397,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>12</FileNumber>
<FileNumber>17</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -393,7 +409,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>13</FileNumber>
<FileNumber>18</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -405,7 +421,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>14</FileNumber>
<FileNumber>19</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -417,7 +433,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>15</FileNumber>
<FileNumber>20</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -429,7 +445,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>16</FileNumber>
<FileNumber>21</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -441,7 +457,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>17</FileNumber>
<FileNumber>22</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -453,7 +469,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>18</FileNumber>
<FileNumber>23</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -465,7 +481,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>19</FileNumber>
<FileNumber>24</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -477,7 +493,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>20</FileNumber>
<FileNumber>25</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -489,7 +505,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>21</FileNumber>
<FileNumber>26</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -501,7 +517,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>22</FileNumber>
<FileNumber>27</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -513,7 +529,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>23</FileNumber>
<FileNumber>28</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -525,7 +541,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>24</FileNumber>
<FileNumber>29</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -537,7 +553,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>25</FileNumber>
<FileNumber>30</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -549,7 +565,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>26</FileNumber>
<FileNumber>31</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -561,7 +577,7 @@
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>27</FileNumber>
<FileNumber>32</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -571,6 +587,42 @@
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>33</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_dcmi.c</PathWithFileName>
<FilenameWithoutPath>stm32l4xx_hal_dcmi.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>34</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_spi.c</PathWithFileName>
<FilenameWithoutPath>stm32l4xx_hal_spi.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>3</GroupNumber>
<FileNumber>35</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_spi_ex.c</PathWithFileName>
<FilenameWithoutPath>stm32l4xx_hal_spi_ex.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
</Group>
<Group>
@@ -581,7 +633,7 @@
<RteFlg>0</RteFlg>
<File>
<GroupNumber>4</GroupNumber>
<FileNumber>28</FileNumber>
<FileNumber>36</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -601,7 +653,7 @@
<RteFlg>0</RteFlg>
<File>
<GroupNumber>5</GroupNumber>
<FileNumber>29</FileNumber>
<FileNumber>37</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -613,7 +665,7 @@
</File>
<File>
<GroupNumber>5</GroupNumber>
<FileNumber>30</FileNumber>
<FileNumber>38</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -625,7 +677,7 @@
</File>
<File>
<GroupNumber>5</GroupNumber>
<FileNumber>31</FileNumber>
<FileNumber>39</FileNumber>
<FileType>2</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -645,7 +697,7 @@
<RteFlg>0</RteFlg>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>32</FileNumber>
<FileNumber>40</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -657,7 +709,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>33</FileNumber>
<FileNumber>41</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -669,7 +721,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>34</FileNumber>
<FileNumber>42</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -681,7 +733,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>35</FileNumber>
<FileNumber>43</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -693,7 +745,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>36</FileNumber>
<FileNumber>44</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -705,7 +757,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>37</FileNumber>
<FileNumber>45</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -717,7 +769,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>38</FileNumber>
<FileNumber>46</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -729,7 +781,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>39</FileNumber>
<FileNumber>47</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -741,7 +793,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>40</FileNumber>
<FileNumber>48</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -753,7 +805,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>41</FileNumber>
<FileNumber>49</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -765,7 +817,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>42</FileNumber>
<FileNumber>50</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -777,7 +829,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>43</FileNumber>
<FileNumber>51</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -789,7 +841,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>44</FileNumber>
<FileNumber>52</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -801,7 +853,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>45</FileNumber>
<FileNumber>53</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -813,7 +865,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>46</FileNumber>
<FileNumber>54</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -825,7 +877,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>47</FileNumber>
<FileNumber>55</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -837,7 +889,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>48</FileNumber>
<FileNumber>56</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -849,7 +901,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>49</FileNumber>
<FileNumber>57</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -861,7 +913,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>50</FileNumber>
<FileNumber>58</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -873,7 +925,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>51</FileNumber>
<FileNumber>59</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -885,7 +937,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>52</FileNumber>
<FileNumber>60</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -897,7 +949,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>53</FileNumber>
<FileNumber>61</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -909,7 +961,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>54</FileNumber>
<FileNumber>62</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -921,7 +973,7 @@
</File>
<File>
<GroupNumber>6</GroupNumber>
<FileNumber>55</FileNumber>
<FileNumber>63</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -941,7 +993,7 @@
<RteFlg>0</RteFlg>
<File>
<GroupNumber>7</GroupNumber>
<FileNumber>56</FileNumber>
<FileNumber>64</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
@@ -954,14 +1006,82 @@
</Group>
<Group>
<GroupName>examples</GroupName>
<GroupName>hal</GroupName>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<cbSel>0</cbSel>
<RteFlg>0</RteFlg>
<File>
<GroupNumber>8</GroupNumber>
<FileNumber>57</FileNumber>
<FileNumber>65</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Hardware\Src\delay.c</PathWithFileName>
<FilenameWithoutPath>delay.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>8</GroupNumber>
<FileNumber>66</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Hardware\Src\lcd_2inch4.c</PathWithFileName>
<FilenameWithoutPath>lcd_2inch4.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>8</GroupNumber>
<FileNumber>67</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Hardware\Src\lcd_config.c</PathWithFileName>
<FilenameWithoutPath>lcd_config.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>8</GroupNumber>
<FileNumber>68</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Hardware\Src\ov2640.c</PathWithFileName>
<FilenameWithoutPath>ov2640.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
<File>
<GroupNumber>8</GroupNumber>
<FileNumber>69</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<bDave2>0</bDave2>
<PathWithFileName>..\..\BSP\Hardware\Src\sccb.c</PathWithFileName>
<FilenameWithoutPath>sccb.c</FilenameWithoutPath>
<RteFlg>0</RteFlg>
<bShared>0</bShared>
</File>
</Group>
<Group>
<GroupName>examples</GroupName>
<tvExp>1</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<cbSel>0</cbSel>
<RteFlg>0</RteFlg>
<File>
<GroupNumber>9</GroupNumber>
<FileNumber>70</FileNumber>
<FileType>1</FileType>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>

View File

@@ -10,14 +10,14 @@
<TargetName>TencentOS_tiny</TargetName>
<ToolsetNumber>0x4</ToolsetNumber>
<ToolsetName>ARM-ADS</ToolsetName>
<pCCUsed>5060750::V5.06 update 6 (build 750)::.\ARMCC</pCCUsed>
<uAC6>0</uAC6>
<pCCUsed>6140000::V6.14::ARMCLANG</pCCUsed>
<uAC6>1</uAC6>
<TargetOption>
<TargetCommonOption>
<Device>STM32L496ZGTx</Device>
<Vendor>STMicroelectronics</Vendor>
<PackID>Keil.STM32L4xx_DFP.2.4.0</PackID>
<PackURL>http://www.keil.com/pack/</PackURL>
<PackID>Keil.STM32L4xx_DFP.2.5.0</PackID>
<PackURL>https://www.keil.com/pack/</PackURL>
<Cpu>IRAM(0x20000000-0x2004FFFF) IROM(0x8000000-0x80FFFFF) CLOCK(8000000) FPU2 CPUTYPE("Cortex-M4")</Cpu>
<FlashUtilSpec></FlashUtilSpec>
<StartupFile></StartupFile>
@@ -54,7 +54,7 @@
<CreateLib>0</CreateLib>
<CreateHexFile>1</CreateHexFile>
<DebugInformation>1</DebugInformation>
<BrowseInformation>0</BrowseInformation>
<BrowseInformation>1</BrowseInformation>
<ListingPath></ListingPath>
<HexFormatSelection>1</HexFormatSelection>
<Merge32K>0</Merge32K>
@@ -137,7 +137,7 @@
<DriverSelection>4107</DriverSelection>
</Flash1>
<bUseTDR>1</bUseTDR>
<Flash2>STLink\ST-LINKIII-KEIL_SWO.dll</Flash2>
<Flash2>BIN\UL2CM3.DLL</Flash2>
<Flash3></Flash3>
<Flash4></Flash4>
<pFcarmOut></pFcarmOut>
@@ -322,14 +322,14 @@
<PlainCh>0</PlainCh>
<Ropi>0</Ropi>
<Rwpi>0</Rwpi>
<wLevel>2</wLevel>
<wLevel>3</wLevel>
<uThumb>0</uThumb>
<uSurpInc>0</uSurpInc>
<uC99>1</uC99>
<uGnu>0</uGnu>
<useXO>0</useXO>
<v6Lang>1</v6Lang>
<v6LangP>1</v6LangP>
<v6Lang>3</v6Lang>
<v6LangP>3</v6LangP>
<vShortEn>1</vShortEn>
<vShortWch>1</vShortWch>
<v6Lto>0</v6Lto>
@@ -337,9 +337,9 @@
<v6Rtti>0</v6Rtti>
<VariousControls>
<MiscControls></MiscControls>
<Define>USE_HAL_DRIVER,STM32L496xx</Define>
<Define>USE_HAL_DRIVER,STM32L496xx,NUCLEO_STM32L496ZG</Define>
<Undefine></Undefine>
<IncludePath>..\..\BSP\Inc;..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Inc;..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Inc\Legacy;..\..\..\..\platform\vendor_bsp\st\CMSIS\Device\ST\STM32L4xx\Include;..\..\..\..\platform\vendor_bsp\st\CMSIS\Include;..\..\..\..\arch\arm\arm-v7m\common\include;..\..\..\..\arch\arm\arm-v7m\cortex-m4\armcc;..\..\..\..\kernel\core\include;..\..\..\..\kernel\pm\include;..\..\..\..\osal\cmsis_os;..\..\..\..\examples\hello_world;..\..\TOS_CONFIG;..\..\..\..\net\at\include;..\..\..\..\kernel\hal\include</IncludePath>
<IncludePath>..\..\BSP\Inc;..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Inc;..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Inc\Legacy;..\..\..\..\platform\vendor_bsp\st\CMSIS\Device\ST\STM32L4xx\Include;..\..\..\..\platform\vendor_bsp\st\CMSIS\Include;..\..\..\..\arch\arm\arm-v7m\common\include;..\..\..\..\arch\arm\arm-v7m\cortex-m4\armcc;..\..\..\..\kernel\core\include;..\..\..\..\kernel\pm\include;..\..\..\..\osal\cmsis_os;..\..\..\..\examples\hello_world;..\..\TOS_CONFIG;..\..\..\..\net\at\include;..\..\..\..\kernel\hal\include;..\..\BSP\Hardware\Inc</IncludePath>
</VariousControls>
</Cads>
<Aads>
@@ -393,16 +393,16 @@
<Group>
<GroupName>Application/User</GroupName>
<Files>
<File>
<FileName>gpio.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\gpio.c</FilePath>
</File>
<File>
<FileName>main.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\main.c</FilePath>
</File>
<File>
<FileName>gpio.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\gpio.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_msp.c</FileName>
<FileType>1</FileType>
@@ -428,6 +428,31 @@
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\mcu_init.c</FilePath>
</File>
<File>
<FileName>dcmi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\dcmi.c</FilePath>
</File>
<File>
<FileName>dma.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\dma.c</FilePath>
</File>
<File>
<FileName>i2c.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\i2c.c</FilePath>
</File>
<File>
<FileName>spi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\spi.c</FilePath>
</File>
<File>
<FileName>tim.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\tim.c</FilePath>
</File>
</Files>
</Group>
<Group>
@@ -528,6 +553,21 @@
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_tim_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_dcmi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_dcmi.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_spi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_spi.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_spi_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_spi_ex.c</FilePath>
</File>
</Files>
</Group>
<Group>
@@ -695,6 +735,36 @@
</File>
</Files>
</Group>
<Group>
<GroupName>hal</GroupName>
<Files>
<File>
<FileName>delay.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\delay.c</FilePath>
</File>
<File>
<FileName>lcd_2inch4.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\lcd_2inch4.c</FilePath>
</File>
<File>
<FileName>lcd_config.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\lcd_config.c</FilePath>
</File>
<File>
<FileName>ov2640.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\ov2640.c</FilePath>
</File>
<File>
<FileName>sccb.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\sccb.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>examples</GroupName>
<Files>
@@ -715,8 +785,8 @@
<RTE>
<apis/>
<components>
<component Cclass="CMSIS" Cgroup="CORE" Cvendor="ARM" Cversion="4.3.0" condition="CMSIS Core">
<package name="CMSIS" schemaVersion="1.3" url="http://www.keil.com/pack/" vendor="ARM" version="4.5.0"/>
<component Cclass="CMSIS" Cgroup="CORE" Cvendor="ARM" Cversion="5.4.0" condition="ARMv6_7_8-M Device">
<package name="CMSIS" schemaVersion="1.3" url="http://www.keil.com/pack/" vendor="ARM" version="5.7.0"/>
<targetInfos>
<targetInfo name="TencentOS_tiny"/>
</targetInfos>

View File

@@ -0,0 +1,127 @@
<html>
<body>
<pre>
<h1>µVision Build Log</h1>
<h2>Tool Versions:</h2>
IDE-Version: µVision V5.30.0.0
Copyright (C) 2020 ARM Ltd and ARM Germany GmbH. All rights reserved.
License Information: 1 2, 3, LIC=VGXG8-3EBEY-FWM2N-Y5VPW-1RV7D-LEXKU
Tool Versions:
Toolchain: MDK-ARM Plus Version: 5.30.0.0
Toolchain Path: D:\software\Keil\ARM\ARMCLANG\Bin
C Compiler: ArmClang.exe V6.14
Assembler: Armasm.exe V6.14
Linker/Locator: ArmLink.exe V6.14
Library Manager: ArmAr.exe V6.14
Hex Converter: FromElf.exe V6.14
CPU DLL: SARMCM3.DLL V5.30.0.0
Dialog DLL: DCM.DLL V1.17.3.0
Target DLL: STLink\ST-LINKIII-KEIL_SWO.dll V3.0.7.0
Dialog DLL: TCM.DLL V1.42.0.0
<h2>Project:</h2>
D:\Code\Project\tencentos\TencentOS-tiny\board\NUCLEO_STM32L496ZG\KEIL\hello_world\TencentOS_tiny.uvprojx
Project File Date: 09/08/2021
<h2>Output:</h2>
*** Using Compiler 'V6.14', folder: 'D:\software\Keil\ARM\ARMCLANG\Bin'
Build target 'TencentOS_tiny'
compiling gpio.c...
compiling sys.c...
compiling stm32l4xx_hal_msp.c...
compiling main.c...
compiling stm32l4xx_it.c...
compiling usart.c...
compiling mcu_init.c...
compiling dcmi.c...
compiling dma.c...
compiling tim.c...
compiling spi.c...
compiling i2c.c...
compiling stm32l4xx_hal_uart_ex.c...
compiling stm32l4xx_hal.c...
compiling stm32l4xx_hal_uart.c...
compiling stm32l4xx_hal_i2c_ex.c...
compiling stm32l4xx_hal_flash.c...
compiling stm32l4xx_hal_rcc.c...
compiling stm32l4xx_hal_rcc_ex.c...
compiling stm32l4xx_hal_flash_ex.c...
compiling stm32l4xx_hal_flash_ramfunc.c...
compiling stm32l4xx_hal_dma_ex.c...
compiling stm32l4xx_hal_i2c.c...
compiling stm32l4xx_hal_gpio.c...
compiling stm32l4xx_hal_dma.c...
compiling stm32l4xx_hal_pwr.c...
compiling stm32l4xx_hal_cortex.c...
compiling stm32l4xx_hal_pwr_ex.c...
compiling stm32l4xx_hal_exti.c...
compiling stm32l4xx_hal_dcmi.c...
compiling stm32l4xx_hal_spi_ex.c...
compiling stm32l4xx_hal_tim_ex.c...
compiling system_stm32l4xx.c...
compiling tos_cpu.c...
compiling stm32l4xx_hal_tim.c...
compiling tos_completion.c...
compiling stm32l4xx_hal_spi.c...
compiling port_c.c...
compiling tos_char_fifo.c...
compiling tos_binary_heap.c...
compiling tos_countdownlatch.c...
compiling tos_event.c...
compiling tos_global.c...
compiling tos_mmheap.c...
compiling tos_mail_queue.c...
compiling tos_message_queue.c...
compiling tos_mmblk.c...
compiling tos_mutex.c...
compiling tos_pend.c...
compiling tos_priority_mail_queue.c...
compiling tos_priority_message_queue.c...
compiling tos_robin.c...
compiling tos_ring_queue.c...
compiling tos_priority_queue.c...
compiling tos_sched.c...
compiling tos_sem.c...
compiling tos_sys.c...
compiling tos_tick.c...
compiling tos_time.c...
compiling tos_timer.c...
compiling delay.c...
compiling tos_task.c...
compiling cmsis_os.c...
compiling lcd_config.c...
compiling lcd_2inch4.c...
compiling hello_world.c...
compiling ov2640.c...
compiling sccb.c...
linking...
Program Size: Code=20332 RO-data=728 RW-data=56 ZI-data=45088
FromELF: creating hex file...
"TencentOS_tiny\TencentOS_tiny.axf" - 0 Error(s), 0 Warning(s).
<h2>Software Packages used:</h2>
Package Vendor: ARM
http://www.keil.com/pack/ARM.CMSIS.5.7.0.pack
ARM.CMSIS.5.7.0
CMSIS (Cortex Microcontroller Software Interface Standard)
* Component: CORE Version: 5.4.0
Package Vendor: Keil
https://www.keil.com/pack/Keil.STM32L4xx_DFP.2.5.0.pack
Keil.STM32L4xx_DFP.2.5.0
STMicroelectronics STM32L4 Series Device Support, Drivers and Examples
<h2>Collection of Component include folders:</h2>
.\RTE\_TencentOS_tiny
D:\software\Keil\ARM\CMSIS\5.7.0\CMSIS\Core\Include
D:\software\Keil\Keil\STM32L4xx_DFP\2.5.0\Drivers\CMSIS\Device\ST\STM32L4xx\Include
<h2>Collection of Component Files used:</h2>
* Component: ARM::CMSIS:CORE:5.4.0
Build Time Elapsed: 00:00:12
</pre>
</body>
</html>
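
For reference, the armlink size line above is conventionally read as: flash image ≈ Code + RO-data + RW-data = 20332 + 728 + 56 = 21116 bytes (about 20.6 KiB of the 1 MiB IROM), and RAM usage ≈ RW-data + ZI-data = 56 + 45088 = 45144 bytes (about 44.1 KiB of the 320 KiB IRAM).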

File diff suppressed because it is too large

View File

@@ -0,0 +1,16 @@
; *************************************************************
; *** Scatter-Loading Description File generated by uVision ***
; *************************************************************
LR_IROM1 0x08000000 0x00100000 { ; load region size_region
ER_IROM1 0x08000000 0x00100000 { ; load address = execution address
*.o (RESET, +First)
*(InRoot$$Sections)
.ANY (+RO)
.ANY (+XO)
}
RW_IRAM1 0x20000000 0x00050000 { ; RW data
.ANY (+RW +ZI)
}
}
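
The region sizes match the device memory declared in the project file (IRAM 0x20000000-0x2004FFFF, IROM 0x8000000-0x80FFFFF): 0x00100000 = 1 MiB of flash at 0x08000000 for the load/execution region holding code and RO data, and 0x00050000 = 320 KiB of SRAM at 0x20000000 for RW and ZI data; the *.o (RESET, +First) line keeps the startup file's vector table at the very start of flash.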

View File

@@ -0,0 +1,77 @@
// File: STM32L4x5_4x6.dbgconf
// Version: 1.0.0
// Note: refer to STM32L4x5 and STM32L4x6 Reference manual (RM0351)
// refer to STM32L475xx STM32L476xx STM32L486xx STM32L496xx STM32L4A6xx datasheets
// <<< Use Configuration Wizard in Context Menu >>>
// <h> Debug MCU configuration register (DBGMCU_CR)
// <o.2> DBG_STANDBY <i> Debug Standby mode
// <o.1> DBG_STOP <i> Debug Stop mode
// <o.0> DBG_SLEEP <i> Debug Sleep mode
// </h>
DbgMCU_CR = 0x00000007;
// <h> Debug MCU APB1 freeze register 1 (DBGMCU_APB1FZR1)
// <i> Reserved bits must be kept at reset value
// <o.31> DBG_LPTIM1_STOP <i> LPTIM1 counter stopped when core is halted
// <o.26> DBG_CAN2_STOP <i> bxCAN2 stopped when core is halted
// <o.25> DBG_CAN1_STOP <i> bxCAN1 stopped when core is halted
// <o.23> DBG_I2C3_STOP <i> I2C3 SMBUS timeout counter stopped when core is halted
// <o.22> DBG_I2C2_STOP <i> I2C2 SMBUS timeout counter stopped when core is halted
// <o.21> DBG_I2C1_STOP <i> I2C1 SMBUS timeout counter stopped when core is halted
// <o.12> DBG_IWDG_STOP <i> Independent watchdog counter stopped when core is halted
// <o.11> DBG_WWDG_STOP <i> Window watchdog counter stopped when core is halted
// <o.10> DBG_RTC_STOP <i> RTC counter stopped when core is halted
// <o.5> DBG_TIM7_STOP <i> TIM7 counter stopped when core is halted
// <o.4> DBG_TIM6_STOP <i> TIM6 counter stopped when core is halted
// <o.3> DBG_TIM5_STOP <i> TIM5 counter stopped when core is halted
// <o.2> DBG_TIM4_STOP <i> TIM4 counter stopped when core is halted
// <o.1> DBG_TIM3_STOP <i> TIM3 counter stopped when core is halted
// <o.0> DBG_TIM2_STOP <i> TIM2 counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz1 = 0x00000000;
// <h> Debug MCU APB1 freeze register 2 (DBGMCU_APB1FZR2)
// <i> Reserved bits must be kept at reset value
// <o.5> DBG_LPTIM2_STOP <i> LPTIM2 counter stopped when core is halted
// <o.1> DBG_I2C4_STOP <i> I2C4 SMBUS timeout counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz2 = 0x00000000;
// <h> Debug MCU APB2 freeze register (DBGMCU_APB2FZR)
// <i> Reserved bits must be kept at reset value
// <o.18> DBG_TIM17_STOP <i> TIM17 counter stopped when core is halted
// <o.17> DBG_TIM16_STOP <i> TIM16 counter stopped when core is halted
// <o.16> DBG_TIM15_STOP <i> TIM15 counter stopped when core is halted
// <o.13> DBG_TIM8_STOP <i> TIM8 counter stopped when core is halted
// <o.11> DBG_TIM1_STOP <i> TIM1 counter stopped when core is halted
// </h>
DbgMCU_APB2_Fz = 0x00000000;
// <h> TPIU Pin Routing (TRACECLK fixed on Pin PE2)
// <i> TRACECLK: Pin PE2
// <o1> TRACED0
// <i> ETM Trace Data 0
// <0x00040003=> Pin PE3
// <0x00020001=> Pin PC1
// <o2> TRACED1
// <i> ETM Trace Data 1
// <0x00040004=> Pin PE4
// <0x0002000A=> Pin PC10
// <o3> TRACED2
// <i> ETM Trace Data 2
// <0x00040005=> Pin PE5
// <0x00030002=> Pin PD2
// <o4> TRACED3
// <i> ETM Trace Data 3
// <0x00040006=> Pin PE6
// <0x0002000C=> Pin PC12
// </h>
TraceClk_Pin = 0x00040002;
TraceD0_Pin = 0x00040003;
TraceD1_Pin = 0x00040004;
TraceD2_Pin = 0x00040005;
TraceD3_Pin = 0x00040006;
// <<< end of configuration section >>>

View File

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="utf-8"?>
<component_viewer schemaVersion="0.1" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance" xs:noNamespaceSchemaLocation="Component_Viewer.xsd">
<component name="EventRecorderStub" version="1.0.0"/> <!--name and version of the component-->
<events>
</events>
</component_viewer>

View File

@@ -0,0 +1,21 @@
/*
* Auto generated Run-Time-Environment Configuration File
* *** Do not modify ! ***
*
* Project: 'TencentOS_tiny'
* Target: 'TencentOS_tiny'
*/
#ifndef RTE_COMPONENTS_H
#define RTE_COMPONENTS_H
/*
* Define the Device Header File:
*/
#define CMSIS_device_header "stm32l4xx.h"
#endif /* RTE_COMPONENTS_H */

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

View File

@@ -0,0 +1,968 @@
<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<Project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="project_projx.xsd">
<SchemaVersion>2.1</SchemaVersion>
<Header>### uVision Project, (C) Keil Software</Header>
<Targets>
<Target>
<TargetName>TencentOS_tiny</TargetName>
<ToolsetNumber>0x4</ToolsetNumber>
<ToolsetName>ARM-ADS</ToolsetName>
<pCCUsed>5060750::V5.06 update 6 (build 750)::.\ARMCC</pCCUsed>
<uAC6>0</uAC6>
<TargetOption>
<TargetCommonOption>
<Device>STM32L496ZGTx</Device>
<Vendor>STMicroelectronics</Vendor>
<PackID>Keil.STM32L4xx_DFP.2.5.0</PackID>
<PackURL>https://www.keil.com/pack/</PackURL>
<Cpu>IRAM(0x20000000-0x2004FFFF) IROM(0x8000000-0x80FFFFF) CLOCK(8000000) FPU2 CPUTYPE("Cortex-M4")</Cpu>
<FlashUtilSpec></FlashUtilSpec>
<StartupFile></StartupFile>
<FlashDriverDll></FlashDriverDll>
<DeviceId></DeviceId>
<RegisterFile></RegisterFile>
<MemoryEnv></MemoryEnv>
<Cmp></Cmp>
<Asm></Asm>
<Linker></Linker>
<OHString></OHString>
<InfinionOptionDll></InfinionOptionDll>
<SLE66CMisc></SLE66CMisc>
<SLE66AMisc></SLE66AMisc>
<SLE66LinkerMisc></SLE66LinkerMisc>
<SFDFile>$$Device:STM32L496ZGTx$CMSIS\SVD\STM32L4x6.svd</SFDFile>
<bCustSvd>0</bCustSvd>
<UseEnv>0</UseEnv>
<BinPath></BinPath>
<IncludePath></IncludePath>
<LibPath></LibPath>
<RegisterFilePath></RegisterFilePath>
<DBRegisterFilePath></DBRegisterFilePath>
<TargetStatus>
<Error>0</Error>
<ExitCodeStop>0</ExitCodeStop>
<ButtonStop>0</ButtonStop>
<NotGenerated>0</NotGenerated>
<InvalidFlash>1</InvalidFlash>
</TargetStatus>
<OutputDirectory>TencentOS_tiny\</OutputDirectory>
<OutputName>TencentOS_tiny</OutputName>
<CreateExecutable>1</CreateExecutable>
<CreateLib>0</CreateLib>
<CreateHexFile>1</CreateHexFile>
<DebugInformation>1</DebugInformation>
<BrowseInformation>1</BrowseInformation>
<ListingPath></ListingPath>
<HexFormatSelection>1</HexFormatSelection>
<Merge32K>0</Merge32K>
<CreateBatchFile>0</CreateBatchFile>
<BeforeCompile>
<RunUserProg1>0</RunUserProg1>
<RunUserProg2>0</RunUserProg2>
<UserProg1Name></UserProg1Name>
<UserProg2Name></UserProg2Name>
<UserProg1Dos16Mode>0</UserProg1Dos16Mode>
<UserProg2Dos16Mode>0</UserProg2Dos16Mode>
<nStopU1X>0</nStopU1X>
<nStopU2X>0</nStopU2X>
</BeforeCompile>
<BeforeMake>
<RunUserProg1>0</RunUserProg1>
<RunUserProg2>0</RunUserProg2>
<UserProg1Name></UserProg1Name>
<UserProg2Name></UserProg2Name>
<UserProg1Dos16Mode>0</UserProg1Dos16Mode>
<UserProg2Dos16Mode>0</UserProg2Dos16Mode>
<nStopB1X>0</nStopB1X>
<nStopB2X>0</nStopB2X>
</BeforeMake>
<AfterMake>
<RunUserProg1>0</RunUserProg1>
<RunUserProg2>0</RunUserProg2>
<UserProg1Name></UserProg1Name>
<UserProg2Name></UserProg2Name>
<UserProg1Dos16Mode>0</UserProg1Dos16Mode>
<UserProg2Dos16Mode>0</UserProg2Dos16Mode>
<nStopA1X>0</nStopA1X>
<nStopA2X>0</nStopA2X>
</AfterMake>
<SelectedForBatchBuild>0</SelectedForBatchBuild>
<SVCSIdString></SVCSIdString>
</TargetCommonOption>
<CommonProperty>
<UseCPPCompiler>0</UseCPPCompiler>
<RVCTCodeConst>0</RVCTCodeConst>
<RVCTZI>0</RVCTZI>
<RVCTOtherData>0</RVCTOtherData>
<ModuleSelection>0</ModuleSelection>
<IncludeInBuild>1</IncludeInBuild>
<AlwaysBuild>0</AlwaysBuild>
<GenerateAssemblyFile>0</GenerateAssemblyFile>
<AssembleAssemblyFile>0</AssembleAssemblyFile>
<PublicsOnly>0</PublicsOnly>
<StopOnExitCode>3</StopOnExitCode>
<CustomArgument></CustomArgument>
<IncludeLibraryModules></IncludeLibraryModules>
<ComprImg>0</ComprImg>
</CommonProperty>
<DllOption>
<SimDllName>SARMCM3.DLL</SimDllName>
<SimDllArguments>-REMAP -MPU</SimDllArguments>
<SimDlgDll>DCM.DLL</SimDlgDll>
<SimDlgDllArguments>-pCM4</SimDlgDllArguments>
<TargetDllName>SARMCM3.DLL</TargetDllName>
<TargetDllArguments>-MPU</TargetDllArguments>
<TargetDlgDll>TCM.DLL</TargetDlgDll>
<TargetDlgDllArguments>-pCM4</TargetDlgDllArguments>
</DllOption>
<DebugOption>
<OPTHX>
<HexSelection>1</HexSelection>
<HexRangeLowAddress>0</HexRangeLowAddress>
<HexRangeHighAddress>0</HexRangeHighAddress>
<HexOffset>0</HexOffset>
<Oh166RecLen>16</Oh166RecLen>
</OPTHX>
</DebugOption>
<Utilities>
<Flash1>
<UseTargetDll>1</UseTargetDll>
<UseExternalTool>0</UseExternalTool>
<RunIndependent>0</RunIndependent>
<UpdateFlashBeforeDebugging>1</UpdateFlashBeforeDebugging>
<Capability>1</Capability>
<DriverSelection>4107</DriverSelection>
</Flash1>
<bUseTDR>1</bUseTDR>
<Flash2>BIN\UL2CM3.DLL</Flash2>
<Flash3></Flash3>
<Flash4></Flash4>
<pFcarmOut></pFcarmOut>
<pFcarmGrp></pFcarmGrp>
<pFcArmRoot></pFcArmRoot>
<FcArmLst>0</FcArmLst>
</Utilities>
<TargetArmAds>
<ArmAdsMisc>
<GenerateListings>0</GenerateListings>
<asHll>1</asHll>
<asAsm>1</asAsm>
<asMacX>1</asMacX>
<asSyms>1</asSyms>
<asFals>1</asFals>
<asDbgD>1</asDbgD>
<asForm>1</asForm>
<ldLst>0</ldLst>
<ldmm>1</ldmm>
<ldXref>1</ldXref>
<BigEnd>0</BigEnd>
<AdsALst>1</AdsALst>
<AdsACrf>1</AdsACrf>
<AdsANop>0</AdsANop>
<AdsANot>0</AdsANot>
<AdsLLst>1</AdsLLst>
<AdsLmap>1</AdsLmap>
<AdsLcgr>1</AdsLcgr>
<AdsLsym>1</AdsLsym>
<AdsLszi>1</AdsLszi>
<AdsLtoi>1</AdsLtoi>
<AdsLsun>1</AdsLsun>
<AdsLven>1</AdsLven>
<AdsLsxf>1</AdsLsxf>
<RvctClst>0</RvctClst>
<GenPPlst>0</GenPPlst>
<AdsCpuType>"Cortex-M4"</AdsCpuType>
<RvctDeviceName></RvctDeviceName>
<mOS>0</mOS>
<uocRom>0</uocRom>
<uocRam>0</uocRam>
<hadIROM>1</hadIROM>
<hadIRAM>1</hadIRAM>
<hadXRAM>0</hadXRAM>
<uocXRam>0</uocXRam>
<RvdsVP>2</RvdsVP>
<RvdsMve>0</RvdsMve>
<RvdsCdeCp>0</RvdsCdeCp>
<hadIRAM2>0</hadIRAM2>
<hadIROM2>0</hadIROM2>
<StupSel>8</StupSel>
<useUlib>1</useUlib>
<EndSel>0</EndSel>
<uLtcg>0</uLtcg>
<nSecure>0</nSecure>
<RoSelD>3</RoSelD>
<RwSelD>3</RwSelD>
<CodeSel>0</CodeSel>
<OptFeed>0</OptFeed>
<NoZi1>0</NoZi1>
<NoZi2>0</NoZi2>
<NoZi3>0</NoZi3>
<NoZi4>0</NoZi4>
<NoZi5>0</NoZi5>
<Ro1Chk>0</Ro1Chk>
<Ro2Chk>0</Ro2Chk>
<Ro3Chk>0</Ro3Chk>
<Ir1Chk>1</Ir1Chk>
<Ir2Chk>0</Ir2Chk>
<Ra1Chk>0</Ra1Chk>
<Ra2Chk>0</Ra2Chk>
<Ra3Chk>0</Ra3Chk>
<Im1Chk>1</Im1Chk>
<Im2Chk>0</Im2Chk>
<OnChipMemories>
<Ocm1>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm1>
<Ocm2>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm2>
<Ocm3>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm3>
<Ocm4>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm4>
<Ocm5>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm5>
<Ocm6>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</Ocm6>
<IRAM>
<Type>0</Type>
<StartAddress>0x20000000</StartAddress>
<Size>0x50000</Size>
</IRAM>
<IROM>
<Type>1</Type>
<StartAddress>0x8000000</StartAddress>
<Size>0x100000</Size>
</IROM>
<XRAM>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</XRAM>
<OCR_RVCT1>
<Type>1</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT1>
<OCR_RVCT2>
<Type>1</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT2>
<OCR_RVCT3>
<Type>1</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT3>
<OCR_RVCT4>
<Type>1</Type>
<StartAddress>0x8000000</StartAddress>
<Size>0x100000</Size>
</OCR_RVCT4>
<OCR_RVCT5>
<Type>1</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT5>
<OCR_RVCT6>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT6>
<OCR_RVCT7>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT7>
<OCR_RVCT8>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT8>
<OCR_RVCT9>
<Type>0</Type>
<StartAddress>0x20000000</StartAddress>
<Size>0x50000</Size>
</OCR_RVCT9>
<OCR_RVCT10>
<Type>0</Type>
<StartAddress>0x0</StartAddress>
<Size>0x0</Size>
</OCR_RVCT10>
</OnChipMemories>
<RvctStartVector></RvctStartVector>
</ArmAdsMisc>
<Cads>
<interw>1</interw>
<Optim>4</Optim>
<oTime>0</oTime>
<SplitLS>0</SplitLS>
<OneElfS>1</OneElfS>
<Strict>0</Strict>
<EnumInt>0</EnumInt>
<PlainCh>0</PlainCh>
<Ropi>0</Ropi>
<Rwpi>0</Rwpi>
<wLevel>2</wLevel>
<uThumb>0</uThumb>
<uSurpInc>0</uSurpInc>
<uC99>1</uC99>
<uGnu>0</uGnu>
<useXO>0</useXO>
<v6Lang>3</v6Lang>
<v6LangP>3</v6LangP>
<vShortEn>1</vShortEn>
<vShortWch>1</vShortWch>
<v6Lto>0</v6Lto>
<v6WtE>0</v6WtE>
<v6Rtti>0</v6Rtti>
<VariousControls>
<MiscControls></MiscControls>
<Define>USE_HAL_DRIVER,STM32L496xx,NUCLEO_STM32L496ZG</Define>
<Undefine></Undefine>
<IncludePath>..\..\BSP\Inc;..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Inc;..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Inc\Legacy;..\..\..\..\platform\vendor_bsp\st\CMSIS\Device\ST\STM32L4xx\Include;..\..\..\..\platform\vendor_bsp\st\CMSIS\Include;..\..\..\..\arch\arm\arm-v7m\common\include;..\..\..\..\arch\arm\arm-v7m\cortex-m4\armcc;..\..\..\..\kernel\core\include;..\..\..\..\kernel\pm\include;..\..\..\..\osal\cmsis_os;..\..\..\..\examples\hello_world;..\..\TOS_CONFIG;..\..\..\..\net\at\include;..\..\..\..\kernel\hal\include;..\..\BSP\Hardware\Inc;..\..\..\..\components\ai\nnom\inc;..\..\..\..\components\ai\nnom\inc\layers;..\..\..\..\components\ai\nnom\port;..\..\..\..\examples\nnom_mnist\data</IncludePath>
</VariousControls>
</Cads>
<Aads>
<interw>1</interw>
<Ropi>0</Ropi>
<Rwpi>0</Rwpi>
<thumb>0</thumb>
<SplitLS>0</SplitLS>
<SwStkChk>0</SwStkChk>
<NoWarn>0</NoWarn>
<uSurpInc>0</uSurpInc>
<useXO>0</useXO>
<ClangAsOpt>4</ClangAsOpt>
<VariousControls>
<MiscControls></MiscControls>
<Define></Define>
<Undefine></Undefine>
<IncludePath></IncludePath>
</VariousControls>
</Aads>
<LDads>
<umfTarg>1</umfTarg>
<Ropi>0</Ropi>
<Rwpi>0</Rwpi>
<noStLib>0</noStLib>
<RepFail>1</RepFail>
<useFile>0</useFile>
<TextAddressRange>0x08000000</TextAddressRange>
<DataAddressRange>0x20000000</DataAddressRange>
<pXoBase></pXoBase>
<ScatterFile></ScatterFile>
<IncludeLibs></IncludeLibs>
<IncludeLibsPath></IncludeLibsPath>
<Misc></Misc>
<LinkerInputFile></LinkerInputFile>
<DisabledWarnings></DisabledWarnings>
</LDads>
</TargetArmAds>
</TargetOption>
<Groups>
<Group>
<GroupName>Application/MDK-ARM</GroupName>
<Files>
<File>
<FileName>startup_stm32l496xx.s</FileName>
<FileType>2</FileType>
<FilePath>startup_stm32l496xx.s</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>Application/User</GroupName>
<Files>
<File>
<FileName>main.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\main.c</FilePath>
</File>
<File>
<FileName>gpio.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\gpio.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_msp.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\stm32l4xx_hal_msp.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_it.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\stm32l4xx_it.c</FilePath>
</File>
<File>
<FileName>sys.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\sys.c</FilePath>
</File>
<File>
<FileName>usart.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\usart.c</FilePath>
</File>
<File>
<FileName>mcu_init.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\mcu_init.c</FilePath>
</File>
<File>
<FileName>dcmi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\dcmi.c</FilePath>
</File>
<File>
<FileName>dma.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\dma.c</FilePath>
</File>
<File>
<FileName>i2c.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\i2c.c</FilePath>
</File>
<File>
<FileName>spi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\spi.c</FilePath>
</File>
<File>
<FileName>tim.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\tim.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>Drivers/STM32L4xx_HAL_Driver</GroupName>
<Files>
<File>
<FileName>stm32l4xx_hal_uart.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_uart.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_uart_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_uart_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_i2c.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_i2c.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_i2c_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_i2c_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_rcc.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_rcc.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_rcc_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_rcc_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_flash.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_flash.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_flash_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_flash_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_flash_ramfunc.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_flash_ramfunc.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_gpio.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_gpio.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_dma.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_dma.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_dma_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_dma_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_pwr.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_pwr.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_pwr_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_pwr_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_cortex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_cortex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_exti.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_exti.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_tim.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_tim.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_tim_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_tim_ex.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_dcmi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_dcmi.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_spi.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_spi.c</FilePath>
</File>
<File>
<FileName>stm32l4xx_hal_spi_ex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\platform\vendor_bsp\st\STM32L4xx_HAL_Driver\Src\stm32l4xx_hal_spi_ex.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>Drivers/CMSIS</GroupName>
<Files>
<File>
<FileName>system_stm32l4xx.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Src\system_stm32l4xx.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>tos/arch</GroupName>
<Files>
<File>
<FileName>tos_cpu.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\arch\arm\arm-v7m\common\tos_cpu.c</FilePath>
</File>
<File>
<FileName>port_c.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\arch\arm\arm-v7m\cortex-m4\armcc\port_c.c</FilePath>
</File>
<File>
<FileName>port_s.S</FileName>
<FileType>2</FileType>
<FilePath>..\..\..\..\arch\arm\arm-v7m\cortex-m4\armcc\port_s.S</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>tos/kernel</GroupName>
<Files>
<File>
<FileName>tos_binary_heap.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_binary_heap.c</FilePath>
</File>
<File>
<FileName>tos_char_fifo.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_char_fifo.c</FilePath>
</File>
<File>
<FileName>tos_completion.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_completion.c</FilePath>
</File>
<File>
<FileName>tos_countdownlatch.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_countdownlatch.c</FilePath>
</File>
<File>
<FileName>tos_event.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_event.c</FilePath>
</File>
<File>
<FileName>tos_global.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_global.c</FilePath>
</File>
<File>
<FileName>tos_mail_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_mail_queue.c</FilePath>
</File>
<File>
<FileName>tos_message_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_message_queue.c</FilePath>
</File>
<File>
<FileName>tos_mmblk.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_mmblk.c</FilePath>
</File>
<File>
<FileName>tos_mmheap.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_mmheap.c</FilePath>
</File>
<File>
<FileName>tos_mutex.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_mutex.c</FilePath>
</File>
<File>
<FileName>tos_pend.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_pend.c</FilePath>
</File>
<File>
<FileName>tos_priority_mail_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_priority_mail_queue.c</FilePath>
</File>
<File>
<FileName>tos_priority_message_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_priority_message_queue.c</FilePath>
</File>
<File>
<FileName>tos_priority_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_priority_queue.c</FilePath>
</File>
<File>
<FileName>tos_ring_queue.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_ring_queue.c</FilePath>
</File>
<File>
<FileName>tos_robin.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_robin.c</FilePath>
</File>
<File>
<FileName>tos_sched.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_sched.c</FilePath>
</File>
<File>
<FileName>tos_sem.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_sem.c</FilePath>
</File>
<File>
<FileName>tos_sys.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_sys.c</FilePath>
</File>
<File>
<FileName>tos_task.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_task.c</FilePath>
</File>
<File>
<FileName>tos_tick.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_tick.c</FilePath>
</File>
<File>
<FileName>tos_time.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_time.c</FilePath>
</File>
<File>
<FileName>tos_timer.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\kernel\core\tos_timer.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>tos/cmsis_os</GroupName>
<Files>
<File>
<FileName>cmsis_os.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\osal\cmsis_os\cmsis_os.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>hal</GroupName>
<Files>
<File>
<FileName>delay.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\delay.c</FilePath>
</File>
<File>
<FileName>lcd_2inch4.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\lcd_2inch4.c</FilePath>
</File>
<File>
<FileName>lcd_config.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\lcd_config.c</FilePath>
</File>
<File>
<FileName>ov2640.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\ov2640.c</FilePath>
</File>
<File>
<FileName>sccb.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\BSP\Hardware\Src\sccb.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>examples</GroupName>
<Files>
<File>
<FileName>nnom_mnsit_example.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\examples\nnom_mnist\nnom_mnsit_example.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>nnom</GroupName>
<Files>
<File>
<FileName>nnom_local.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\backends\nnom_local.c</FilePath>
</File>
<File>
<FileName>nnom.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\core\nnom.c</FilePath>
</File>
<File>
<FileName>nnom_layers.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\core\nnom_layers.c</FilePath>
</File>
<File>
<FileName>nnom_tensor.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\core\nnom_tensor.c</FilePath>
</File>
<File>
<FileName>nnom_utils.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\core\nnom_utils.c</FilePath>
</File>
<File>
<FileName>nnom_activation.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_activation.c</FilePath>
</File>
<File>
<FileName>nnom_avgpool.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_avgpool.c</FilePath>
</File>
<File>
<FileName>nnom_baselayer.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_baselayer.c</FilePath>
</File>
<File>
<FileName>nnom_concat.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_concat.c</FilePath>
</File>
<File>
<FileName>nnom_conv2d.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_conv2d.c</FilePath>
</File>
<File>
<FileName>nnom_conv2d_trans.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_conv2d_trans.c</FilePath>
</File>
<File>
<FileName>nnom_cropping.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_cropping.c</FilePath>
</File>
<File>
<FileName>nnom_dense.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_dense.c</FilePath>
</File>
<File>
<FileName>nnom_dw_conv2d.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_dw_conv2d.c</FilePath>
</File>
<File>
<FileName>nnom_flatten.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_flatten.c</FilePath>
</File>
<File>
<FileName>nnom_global_pool.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_global_pool.c</FilePath>
</File>
<File>
<FileName>nnom_gru_cell.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_gru_cell.c</FilePath>
</File>
<File>
<FileName>nnom_input.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_input.c</FilePath>
</File>
<File>
<FileName>nnom_lambda.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_lambda.c</FilePath>
</File>
<File>
<FileName>nnom_lstm_cell.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_lstm_cell.c</FilePath>
</File>
<File>
<FileName>nnom_matrix.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_matrix.c</FilePath>
</File>
<File>
<FileName>nnom_maxpool.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_maxpool.c</FilePath>
</File>
<File>
<FileName>nnom_output.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_output.c</FilePath>
</File>
<File>
<FileName>nnom_rnn.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_rnn.c</FilePath>
</File>
<File>
<FileName>nnom_simple_cell.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_simple_cell.c</FilePath>
</File>
<File>
<FileName>nnom_softmax.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_softmax.c</FilePath>
</File>
<File>
<FileName>nnom_sumpool.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_sumpool.c</FilePath>
</File>
<File>
<FileName>nnom_upsample.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_upsample.c</FilePath>
</File>
<File>
<FileName>nnom_zero_padding.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\layers\nnom_zero_padding.c</FilePath>
</File>
<File>
<FileName>nnom_local_q15.c</FileName>
<FileType>1</FileType>
<FilePath>..\..\..\..\components\ai\nnom\src\backends\nnom_local_q15.c</FilePath>
</File>
</Files>
</Group>
<Group>
<GroupName>::CMSIS</GroupName>
</Group>
</Groups>
</Target>
</Targets>
<RTE>
<apis/>
<components>
<component Cclass="CMSIS" Cgroup="CORE" Cvendor="ARM" Cversion="5.4.0" condition="ARMv6_7_8-M Device">
<package name="CMSIS" schemaVersion="1.3" url="http://www.keil.com/pack/" vendor="ARM" version="5.7.0"/>
<targetInfos>
<targetInfo name="TencentOS_tiny"/>
</targetInfos>
</component>
</components>
<files/>
</RTE>
<LayerInfo>
<Layers>
<Layer>
<LayName>&lt;Project Info&gt;</LayName>
<LayDesc></LayDesc>
<LayUrl></LayUrl>
<LayKeys></LayKeys>
<LayCat></LayCat>
<LayLic></LayLic>
<LayTarg>0</LayTarg>
<LayPrjMark>1</LayPrjMark>
</Layer>
</Layers>
</LayerInfo>
</Project>

View File

@@ -0,0 +1,63 @@
<html>
<body>
<pre>
<h1>µVision Build Log</h1>
<h2>Tool Versions:</h2>
IDE-Version: µVision V5.30.0.0
Copyright (C) 2020 ARM Ltd and ARM Germany GmbH. All rights reserved.
License Information: 1 2, 3, LIC=VGXG8-3EBEY-FWM2N-Y5VPW-1RV7D-LEXKU
Tool Versions:
Toolchain: MDK-ARM Plus Version: 5.30.0.0
Toolchain Path: D:\software\Keil\ARM\ARMCC\Bin
C Compiler: Armcc.exe V5.06 update 6 (build 750)
Assembler: Armasm.exe V5.06 update 6 (build 750)
Linker/Locator: ArmLink.exe V5.06 update 6 (build 750)
Library Manager: ArmAr.exe V5.06 update 6 (build 750)
Hex Converter: FromElf.exe V5.06 update 6 (build 750)
CPU DLL: SARMCM3.DLL V5.30.0.0
Dialog DLL: DCM.DLL V1.17.3.0
Target DLL: STLink\ST-LINKIII-KEIL_SWO.dll V3.0.7.0
Dialog DLL: TCM.DLL V1.42.0.0
<h2>Project:</h2>
D:\Code\Project\tencentos\TencentOS-tiny\board\NUCLEO_STM32L496ZG\KEIL\nnom_mnist\TencentOS_tiny.uvprojx
Project File Date: 09/08/2021
<h2>Output:</h2>
*** Using Compiler 'V5.06 update 6 (build 750)', folder: 'D:\software\Keil\ARM\ARMCC\Bin'
Build target 'TencentOS_tiny'
compiling nnom_mnsit_example.c...
..\..\..\..\examples\nnom_mnist\data\weights.h(420): warning: #1-D: last line of file ends without a newline
}
..\..\..\..\examples\nnom_mnist\nnom_mnsit_example.c: 1 warning, 0 errors
linking...
Program Size: Code=27340 RO-data=98532 RW-data=112 ZI-data=65136
FromELF: creating hex file...
"TencentOS_tiny\TencentOS_tiny.axf" - 0 Error(s), 1 Warning(s).
<h2>Software Packages used:</h2>
Package Vendor: ARM
http://www.keil.com/pack/ARM.CMSIS.5.7.0.pack
ARM.CMSIS.5.7.0
CMSIS (Cortex Microcontroller Software Interface Standard)
* Component: CORE Version: 5.4.0
Package Vendor: Keil
https://www.keil.com/pack/Keil.STM32L4xx_DFP.2.5.0.pack
Keil.STM32L4xx_DFP.2.5.0
STMicroelectronics STM32L4 Series Device Support, Drivers and Examples
<h2>Collection of Component include folders:</h2>
.\RTE\_TencentOS_tiny
D:\software\Keil\ARM\CMSIS\5.7.0\CMSIS\Core\Include
D:\software\Keil\Keil\STM32L4xx_DFP\2.5.0\Drivers\CMSIS\Device\ST\STM32L4xx\Include
<h2>Collection of Component Files used:</h2>
* Component: ARM::CMSIS:CORE:5.4.0
Build Time Elapsed: 00:00:04
</pre>
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,16 @@
; *************************************************************
; *** Scatter-Loading Description File generated by uVision ***
; *************************************************************
LR_IROM1 0x08000000 0x00100000 { ; load region size_region
ER_IROM1 0x08000000 0x00100000 { ; load address = execution address
*.o (RESET, +First)
*(InRoot$$Sections)
.ANY (+RO)
.ANY (+XO)
}
RW_IRAM1 0x20000000 0x00050000 { ; RW data
.ANY (+RW +ZI)
}
}

View File

@@ -0,0 +1,450 @@
;*******************************************************************************
;* File Name : startup_stm32l496xx.s
;* Author : MCD Application Team
;* Description : STM32L496xx Ultra Low Power devices vector table for MDK-ARM toolchain.
;* This module performs:
;* - Set the initial SP
;* - Set the initial PC == Reset_Handler
;* - Set the vector table entries with the exceptions ISR address
;* - Branches to __main in the C library (which eventually
;* calls main()).
;* After Reset the Cortex-M4 processor is in Thread mode,
;* priority is Privileged, and the Stack is set to Main.
;* <<< Use Configuration Wizard in Context Menu >>>
;*******************************************************************************
;*
;* <h2><center>&copy; Copyright (c) 2017 STMicroelectronics.
;* All rights reserved.</center></h2>
;*
;* This software component is licensed by ST under BSD 3-Clause license,
;* the "License"; You may not use this file except in compliance with the
;* License. You may obtain a copy of the License at:
;* opensource.org/licenses/BSD-3-Clause
;*
;*******************************************************************************
;
; Amount of memory (in bytes) allocated for Stack
; Tailor this value to your application needs
; <h> Stack Configuration
; <o> Stack Size (in Bytes) <0x0-0xFFFFFFFF:8>
; </h>
Stack_Size EQU 0x400
AREA STACK, NOINIT, READWRITE, ALIGN=3
Stack_Mem SPACE Stack_Size
__initial_sp
; <h> Heap Configuration
; <o> Heap Size (in Bytes) <0x0-0xFFFFFFFF:8>
; </h>
Heap_Size EQU 0x200
AREA HEAP, NOINIT, READWRITE, ALIGN=3
__heap_base
Heap_Mem SPACE Heap_Size
__heap_limit
PRESERVE8
THUMB
; Vector Table Mapped to Address 0 at Reset
AREA RESET, DATA, READONLY
EXPORT __Vectors
EXPORT __Vectors_End
EXPORT __Vectors_Size
__Vectors DCD __initial_sp ; Top of Stack
DCD Reset_Handler ; Reset Handler
DCD NMI_Handler ; NMI Handler
DCD HardFault_Handler ; Hard Fault Handler
DCD MemManage_Handler ; MPU Fault Handler
DCD BusFault_Handler ; Bus Fault Handler
DCD UsageFault_Handler ; Usage Fault Handler
DCD 0 ; Reserved
DCD 0 ; Reserved
DCD 0 ; Reserved
DCD 0 ; Reserved
DCD SVC_Handler ; SVCall Handler
DCD DebugMon_Handler ; Debug Monitor Handler
DCD 0 ; Reserved
DCD PendSV_Handler ; PendSV Handler
DCD SysTick_Handler ; SysTick Handler
; External Interrupts
DCD WWDG_IRQHandler ; Window WatchDog
DCD PVD_PVM_IRQHandler ; PVD/PVM1/PVM2/PVM3/PVM4 through EXTI Line detection
DCD TAMP_STAMP_IRQHandler ; Tamper and TimeStamps through the EXTI line
DCD RTC_WKUP_IRQHandler ; RTC Wakeup through the EXTI line
DCD FLASH_IRQHandler ; FLASH
DCD RCC_IRQHandler ; RCC
DCD EXTI0_IRQHandler ; EXTI Line0
DCD EXTI1_IRQHandler ; EXTI Line1
DCD EXTI2_IRQHandler ; EXTI Line2
DCD EXTI3_IRQHandler ; EXTI Line3
DCD EXTI4_IRQHandler ; EXTI Line4
DCD DMA1_Channel1_IRQHandler ; DMA1 Channel 1
DCD DMA1_Channel2_IRQHandler ; DMA1 Channel 2
DCD DMA1_Channel3_IRQHandler ; DMA1 Channel 3
DCD DMA1_Channel4_IRQHandler ; DMA1 Channel 4
DCD DMA1_Channel5_IRQHandler ; DMA1 Channel 5
DCD DMA1_Channel6_IRQHandler ; DMA1 Channel 6
DCD DMA1_Channel7_IRQHandler ; DMA1 Channel 7
DCD ADC1_2_IRQHandler ; ADC1, ADC2
DCD CAN1_TX_IRQHandler ; CAN1 TX
DCD CAN1_RX0_IRQHandler ; CAN1 RX0
DCD CAN1_RX1_IRQHandler ; CAN1 RX1
DCD CAN1_SCE_IRQHandler ; CAN1 SCE
DCD EXTI9_5_IRQHandler ; External Line[9:5]s
DCD TIM1_BRK_TIM15_IRQHandler ; TIM1 Break and TIM15
DCD TIM1_UP_TIM16_IRQHandler ; TIM1 Update and TIM16
DCD TIM1_TRG_COM_TIM17_IRQHandler ; TIM1 Trigger and Commutation and TIM17
DCD TIM1_CC_IRQHandler ; TIM1 Capture Compare
DCD TIM2_IRQHandler ; TIM2
DCD TIM3_IRQHandler ; TIM3
DCD TIM4_IRQHandler ; TIM4
DCD I2C1_EV_IRQHandler ; I2C1 Event
DCD I2C1_ER_IRQHandler ; I2C1 Error
DCD I2C2_EV_IRQHandler ; I2C2 Event
DCD I2C2_ER_IRQHandler ; I2C2 Error
DCD SPI1_IRQHandler ; SPI1
DCD SPI2_IRQHandler ; SPI2
DCD USART1_IRQHandler ; USART1
DCD USART2_IRQHandler ; USART2
DCD USART3_IRQHandler ; USART3
DCD EXTI15_10_IRQHandler ; External Line[15:10]
DCD RTC_Alarm_IRQHandler ; RTC Alarm (A and B) through EXTI Line
DCD DFSDM1_FLT3_IRQHandler ; DFSDM1 Filter 3 global Interrupt
DCD TIM8_BRK_IRQHandler ; TIM8 Break Interrupt
DCD TIM8_UP_IRQHandler ; TIM8 Update Interrupt
DCD TIM8_TRG_COM_IRQHandler ; TIM8 Trigger and Commutation Interrupt
DCD TIM8_CC_IRQHandler ; TIM8 Capture Compare Interrupt
DCD ADC3_IRQHandler ; ADC3 global Interrupt
DCD FMC_IRQHandler ; FMC
DCD SDMMC1_IRQHandler ; SDMMC1
DCD TIM5_IRQHandler ; TIM5
DCD SPI3_IRQHandler ; SPI3
DCD UART4_IRQHandler ; UART4
DCD UART5_IRQHandler ; UART5
DCD TIM6_DAC_IRQHandler ; TIM6 and DAC1&2 underrun errors
DCD TIM7_IRQHandler ; TIM7
DCD DMA2_Channel1_IRQHandler ; DMA2 Channel 1
DCD DMA2_Channel2_IRQHandler ; DMA2 Channel 2
DCD DMA2_Channel3_IRQHandler ; DMA2 Channel 3
DCD DMA2_Channel4_IRQHandler ; DMA2 Channel 4
DCD DMA2_Channel5_IRQHandler ; DMA2 Channel 5
DCD DFSDM1_FLT0_IRQHandler ; DFSDM1 Filter 0 global Interrupt
DCD DFSDM1_FLT1_IRQHandler ; DFSDM1 Filter 1 global Interrupt
DCD DFSDM1_FLT2_IRQHandler ; DFSDM1 Filter 2 global Interrupt
DCD COMP_IRQHandler ; COMP Interrupt
DCD LPTIM1_IRQHandler ; LP TIM1 interrupt
DCD LPTIM2_IRQHandler ; LP TIM2 interrupt
DCD OTG_FS_IRQHandler ; USB OTG FS
DCD DMA2_Channel6_IRQHandler ; DMA2 Channel 6
DCD DMA2_Channel7_IRQHandler ; DMA2 Channel 7
DCD LPUART1_IRQHandler ; LP UART1 interrupt
DCD QUADSPI_IRQHandler ; Quad SPI global interrupt
DCD I2C3_EV_IRQHandler ; I2C3 event
DCD I2C3_ER_IRQHandler ; I2C3 error
DCD SAI1_IRQHandler ; Serial Audio Interface 1 global interrupt
DCD SAI2_IRQHandler ; Serial Audio Interface 2 global interrupt
DCD SWPMI1_IRQHandler ; Serial Wire Interface 1 global interrupt
DCD TSC_IRQHandler ; Touch Sense Controller global interrupt
DCD LCD_IRQHandler ; LCD global interrupt
DCD 0 ; Reserved
DCD RNG_IRQHandler ; RNG global interrupt
DCD FPU_IRQHandler ; FPU
DCD CRS_IRQHandler ; CRS error
DCD I2C4_EV_IRQHandler ; I2C4 event
DCD I2C4_ER_IRQHandler ; I2C4 error
DCD DCMI_IRQHandler ; DCMI global interrupt
DCD CAN2_TX_IRQHandler ; CAN2 TX
DCD CAN2_RX0_IRQHandler ; CAN2 RX0
DCD CAN2_RX1_IRQHandler ; CAN2 RX1
DCD CAN2_SCE_IRQHandler ; CAN2 SCE
DCD DMA2D_IRQHandler ; DMA2D global interrupt
__Vectors_End
__Vectors_Size EQU __Vectors_End - __Vectors
AREA |.text|, CODE, READONLY
; Reset handler
Reset_Handler PROC
EXPORT Reset_Handler [WEAK]
IMPORT SystemInit
IMPORT __main
LDR R0, =SystemInit
BLX R0
LDR R0, =__main
BX R0
ENDP
; Dummy Exception Handlers (infinite loops which can be modified)
NMI_Handler PROC
EXPORT NMI_Handler [WEAK]
B .
ENDP
HardFault_Handler\
PROC
EXPORT HardFault_Handler [WEAK]
B .
ENDP
MemManage_Handler\
PROC
EXPORT MemManage_Handler [WEAK]
B .
ENDP
BusFault_Handler\
PROC
EXPORT BusFault_Handler [WEAK]
B .
ENDP
UsageFault_Handler\
PROC
EXPORT UsageFault_Handler [WEAK]
B .
ENDP
SVC_Handler PROC
EXPORT SVC_Handler [WEAK]
B .
ENDP
DebugMon_Handler\
PROC
EXPORT DebugMon_Handler [WEAK]
B .
ENDP
PendSV_Handler PROC
EXPORT PendSV_Handler [WEAK]
B .
ENDP
SysTick_Handler PROC
EXPORT SysTick_Handler [WEAK]
B .
ENDP
Default_Handler PROC
EXPORT WWDG_IRQHandler [WEAK]
EXPORT PVD_PVM_IRQHandler [WEAK]
EXPORT TAMP_STAMP_IRQHandler [WEAK]
EXPORT RTC_WKUP_IRQHandler [WEAK]
EXPORT FLASH_IRQHandler [WEAK]
EXPORT RCC_IRQHandler [WEAK]
EXPORT EXTI0_IRQHandler [WEAK]
EXPORT EXTI1_IRQHandler [WEAK]
EXPORT EXTI2_IRQHandler [WEAK]
EXPORT EXTI3_IRQHandler [WEAK]
EXPORT EXTI4_IRQHandler [WEAK]
EXPORT DMA1_Channel1_IRQHandler [WEAK]
EXPORT DMA1_Channel2_IRQHandler [WEAK]
EXPORT DMA1_Channel3_IRQHandler [WEAK]
EXPORT DMA1_Channel4_IRQHandler [WEAK]
EXPORT DMA1_Channel5_IRQHandler [WEAK]
EXPORT DMA1_Channel6_IRQHandler [WEAK]
EXPORT DMA1_Channel7_IRQHandler [WEAK]
EXPORT ADC1_2_IRQHandler [WEAK]
EXPORT CAN1_TX_IRQHandler [WEAK]
EXPORT CAN1_RX0_IRQHandler [WEAK]
EXPORT CAN1_RX1_IRQHandler [WEAK]
EXPORT CAN1_SCE_IRQHandler [WEAK]
EXPORT EXTI9_5_IRQHandler [WEAK]
EXPORT TIM1_BRK_TIM15_IRQHandler [WEAK]
EXPORT TIM1_UP_TIM16_IRQHandler [WEAK]
EXPORT TIM1_TRG_COM_TIM17_IRQHandler [WEAK]
EXPORT TIM1_CC_IRQHandler [WEAK]
EXPORT TIM2_IRQHandler [WEAK]
EXPORT TIM3_IRQHandler [WEAK]
EXPORT TIM4_IRQHandler [WEAK]
EXPORT I2C1_EV_IRQHandler [WEAK]
EXPORT I2C1_ER_IRQHandler [WEAK]
EXPORT I2C2_EV_IRQHandler [WEAK]
EXPORT I2C2_ER_IRQHandler [WEAK]
EXPORT SPI1_IRQHandler [WEAK]
EXPORT SPI2_IRQHandler [WEAK]
EXPORT USART1_IRQHandler [WEAK]
EXPORT USART2_IRQHandler [WEAK]
EXPORT USART3_IRQHandler [WEAK]
EXPORT EXTI15_10_IRQHandler [WEAK]
EXPORT RTC_Alarm_IRQHandler [WEAK]
EXPORT DFSDM1_FLT3_IRQHandler [WEAK]
EXPORT TIM8_BRK_IRQHandler [WEAK]
EXPORT TIM8_UP_IRQHandler [WEAK]
EXPORT TIM8_TRG_COM_IRQHandler [WEAK]
EXPORT TIM8_CC_IRQHandler [WEAK]
EXPORT ADC3_IRQHandler [WEAK]
EXPORT FMC_IRQHandler [WEAK]
EXPORT SDMMC1_IRQHandler [WEAK]
EXPORT TIM5_IRQHandler [WEAK]
EXPORT SPI3_IRQHandler [WEAK]
EXPORT UART4_IRQHandler [WEAK]
EXPORT UART5_IRQHandler [WEAK]
EXPORT TIM6_DAC_IRQHandler [WEAK]
EXPORT TIM7_IRQHandler [WEAK]
EXPORT DMA2_Channel1_IRQHandler [WEAK]
EXPORT DMA2_Channel2_IRQHandler [WEAK]
EXPORT DMA2_Channel3_IRQHandler [WEAK]
EXPORT DMA2_Channel4_IRQHandler [WEAK]
EXPORT DMA2_Channel5_IRQHandler [WEAK]
EXPORT DFSDM1_FLT0_IRQHandler [WEAK]
EXPORT DFSDM1_FLT1_IRQHandler [WEAK]
EXPORT DFSDM1_FLT2_IRQHandler [WEAK]
EXPORT COMP_IRQHandler [WEAK]
EXPORT LPTIM1_IRQHandler [WEAK]
EXPORT LPTIM2_IRQHandler [WEAK]
EXPORT OTG_FS_IRQHandler [WEAK]
EXPORT DMA2_Channel6_IRQHandler [WEAK]
EXPORT DMA2_Channel7_IRQHandler [WEAK]
EXPORT LPUART1_IRQHandler [WEAK]
EXPORT QUADSPI_IRQHandler [WEAK]
EXPORT I2C3_EV_IRQHandler [WEAK]
EXPORT I2C3_ER_IRQHandler [WEAK]
EXPORT SAI1_IRQHandler [WEAK]
EXPORT SAI2_IRQHandler [WEAK]
EXPORT SWPMI1_IRQHandler [WEAK]
EXPORT TSC_IRQHandler [WEAK]
EXPORT LCD_IRQHandler [WEAK]
EXPORT RNG_IRQHandler [WEAK]
EXPORT FPU_IRQHandler [WEAK]
EXPORT CRS_IRQHandler [WEAK]
EXPORT I2C4_EV_IRQHandler [WEAK]
EXPORT I2C4_ER_IRQHandler [WEAK]
EXPORT DCMI_IRQHandler [WEAK]
EXPORT CAN2_TX_IRQHandler [WEAK]
EXPORT CAN2_RX0_IRQHandler [WEAK]
EXPORT CAN2_RX1_IRQHandler [WEAK]
EXPORT CAN2_SCE_IRQHandler [WEAK]
EXPORT DMA2D_IRQHandler [WEAK]
WWDG_IRQHandler
PVD_PVM_IRQHandler
TAMP_STAMP_IRQHandler
RTC_WKUP_IRQHandler
FLASH_IRQHandler
RCC_IRQHandler
EXTI0_IRQHandler
EXTI1_IRQHandler
EXTI2_IRQHandler
EXTI3_IRQHandler
EXTI4_IRQHandler
DMA1_Channel1_IRQHandler
DMA1_Channel2_IRQHandler
DMA1_Channel3_IRQHandler
DMA1_Channel4_IRQHandler
DMA1_Channel5_IRQHandler
DMA1_Channel6_IRQHandler
DMA1_Channel7_IRQHandler
ADC1_2_IRQHandler
CAN1_TX_IRQHandler
CAN1_RX0_IRQHandler
CAN1_RX1_IRQHandler
CAN1_SCE_IRQHandler
EXTI9_5_IRQHandler
TIM1_BRK_TIM15_IRQHandler
TIM1_UP_TIM16_IRQHandler
TIM1_TRG_COM_TIM17_IRQHandler
TIM1_CC_IRQHandler
TIM2_IRQHandler
TIM3_IRQHandler
TIM4_IRQHandler
I2C1_EV_IRQHandler
I2C1_ER_IRQHandler
I2C2_EV_IRQHandler
I2C2_ER_IRQHandler
SPI1_IRQHandler
SPI2_IRQHandler
USART1_IRQHandler
USART2_IRQHandler
USART3_IRQHandler
EXTI15_10_IRQHandler
RTC_Alarm_IRQHandler
DFSDM1_FLT3_IRQHandler
TIM8_BRK_IRQHandler
TIM8_UP_IRQHandler
TIM8_TRG_COM_IRQHandler
TIM8_CC_IRQHandler
ADC3_IRQHandler
FMC_IRQHandler
SDMMC1_IRQHandler
TIM5_IRQHandler
SPI3_IRQHandler
UART4_IRQHandler
UART5_IRQHandler
TIM6_DAC_IRQHandler
TIM7_IRQHandler
DMA2_Channel1_IRQHandler
DMA2_Channel2_IRQHandler
DMA2_Channel3_IRQHandler
DMA2_Channel4_IRQHandler
DMA2_Channel5_IRQHandler
DFSDM1_FLT0_IRQHandler
DFSDM1_FLT1_IRQHandler
DFSDM1_FLT2_IRQHandler
COMP_IRQHandler
LPTIM1_IRQHandler
LPTIM2_IRQHandler
OTG_FS_IRQHandler
DMA2_Channel6_IRQHandler
DMA2_Channel7_IRQHandler
LPUART1_IRQHandler
QUADSPI_IRQHandler
I2C3_EV_IRQHandler
I2C3_ER_IRQHandler
SAI1_IRQHandler
SAI2_IRQHandler
SWPMI1_IRQHandler
TSC_IRQHandler
LCD_IRQHandler
RNG_IRQHandler
FPU_IRQHandler
CRS_IRQHandler
I2C4_EV_IRQHandler
I2C4_ER_IRQHandler
DCMI_IRQHandler
CAN2_TX_IRQHandler
CAN2_RX0_IRQHandler
CAN2_RX1_IRQHandler
CAN2_SCE_IRQHandler
DMA2D_IRQHandler
B .
ENDP
ALIGN
;*******************************************************************************
; User Stack and Heap initialization
;*******************************************************************************
IF :DEF:__MICROLIB
EXPORT __initial_sp
EXPORT __heap_base
EXPORT __heap_limit
ELSE
IMPORT __use_two_region_memory
EXPORT __user_initial_stackheap
__user_initial_stackheap
LDR R0, = Heap_Mem
LDR R1, =(Stack_Mem + Stack_Size)
LDR R2, = (Heap_Mem + Heap_Size)
LDR R3, = Stack_Mem
BX LR
ALIGN
ENDIF
END
;************************ (C) COPYRIGHT STMicroelectronics *****END OF FILE*****

View File

@@ -0,0 +1,77 @@
// File: STM32L4x5_4x6.dbgconf
// Version: 1.0.0
// Note: refer to STM32L4x5 and STM32L4x6 Reference manual (RM0351)
// refer to STM32L475xx STM32L476xx STM32L486xx STM32L496xx STM32L4A6xx datasheets
// <<< Use Configuration Wizard in Context Menu >>>
// <h> Debug MCU configuration register (DBGMCU_CR)
// <o.2> DBG_STANDBY <i> Debug Standby mode
// <o.1> DBG_STOP <i> Debug Stop mode
// <o.0> DBG_SLEEP <i> Debug Sleep mode
// </h>
DbgMCU_CR = 0x00000007;
// <h> Debug MCU APB1 freeze register1 (DBGMCU_APB1FZR1)
// <i> Reserved bits must be kept at reset value
// <o.31> DBG_LPTIM1_STOP <i> LPTIM1 counter stopped when core is halted
// <o.26> DBG_CAN2_STOP <i> bxCAN2 stopped when core is halted
// <o.25> DBG_CAN1_STOP <i> bxCAN1 stopped when core is halted
// <o.23> DBG_I2C3_STOP <i> I2C3 SMBUS timeout counter stopped when core is halted
// <o.22> DBG_I2C2_STOP <i> I2C2 SMBUS timeout counter stopped when core is halted
// <o.21> DBG_I2C1_STOP <i> I2C1 SMBUS timeout counter stopped when core is halted
// <o.12> DBG_IWDG_STOP <i> Independent watchdog counter stopped when core is halted
// <o.11> DBG_WWDG_STOP <i> Window watchdog counter stopped when core is halted
// <o.10> DBG_RTC_STOP <i> RTC counter stopped when core is halted
// <o.5> DBG_TIM7_STOP <i> TIM7 counter stopped when core is halted
// <o.4> DBG_TIM6_STOP <i> TIM6 counter stopped when core is halted
// <o.3> DBG_TIM5_STOP <i> TIM5 counter stopped when core is halted
// <o.2> DBG_TIM4_STOP <i> TIM4 counter stopped when core is halted
// <o.1> DBG_TIM3_STOP <i> TIM3 counter stopped when core is halted
// <o.0> DBG_TIM2_STOP <i> TIM2 counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz1 = 0x00000000;
// <h> Debug MCU APB1 freeze register 2 (DBGMCU_APB1FZR2)
// <i> Reserved bits must be kept at reset value
// <o.5> DBG_LPTIM2_STOP <i> LPTIM2 counter stopped when core is halted
// <o.1> DBG_I2C4_STOP <i> I2C4 SMBUS timeout counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz2 = 0x00000000;
// <h> Debug MCU APB2 freeze register (DBGMCU_APB2FZR)
// <i> Reserved bits must be kept at reset value
// <o.18> DBG_TIM17_STOP <i> TIM17 counter stopped when core is halted
// <o.17> DBG_TIM16_STOP <i> TIM16 counter stopped when core is halted
// <o.16> DBG_TIM15_STOP <i> TIM15 counter stopped when core is halted
// <o.13> DBG_TIM8_STOP <i> TIM8 counter stopped when core is halted
// <o.11> DBG_TIM1_STOP <i> TIM1 counter stopped when core is halted
// </h>
DbgMCU_APB2_Fz = 0x00000000;
// <h> TPIU Pin Routing (TRACECLK fixed on Pin PE2)
// <i> TRACECLK: Pin PE2
// <o1> TRACED0
// <i> ETM Trace Data 0
// <0x00040003=> Pin PE3
// <0x00020001=> Pin PC1
// <o2> TRACED1
// <i> ETM Trace Data 1
// <0x00040004=> Pin PE4
// <0x0002000A=> Pin PC10
// <o3> TRACED2
// <i> ETM Trace Data 2
// <0x00040005=> Pin PE5
// <0x00030002=> Pin PD2
// <o4> TRACED3
// <i> ETM Trace Data 3
// <0x00040006=> Pin PE6
// <0x0002000C=> Pin PC12
// </h>
TraceClk_Pin = 0x00040002;
TraceD0_Pin = 0x00040003;
TraceD1_Pin = 0x00040004;
TraceD2_Pin = 0x00040005;
TraceD3_Pin = 0x00040006;
// <<< end of configuration section >>>

View File

@@ -0,0 +1,21 @@
/*
* Auto generated Run-Time-Environment Configuration File
* *** Do not modify ! ***
*
* Project: 'TencentOS_tiny'
* Target: 'TencentOS_tiny'
*/
#ifndef RTE_COMPONENTS_H
#define RTE_COMPONENTS_H
/*
* Define the Device Header File:
*/
#define CMSIS_device_header "stm32l4xx.h"
#endif /* RTE_COMPONENTS_H */

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,56 @@
<html>
<body>
<pre>
<h1>µVision Build Log</h1>
<h2>Tool Versions:</h2>
IDE-Version: µVision V5.30.0.0
Copyright (C) 2020 ARM Ltd and ARM Germany GmbH. All rights reserved.
License Information: 1 2, 3, LIC=VGXG8-3EBEY-FWM2N-Y5VPW-1RV7D-LEXKU
Tool Versions:
Toolchain: MDK-ARM Plus Version: 5.30.0.0
Toolchain Path: D:\software\Keil\ARM\ARMCLANG\Bin
C Compiler: ArmClang.exe V6.14
Assembler: Armasm.exe V6.14
Linker/Locator: ArmLink.exe V6.14
Library Manager: ArmAr.exe V6.14
Hex Converter: FromElf.exe V6.14
CPU DLL: SARMCM3.DLL V5.30.0.0
Dialog DLL: DCM.DLL V1.17.3.0
Target DLL: STLink\ST-LINKIII-KEIL_SWO.dll V3.0.7.0
Dialog DLL: TCM.DLL V1.42.0.0
<h2>Project:</h2>
D:\Code\Project\tencentos\TencentOS-tiny\board\NUCLEO_STM32L496ZG\KEIL\tflitemicro_person_detection\TencentOS_tiny.uvprojx
Project File Date: 09/08/2021
<h2>Output:</h2>
*** Using Compiler 'V6.14', folder: 'D:\software\Keil\ARM\ARMCLANG\Bin'
Build target 'TencentOS_tiny'
"TencentOS_tiny\TencentOS_tiny.axf" - 0 Error(s), 0 Warning(s).
<h2>Software Packages used:</h2>
Package Vendor: ARM
http://www.keil.com/pack/ARM.CMSIS.5.7.0.pack
ARM.CMSIS.5.7.0
CMSIS (Cortex Microcontroller Software Interface Standard)
* Component: CORE Version: 5.4.0
Package Vendor: Keil
https://www.keil.com/pack/Keil.STM32L4xx_DFP.2.5.0.pack
Keil.STM32L4xx_DFP.2.5.0
STMicroelectronics STM32L4 Series Device Support, Drivers and Examples
<h2>Collection of Component include folders:</h2>
.\RTE\_TencentOS_tiny
D:\software\Keil\ARM\CMSIS\5.7.0\CMSIS\Core\Include
D:\software\Keil\Keil\STM32L4xx_DFP\2.5.0\Drivers\CMSIS\Device\ST\STM32L4xx\Include
<h2>Collection of Component Files used:</h2>
* Component: ARM::CMSIS:CORE:5.4.0
Build Time Elapsed: 00:00:02
</pre>
</body>
</html>

View File

@@ -0,0 +1,16 @@
; *************************************************************
; *** Scatter-Loading Description File generated by uVision ***
; *************************************************************
LR_IROM1 0x08000000 0x00100000 { ; load region size_region
ER_IROM1 0x08000000 0x00100000 { ; load address = execution address
*.o (RESET, +First)
*(InRoot$$Sections)
.ANY (+RO)
.ANY (+XO)
}
RW_IRAM1 0x20000000 0x00050000 { ; RW data
.ANY (+RW +ZI)
}
}

View File

@@ -0,0 +1,77 @@
// File: STM32L4x5_4x6.dbgconf
// Version: 1.0.0
// Note: refer to STM32L4x5 and STM32L4x6 Reference manual (RM0351)
// refer to STM32L475xx STM32L476xx STM32L486xx STM32L496xx STM32L4A6xx datasheets
// <<< Use Configuration Wizard in Context Menu >>>
// <h> Debug MCU configuration register (DBGMCU_CR)
// <o.2> DBG_STANDBY <i> Debug Standby mode
// <o.1> DBG_STOP <i> Debug Stop mode
// <o.0> DBG_SLEEP <i> Debug Sleep mode
// </h>
DbgMCU_CR = 0x00000007;
// <h> Debug MCU APB1 freeze register1 (DBGMCU_APB1FZR1)
// <i> Reserved bits must be kept at reset value
// <o.31> DBG_LPTIM1_STOP <i> LPTIM1 counter stopped when core is halted
// <o.26> DBG_CAN2_STOP <i> bxCAN2 stopped when core is halted
// <o.25> DBG_CAN1_STOP <i> bxCAN1 stopped when core is halted
// <o.23> DBG_I2C3_STOP <i> I2C3 SMBUS timeout counter stopped when core is halted
// <o.22> DBG_I2C2_STOP <i> I2C2 SMBUS timeout counter stopped when core is halted
// <o.21> DBG_I2C1_STOP <i> I2C1 SMBUS timeout counter stopped when core is halted
// <o.12> DBG_IWDG_STOP <i> Independent watchdog counter stopped when core is halted
// <o.11> DBG_WWDG_STOP <i> Window watchdog counter stopped when core is halted
// <o.10> DBG_RTC_STOP <i> RTC counter stopped when core is halted
// <o.5> DBG_TIM7_STOP <i> TIM7 counter stopped when core is halted
// <o.4> DBG_TIM6_STOP <i> TIM6 counter stopped when core is halted
// <o.3> DBG_TIM5_STOP <i> TIM5 counter stopped when core is halted
// <o.2> DBG_TIM4_STOP <i> TIM4 counter stopped when core is halted
// <o.1> DBG_TIM3_STOP <i> TIM3 counter stopped when core is halted
// <o.0> DBG_TIM2_STOP <i> TIM2 counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz1 = 0x00000000;
// <h> Debug MCU APB1 freeze register 2 (DBGMCU_APB1FZR2)
// <i> Reserved bits must be kept at reset value
// <o.5> DBG_LPTIM2_STOP <i> LPTIM2 counter stopped when core is halted
// <o.1> DBG_I2C4_STOP <i> I2C4 SMBUS timeout counter stopped when core is halted
// </h>
DbgMCU_APB1_Fz2 = 0x00000000;
// <h> Debug MCU APB2 freeze register (DBGMCU_APB2FZR)
// <i> Reserved bits must be kept at reset value
// <o.18> DBG_TIM17_STOP <i> TIM17 counter stopped when core is halted
// <o.17> DBG_TIM16_STOP <i> TIM16 counter stopped when core is halted
// <o.16> DBG_TIM15_STOP <i> TIM15 counter stopped when core is halted
// <o.13> DBG_TIM8_STOP <i> TIM8 counter stopped when core is halted
// <o.11> DBG_TIM1_STOP <i> TIM1 counter stopped when core is halted
// </h>
DbgMCU_APB2_Fz = 0x00000000;
// <h> TPIU Pin Routing (TRACECLK fixed on Pin PE2)
// <i> TRACECLK: Pin PE2
// <o1> TRACED0
// <i> ETM Trace Data 0
// <0x00040003=> Pin PE3
// <0x00020001=> Pin PC1
// <o2> TRACED1
// <i> ETM Trace Data 1
// <0x00040004=> Pin PE4
// <0x0002000A=> Pin PC10
// <o3> TRACED2
// <i> ETM Trace Data 2
// <0x00040005=> Pin PE5
// <0x00030002=> Pin PD2
// <o4> TRACED3
// <i> ETM Trace Data 3
// <0x00040006=> Pin PE6
// <0x0002000C=> Pin PC12
// </h>
TraceClk_Pin = 0x00040002;
TraceD0_Pin = 0x00040003;
TraceD1_Pin = 0x00040004;
TraceD2_Pin = 0x00040005;
TraceD3_Pin = 0x00040006;
// <<< end of configuration section >>>

View File

@@ -0,0 +1,21 @@
/*
* Auto generated Run-Time-Environment Configuration File
* *** Do not modify ! ***
*
* Project: 'TencentOS_tiny'
* Target: 'TencentOS_tiny'
*/
#ifndef RTE_COMPONENTS_H
#define RTE_COMPONENTS_H
/*
* Define the Device Header File:
*/
#define CMSIS_device_header "stm32l4xx.h"
#endif /* RTE_COMPONENTS_H */

File diff suppressed because one or more lines are too long

View File

@@ -203,7 +203,7 @@
<Group>
<GroupName>Application/User</GroupName>
<tvExp>1</tvExp>
<tvExp>0</tvExp>
<tvExpOptDlg>0</tvExpOptDlg>
<cbSel>0</cbSel>
<RteFlg>0</RteFlg>

View File

@@ -0,0 +1,134 @@
<html>
<body>
<pre>
<h1>µVision Build Log</h1>
<h2>Tool Versions:</h2>
IDE-Version: µVision V5.30.0.0
Copyright (C) 2020 ARM Ltd and ARM Germany GmbH. All rights reserved.
License Information: 1 2, 3, LIC=VGXG8-3EBEY-FWM2N-Y5VPW-1RV7D-LEXKU
Tool Versions:
Toolchain: MDK-ARM Plus Version: 5.30.0.0
Toolchain Path: D:\software\Keil\ARM\ARMCLANG\Bin
C Compiler: ArmClang.exe V6.14
Assembler: Armasm.exe V6.14
Linker/Locator: ArmLink.exe V6.14
Library Manager: ArmAr.exe V6.14
Hex Converter: FromElf.exe V6.14
CPU DLL: SARMCM3.DLL V5.30.0.0
Dialog DLL: DCM.DLL V1.17.3.0
Target DLL: STLink\ST-LINKIII-KEIL_SWO.dll V3.0.7.0
Dialog DLL: TCM.DLL V1.42.0.0
<h2>Project:</h2>
D:\Code\Project\tencentos\TencentOS-tiny\board\NUCLEO_STM32L496ZG\KEIL\tflitemicro_speech_detection\TencentOS_tiny.uvprojx
Project File Date: 09/08/2021
<h2>Output:</h2>
*** Using Compiler 'V6.14', folder: 'D:\software\Keil\ARM\ARMCLANG\Bin'
Build target 'TencentOS_tiny'
assembling startup_stm32l496xx.s...
compiling sys.c...
compiling gpio.c...
compiling usart.c...
compiling stm32l4xx_hal_msp.c...
compiling main.c...
compiling stm32l4xx_it.c...
compiling mcu_init.c...
compiling dma.c...
compiling dcmi.c...
compiling i2c.c...
compiling spi.c...
compiling tim.c...
compiling stm32l4xx_hal_uart_ex.c...
compiling stm32l4xx_hal_uart.c...
compiling stm32l4xx_hal.c...
compiling stm32l4xx_hal_rcc.c...
compiling stm32l4xx_hal_i2c_ex.c...
compiling stm32l4xx_hal_rcc_ex.c...
compiling stm32l4xx_hal_flash.c...
compiling stm32l4xx_hal_flash_ramfunc.c...
compiling stm32l4xx_hal_flash_ex.c...
compiling stm32l4xx_hal_gpio.c...
compiling stm32l4xx_hal_dma_ex.c...
compiling stm32l4xx_hal_dma.c...
compiling stm32l4xx_hal_pwr.c...
compiling stm32l4xx_hal_i2c.c...
compiling stm32l4xx_hal_cortex.c...
compiling stm32l4xx_hal_pwr_ex.c...
compiling stm32l4xx_hal_exti.c...
compiling stm32l4xx_hal_tim_ex.c...
compiling stm32l4xx_hal_dcmi.c...
compiling stm32l4xx_hal_spi_ex.c...
compiling system_stm32l4xx.c...
assembling port_s.S...
compiling stm32l4xx_hal_tim.c...
compiling tos_cpu.c...
compiling port_c.c...
compiling stm32l4xx_hal_spi.c...
compiling tos_char_fifo.c...
compiling tos_completion.c...
compiling tos_event.c...
compiling tos_countdownlatch.c...
compiling tos_binary_heap.c...
compiling tos_global.c...
compiling tos_mail_queue.c...
compiling tos_message_queue.c...
compiling tos_priority_message_queue.c...
compiling tos_mmblk.c...
compiling tos_mutex.c...
compiling tos_pend.c...
compiling tos_priority_mail_queue.c...
compiling tos_mmheap.c...
compiling tos_ring_queue.c...
compiling tos_sys.c...
compiling tos_sched.c...
compiling tos_sem.c...
compiling tos_robin.c...
compiling tos_priority_queue.c...
compiling tos_tick.c...
compiling delay.c...
compiling tos_time.c...
compiling lcd_2inch4.c...
compiling cmsis_os.c...
compiling tos_timer.c...
compiling tos_task.c...
compiling no_micro_features_data.cc...
compiling lcd_config.c...
compiling model.cc...
compiling sccb.c...
compiling ov2640.c...
compiling tflitemicro_speech_detection.c...
compiling retarget.c...
compiling yes_micro_features_data.cc...
compiling micro_speech.cc...
linking...
Program Size: Code=170694 RO-data=35030 RW-data=72 ZI-data=59616
FromELF: creating hex file...
"TencentOS_tiny\TencentOS_tiny.axf" - 0 Error(s), 0 Warning(s).
<h2>Software Packages used:</h2>
Package Vendor: ARM
http://www.keil.com/pack/ARM.CMSIS.5.7.0.pack
ARM.CMSIS.5.7.0
CMSIS (Cortex Microcontroller Software Interface Standard)
* Component: CORE Version: 5.4.0
Package Vendor: Keil
https://www.keil.com/pack/Keil.STM32L4xx_DFP.2.5.0.pack
Keil.STM32L4xx_DFP.2.5.0
STMicroelectronics STM32L4 Series Device Support, Drivers and Examples
<h2>Collection of Component include folders:</h2>
.\RTE\_TencentOS_tiny
D:\software\Keil\ARM\CMSIS\5.7.0\CMSIS\Core\Include
D:\software\Keil\Keil\STM32L4xx_DFP\2.5.0\Drivers\CMSIS\Device\ST\STM32L4xx\Include
<h2>Collection of Component Files used:</h2>
* Component: ARM::CMSIS:CORE:5.4.0
Build Time Elapsed: 00:00:43
</pre>
</body>
</html>

View File

@@ -0,0 +1,16 @@
; *************************************************************
; *** Scatter-Loading Description File generated by uVision ***
; *************************************************************
LR_IROM1 0x08000000 0x00100000 { ; load region size_region
ER_IROM1 0x08000000 0x00100000 { ; load address = execution address
*.o (RESET, +First)
*(InRoot$$Sections)
.ANY (+RO)
.ANY (+XO)
}
RW_IRAM1 0x20000000 0x00050000 { ; RW data
.ANY (+RW +ZI)
}
}

203
components/ai/nnom/LICENSE Normal file
View File

@@ -0,0 +1,203 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -0,0 +1,39 @@
# Neural Network on Microcontroller (NNoM)
[![Build Status](https://travis-ci.com/majianjia/nnom.svg?branch=master)](https://travis-ci.com/majianjia/nnom)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![DOI](https://zenodo.org/badge/166869630.svg)](https://zenodo.org/badge/latestdoi/166869630)
NNoM is a high-level inference Neural Network library specifically for microcontrollers.
For a detailed introduction, see https://github.com/majianjia/nnom
The original author documents how to train models with Keras and how to configure NNoM.
This project provides an NNoM-based software package for quick porting to TencentOS-tiny; it has been verified on the STM32L496ZG platform.
For an MNIST example, see board/NUCLEO_STM32L496ZG/KEIL/nnom_mnist.
## Usage on TencentOS-tiny
1. Add all .c files from the backends, core and layers folders under components/ai/nnom/src to the Keil project.
2. Add all header files from the inc and port folders to the Keil project include paths.
3. Select the memory allocation method in nnom_port.h. The test example enables the NNOM_USING_STATIC_MEMORY macro; to use dynamic memory instead, define nnom_malloc(n) and nnom_free(n) as the OS memory APIs, which for TencentOS-tiny are tos_mmheap_alloc(n) and tos_mmheap_free(n).
4. When using static memory, define static_buf[size] and call nnom_set_static_buf(static_buf, sizeof(static_buf)) to specify the address and size of the static buffer; adjust its size to the needs of the model.
5. Write an example function following nnom_mnsit_example.c in examples/nnom_mnist, and implement the required system APIs as needed, for example using tos_systick_get() to read the system tick and measure inference time (see the sketch after this list).
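
The sketch below ties steps 4 and 5 together. It is a minimal illustration, not the shipped example: it assumes the model was exported with NNoM's Keras converter, which typically generates nnom_model_create() and the model's input buffer in weights.h, and that NNOM_USING_STATIC_MEMORY is enabled in nnom_port.h; the buffer size and function name are placeholders to adjust for the actual model.

```c
#include <stdio.h>
#include "nnom.h"
#include "weights.h"      /* generated by the NNoM converter (assumption) */
#include "tos_k.h"        /* TencentOS-tiny kernel API: tos_systick_get() */

#define NNOM_BUF_SIZE   (16 * 1024)          /* placeholder: size to the model */
static uint8_t static_buf[NNOM_BUF_SIZE];

void nnom_demo(void)
{
    nnom_model_t *model;
    k_tick_t start, elapsed;

    /* Step 4: hand the static buffer to NNoM before the model is built. */
    nnom_set_static_buf(static_buf, sizeof(static_buf));

    /* Build the model described by weights.h. */
    model = nnom_model_create();

    /* Copy the input image into the model's input buffer here
       (the shipped example writes it to the buffer declared in weights.h). */

    /* Step 5: time one inference with the system tick. */
    start = tos_systick_get();
    model_run(model);
    elapsed = tos_systick_get() - start;

    printf("inference took %u ticks\r\n", (unsigned)elapsed);
}
```

Using the static buffer keeps all of NNoM's working memory in one block whose size is visible at build time; with the dynamic option the same calls go through tos_mmheap_alloc()/tos_mmheap_free() instead.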
## Notes
Under Keil, confirm that printf is properly retargeted (check the MicroLIB option) and make sure the ARM Compiler is set to "Use default compiler version 5".
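
If printf has not been retargeted yet, a common pattern under Keil with MicroLIB is to override fputc and forward each character to a UART. The sketch below only illustrates that pattern; huart1 is a placeholder for whichever HAL UART handle drives the board's console.

```c
#include <stdio.h>
#include "stm32l4xx_hal.h"

extern UART_HandleTypeDef huart1;   /* placeholder: the board's console UART handle */

/* With MicroLIB, printf ends up here; forward each character to the UART. */
int fputc(int ch, FILE *f)
{
    (void)f;
    HAL_UART_Transmit(&huart1, (uint8_t *)&ch, 1, 0xFFFF);
    return ch;
}
```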
## Licenses
NNoM is released under Apache License 2.0 since nnom-V0.2.0.
License and copyright information can be found within the code.

View File

@@ -0,0 +1,96 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_ACTIVATION_H__
#define __NNOM_ACTIVATION_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// activation layer
typedef struct _nnom_activation_layer_t
{
nnom_layer_t super;
nnom_activation_t *act;
} nnom_activation_layer_t;
// activation with fixed q format (tanh and sigmoid)
typedef struct _nnom_activation_fixed_q_t
{
nnom_activation_t super;
uint8_t dec_bit;
} nnom_activation_fixed_q_t;
// leaky relu
typedef struct _nnom_activation_leaky_relu_t
{
nnom_activation_t super;
q7_t alpha; // alpha is present by q0.7 format. (-128 = -1)
} nnom_activation_leaky_relu_t;
// advance relu (full ReLU)
typedef struct _nnom_activation_adv_relu_t
{
nnom_activation_t super;
q7_t negative_slope; // negative_slope is present by q0.7 format. (-128 = -1)
float max; // cap of the max value
float threshold; // threshold
} nnom_activation_adv_relu_t;
// method
nnom_status_t activation_run(nnom_layer_t* layer);
nnom_status_t activation_free(nnom_layer_t *layer);
// activation delete
void act_delete(nnom_activation_t* act);
// a direct api on tensor
nnom_status_t act_tensor_run(nnom_activation_t* act, nnom_tensor_t* tensor);
// Layer API
nnom_layer_t *Activation(nnom_activation_t *act);
nnom_layer_t *ReLU(void);
nnom_layer_t *LeakyReLU(float alpha);
nnom_layer_t *AdvReLU(float alpha, float max, float threshold);
nnom_layer_t *Sigmoid(int32_t dec_bit);
nnom_layer_t *TanH(int32_t dec_bit);
// Activation API.
nnom_activation_t* act_relu(void);
nnom_activation_t* act_leaky_relu(float alpha);
nnom_activation_t* act_adv_relu(float negative_slope, float max, float threshold);
nnom_activation_t* act_tanh(int32_t dec_bit);
nnom_activation_t* act_sigmoid(int32_t dec_bit);
nnom_activation_t* act_hard_tanh(int32_t dec_bit);
nnom_activation_t* act_hard_sigmoid(int32_t dec_bit);
// utils
int32_t act_get_dec_bit(nnom_activation_type_t type, int32_t dec_bit);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_ACTIVATION_H__ */

View File

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_AVGPOOL_H__
#define __NNOM_AVGPOOL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_maxpool.h"
// Avg Pooling
typedef nnom_maxpool_layer_t nnom_avgpool_layer_t;
// method
nnom_status_t avgpooling_build(nnom_layer_t *layer);
nnom_status_t avgpool_run(nnom_layer_t *layer);
// API
nnom_layer_t *avgpool_s(const nnom_pool_config_t * config);
nnom_layer_t *AvgPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_AVGPOOL_H__ */

View File

@@ -0,0 +1,43 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_BASELAYER_H__
#define __NNOM_BASELAYER_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_input.h"
// method
nnom_status_t default_build(nnom_layer_t *layer);
nnom_status_t default_run(nnom_layer_t *layer);
// API
nnom_layer_t *baselayer_s(const nnom_layer_config_t * config);
nnom_layer_t *BaseLayer(void);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_BASELAYER_H__ */

View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_CONCAT_H__
#define __NNOM_CONCAT_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// concatenate layer
typedef struct _nnom_concat_layer
{
nnom_layer_t super;
int8_t axis;
} nnom_concat_layer_t;
typedef struct _nnom_concat_config_t
{
nnom_layer_config_t super;
int8_t axis;
} nnom_concat_config_t;
// method
nnom_status_t concat_build(nnom_layer_t *layer);
nnom_status_t concat_run(nnom_layer_t *layer);
// API
nnom_layer_t *concat_s(const nnom_concat_config_t *config);
nnom_layer_t *Concat(int8_t axis);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_CONCAT_H__ */

View File

@@ -0,0 +1,83 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_CONV2D_H__
#define __NNOM_CONV2D_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// child layers parameters
typedef struct _nnom_conv2d_layer_t
{
nnom_layer_t super;
nnom_3d_shape_t kernel;
nnom_3d_shape_t stride;
nnom_3d_shape_t pad;
nnom_3d_shape_t dilation;
nnom_padding_t padding_type;
uint32_t filter_mult; // filter size (for conv) or multilplier (for depthwise)
nnom_tensor_t *weight;
nnom_tensor_t *bias;
// test
nnom_qformat_param_t * output_rshift;
nnom_qformat_param_t * bias_lshift;
} nnom_conv2d_layer_t;
// a machine interface for configuration
typedef struct _nnom_conv2d_config_t
{
nnom_layer_config_t super;
nnom_qtype_t qtype; //quantisation type(per channel or per layer)
nnom_tensor_t *weight;
nnom_tensor_t *bias;
nnom_qformat_param_t *output_shift;
nnom_qformat_param_t *bias_shift;
uint32_t filter_size;
int8_t kernel_size[2];
int8_t stride_size[2];
int8_t padding_size[2];
int8_t dilation_size[2];
nnom_padding_t padding_type;
} nnom_conv2d_config_t;
// method
nnom_status_t conv2d_run(nnom_layer_t *layer);
nnom_status_t conv2d_build(nnom_layer_t *layer);
nnom_status_t conv2d_free(nnom_layer_t *layer);
// utils
uint32_t conv_output_length(uint32_t input_length, uint32_t filter_size, nnom_padding_t padding, uint32_t stride, uint32_t dilation);
// API
nnom_layer_t *conv2d_s(const nnom_conv2d_config_t *config);
nnom_layer_t *Conv2D(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
const nnom_weight_t *w, const nnom_bias_t *b);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_CONV2D_H__ */

View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-30 Jianjia Ma The first version
*/
#ifndef __NNOM_DECONV2D_H__
#define __NNOM_DECONV2D_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_conv2d.h"
// child layers parameters
typedef nnom_conv2d_layer_t nnom_conv2d_trans_layer_t;
typedef nnom_conv2d_config_t nnom_conv2d_trans_config_t;
// method
nnom_status_t conv2d_trans_run(nnom_layer_t *layer);
nnom_status_t conv2d_trans_build(nnom_layer_t *layer);
// utils
uint32_t conv_trans_output_length(uint32_t input_length, uint32_t filter_size, nnom_padding_t padding, uint32_t stride, uint32_t dilation);
// API
nnom_layer_t *conv2d_trans_s(const nnom_conv2d_config_t *config);
nnom_layer_t *Conv2DTrans(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
const nnom_weight_t *w, const nnom_bias_t *b);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_DECONV2D_H__ */

View File

@@ -0,0 +1,48 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_CROPPING_H__
#define __NNOM_CROPPING_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_zero_padding.h"
// Cropping, same as zeropadding
typedef nnom_zero_padding_layer_t nnom_cropping_layer_t;
typedef nnom_zero_padding_config_t nnom_cropping_config_t;
// method
nnom_status_t cropping_build(nnom_layer_t *layer);
nnom_status_t cropping_run(nnom_layer_t *layer);
// API
nnom_layer_t * cropping_s(const nnom_cropping_config_t *config);
nnom_layer_t *Cropping(nnom_border_t pad);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_CROPPING_H__ */

View File

@@ -0,0 +1,63 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_DENSE_H__
#define __NNOM_DENSE_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
typedef struct _nnom_dense_layer_t
{
nnom_layer_t super;
size_t output_unit;
nnom_tensor_t *weight;
nnom_tensor_t *bias;
nnom_qformat_param_t *output_rshift;
nnom_qformat_param_t *bias_lshift;
} nnom_dense_layer_t;
// a machine interface for configuration
typedef struct _nnom_dense_config_t
{
nnom_layer_config_t super;
nnom_qtype_t qtype; //quantisation type(per channel or per layer)
nnom_tensor_t *weight;
nnom_tensor_t *bias;
nnom_qformat_param_t *output_shift;
nnom_qformat_param_t *bias_shift;
} nnom_dense_config_t;
// method
nnom_status_t dense_free(nnom_layer_t *layer);
nnom_status_t dense_build(nnom_layer_t *layer);
nnom_status_t dense_run(nnom_layer_t *layer);
// API
nnom_layer_t *dense_s(const nnom_dense_config_t *config);
nnom_layer_t *Dense(size_t output_unit, const nnom_weight_t *w, const nnom_bias_t *b);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_DENSE_H__ */

View File

@@ -0,0 +1,44 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_DW_CONV2D_H__
#define __NNOM_DW_CONV2D_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_conv2d.h"
// method
nnom_status_t dw_conv2d_build(nnom_layer_t *layer);
nnom_status_t dw_conv2d_run(nnom_layer_t *layer);
//API
nnom_layer_t *dw_conv2d_s(const nnom_conv2d_config_t *config);
nnom_layer_t *DW_Conv2D(uint32_t multiplier, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
const nnom_weight_t *w, const nnom_bias_t *b);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_DW_CONV2D_H__ */

View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_FLATTEN_H__
#define __NNOM_FLATTEN_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// no special parameters, but the config struct is still needed.
typedef struct _nnom_flatten_config_t{
nnom_layer_config_t super;
} nnom_flatten_config_t;
// method
nnom_status_t flatten_build(nnom_layer_t *layer);
nnom_status_t flatten_run(nnom_layer_t *layer);
// API
nnom_layer_t *flatten_s(const nnom_flatten_config_t *config);
nnom_layer_t *Flatten(void);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_FLATTEN_H__ */

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_GLOBAL_POOL_H__
#define __NNOM_GLOBAL_POOL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_maxpool.h"
typedef struct _nnom_global_pool_config_t
{
nnom_layer_config_t super;
int16_t output_shift;
}nnom_global_pool_config_t;
// method
nnom_status_t global_pool_build(nnom_layer_t *layer);
// API
nnom_layer_t * global_maxpool_s(const nnom_global_pool_config_t *config);
nnom_layer_t * global_avgpool_s(const nnom_global_pool_config_t *config);
nnom_layer_t * global_sumpool_s(const nnom_global_pool_config_t *config);
nnom_layer_t *GlobalMaxPool(void);
nnom_layer_t *GlobalAvgPool(void);
nnom_layer_t *GlobalSumPool(void);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_GLOBAL_POOL_H__ */

View File

@@ -0,0 +1,60 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-08-27 Jianjia Ma The first version
*/
#ifndef __NNOM_GRU_CELL_H__
#define __NNOM_GRU_CELL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "nnom_rnn.h"
#include "nnom_activation.h"
typedef struct _nnom_gru_cell_config_t
{
nnom_layer_config_t super;
nnom_tensor_t *weights;
nnom_tensor_t* recurrent_weights;
nnom_tensor_t *bias;
nnom_qformat_param_t q_dec_z, q_dec_h; // z, r, h
uint16_t units;
} nnom_gru_cell_config_t;
typedef struct _nnom_gru_cell_t
{
nnom_rnn_cell_t super;
nnom_tensor_t* weights;
nnom_tensor_t* recurrent_weights;
nnom_tensor_t* bias;
// decide later.
// z, r, h
nnom_qformat_param_t q_dec_z, q_dec_h;
nnom_qformat_param_t oshift_iw, oshift_hw, bias_shift;
} nnom_gru_cell_t;
// gru
nnom_rnn_cell_t *gru_cell_s(const nnom_gru_cell_config_t* config);
nnom_status_t gru_cell_free(nnom_rnn_cell_t* cell);
nnom_status_t gru_cell_build(nnom_rnn_cell_t* cell);
nnom_status_t gru_cell_run(nnom_rnn_cell_t* cell);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_GRU_CELL_H__ */

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_INPUT_H__
#define __NNOM_INPUT_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// IO layer
typedef struct _nnom_io_layer
{
nnom_layer_t super;
nnom_3d_shape_t shape;
nnom_qformat_param_t dec_bit;
void *buf; //input or output
} nnom_io_layer_t;
typedef struct _nnom_io_config_t
{
nnom_layer_config_t super;
nnom_tensor_t *tensor;
}nnom_io_config_t;
// method
nnom_status_t input_build(nnom_layer_t *layer);
nnom_status_t input_run(nnom_layer_t *layer);
// API
nnom_layer_t *input_s(const nnom_io_config_t* config);
nnom_layer_t *Input(nnom_3d_shape_t input_shape, void *p_buf);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_INPUT_H__ */

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_LAMBDA_H__
#define __NNOM_LAMBDA_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_input.h"
// lambda layer
typedef struct _nnom_lambda_layer_t
{
nnom_layer_t super;
void *parameters; // parameters for lambda
} nnom_lambda_layer_t;
// lambda layer
typedef struct _nnom_lambda_config_t
{
nnom_layer_config_t super;
nnom_status_t (*run_func_name)(nnom_layer_t *layer); // run method. required
nnom_status_t (*build_func_name)(nnom_layer_t *layer);// compute output buffer shape. can be left null, will call default_build()
nnom_status_t (*free_func_name)(nnom_layer_t *layer); // a callback to free private resources (comp buf not included) can be left null
void *parameters; // parameters for lambda
} nnom_lambda_config_t;
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_LAMBDA_H__ */
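
/* Illustrative sketch, not part of the header above: a user-defined run
 * callback wired in through the Lambda() constructor declared in
 * nnom_layers.h. example_negate_run is a hypothetical name; the tensor
 * members used below follow the definitions in nnom.h. */
static nnom_status_t example_negate_run(nnom_layer_t *layer)
{
    nnom_tensor_t *in  = layer->in->tensor;
    nnom_tensor_t *out = layer->out->tensor;
    size_t size = 1;
    for (uint8_t d = 0; d < in->num_dim; d++)
        size *= in->dim[d];                      /* element count from the tensor dims */
    for (size_t i = 0; i < size; i++)            /* negate q7 input into the output */
        ((int8_t *)out->p_data)[i] = (int8_t)(-((int8_t *)in->p_data)[i]);
    return NN_SUCCESS;
}
/* nnom_layer_t *lm = Lambda(example_negate_run, NULL, NULL, NULL); */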

View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-08-24 Jianjia Ma The first version
*/
#ifndef __NNOM_LSTM_CELL_H__
#define __NNOM_LSTM_CELL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "nnom_rnn.h"
#include "nnom_activation.h"
// a machine interface for configuration
typedef struct _nnom_lstm_cell_config_t
{
nnom_layer_config_t super;
nnom_tensor_t *weights;
nnom_tensor_t* recurrent_weights;
nnom_tensor_t *bias;
nnom_qformat_param_t q_dec_z, q_dec_h, q_dec_c; // z = iw + hw, c = cell state; h=output and memory
uint16_t units;
} nnom_lstm_cell_config_t;
typedef struct _nnom_lstm_cell_t
{
nnom_rnn_cell_t super;
nnom_tensor_t* weights;
nnom_tensor_t* recurrent_weights;
nnom_tensor_t* bias;
// experimental,
// iw: input x weight
// hw: hidden state x recurrent weight
// h: hidden state (memory)
// c: cell state
nnom_qformat_param_t q_dec_z, q_dec_h, q_dec_c;
nnom_qformat_param_t oshift_iw, oshift_hw, oshift_zc, bias_shift;
} nnom_lstm_cell_t;
// LSTM
nnom_rnn_cell_t *lstm_cell_s(const nnom_lstm_cell_config_t* config);
nnom_status_t lstm_cell_free(nnom_rnn_cell_t* cell);
nnom_status_t lstm_cell_q7_q15_build(nnom_rnn_cell_t* cell);
nnom_status_t lstm_cell_q7_q15_run(nnom_rnn_cell_t* cell);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_LSTM_CELL_H__ */

View File

@@ -0,0 +1,63 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_MATRIX_H__
#define __NNOM_MATRIX_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// the maximum number of input layers hooked to this layer
#define MAX_INPUT_LAYER 8
// matrix layer
typedef struct _nnom_matrix_layer_t
{
nnom_layer_t super;
int16_t oshift; // output right shift
} nnom_matrix_layer_t;
typedef struct _nnom_matrix_config_t
{
nnom_layer_config_t super;
int16_t output_shift; // output right shift
} nnom_matrix_config_t;
// methods
nnom_layer_t* _same_shape_matrix_layer(void);
nnom_status_t add_run(nnom_layer_t *layer);
nnom_status_t sub_run(nnom_layer_t *layer);
nnom_status_t mult_run(nnom_layer_t *layer);
// API
nnom_layer_t *add_s(const nnom_matrix_config_t * config);
nnom_layer_t *sub_s(const nnom_matrix_config_t * config);
nnom_layer_t *mult_s(const nnom_matrix_config_t * config);
nnom_layer_t *Add(int16_t oshift);
nnom_layer_t *Sub(int16_t oshift);
nnom_layer_t *Mult(int16_t oshift);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_MATRIX_H__ */
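
/* Illustrative sketch, not part of the header above: element-wise layers are
 * normally used as the "method" argument of model->merge() (declared in
 * nnom.h) to join two branches; oshift = 0 keeps the inputs' Q format.
 * example_residual_add and its arguments are hypothetical. */
static nnom_layer_t *example_residual_add(nnom_model_t *m,
                                          nnom_layer_t *branch_a,
                                          nnom_layer_t *branch_b)
{
    return m->merge(Add(0), branch_a, branch_b); /* out = branch_a + branch_b */
}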

View File

@@ -0,0 +1,63 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_MAXPOOL_H__
#define __NNOM_MAXPOOL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// Max Pooling
typedef struct _nnom_maxpool_layer_t
{
nnom_layer_t super;
nnom_3d_shape_t kernel;
nnom_3d_shape_t stride;
nnom_3d_shape_t pad;
nnom_padding_t padding_type;
int16_t output_shift; // reserve
} nnom_maxpool_layer_t;
// a machine interface for configuration
typedef struct _nnom_pool_config_t
{
nnom_layer_config_t super;
nnom_padding_t padding_type;
int16_t output_shift;
int8_t kernel_size[2];
int8_t stride_size[2];
int8_t num_dim;
} nnom_pool_config_t;
// method
nnom_status_t maxpool_build(nnom_layer_t *layer);
nnom_status_t maxpool_run(nnom_layer_t *layer);
// API
nnom_layer_t *maxpool_s(const nnom_pool_config_t * config);
nnom_layer_t *MaxPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_MAXPOOL_H__ */

View File

@@ -0,0 +1,43 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_OUTPUT_H__
#define __NNOM_OUTPUT_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_input.h"
// method
nnom_status_t output_build(nnom_layer_t *layer);
nnom_status_t output_run(nnom_layer_t *layer);
// API
nnom_layer_t *output_s(const nnom_io_config_t* config);
nnom_layer_t *Output(nnom_3d_shape_t output_shape, void *p_buf);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_OUTPUT_H__ */

View File

@@ -0,0 +1,85 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_RNN_H__
#define __NNOM_RNN_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// a machine interface for configuration
typedef struct _nnom_rnn_config_t
{
nnom_layer_config_t super;
bool return_sequence;
bool stateful;
bool go_backwards;
} nnom_rnn_config_t;
// RNN cell base type
typedef struct _nnom_rnn_cell_t
{
nnom_status_t (*run)(struct _nnom_rnn_cell_t* cell); // cell runner
nnom_status_t (*build)(struct _nnom_rnn_cell_t* cell); // cell builder, calculate buffer size, output data size
nnom_status_t (*free)(struct _nnom_rnn_cell_t* cell); //
nnom_layer_t *layer; // pointer to its layer holder
nnom_layer_config_t *config; // config for the cell, even though it uses the layer config type
nnom_rnn_cell_type_t type;
void *in_data; // input data
void *out_data; // output data
void *in_state; // input state data (or hidden state)
void *out_state; // output state data
size_t comp_buf_size; // the size of temporary buffer.
size_t state_size; // the size of hidden state
uint16_t units; // the output units
uint16_t feature_size; // the input feature size (vector size)
size_t macc; // stat of MAC count.
} nnom_rnn_cell_t;
typedef struct _nnom_rnn_layer_t
{
nnom_layer_t super;
nnom_rnn_cell_t *cell;
void *state_buf; // memory allocated to store state, size = 2 x size of state required by cell.
uint16_t timestamp_size;// the size of the timestamp dimension (sequence length)
bool return_sequence; // whether to return the output for each timestamp (the full sequence)
bool stateful; // whether the states are kept after one inference
bool go_backwards; // whether to process the timestamps backwards
} nnom_rnn_layer_t;
// rnn layer
nnom_layer_t *rnn_s(nnom_rnn_cell_t *cell, const nnom_rnn_config_t* config);
nnom_status_t rnn_run(nnom_layer_t* layer);
nnom_status_t rnn_build(nnom_layer_t* layer);
nnom_status_t rnn_free(nnom_layer_t* layer);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_RNN_H__ */

View File

@@ -0,0 +1,86 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-08-20 Jianjia Ma The first version
*/
#ifndef __NNOM_SIMPLE_CELL_H__
#define __NNOM_SIMPLE_CELL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "nnom_rnn.h"
#include "nnom_activation.h"
// This Simple Cell replicates Keras's SimpleRNNCell, as shown below
/*
def call(self, inputs, states, training=None):
prev_output = states[0] if nest.is_sequence(states) else states
h = K.dot(inputs, self.kernel)
h = K.bias_add(h, self.bias)
output = h + K.dot(prev_output, self.recurrent_kernel)
output = self.activation(output)
new_state = [output] if nest.is_sequence(states) else output
return output, new_state
*/
// a machine interface for configuration
typedef struct _nnom_simple_cell_config_t
{
nnom_layer_config_t super;
nnom_tensor_t *weights;
nnom_tensor_t* recurrent_weights;
nnom_tensor_t *bias;
nnom_qformat_param_t q_dec_iw, q_dec_hw, q_dec_h;
nnom_activation_type_t act_type; // type of the activation
uint16_t units;
} nnom_simple_cell_config_t;
typedef struct _nnom_simple_cell_t
{
nnom_rnn_cell_t super;
nnom_activation_type_t act_type;
nnom_tensor_t* weights;
nnom_tensor_t* recurrent_weights;
nnom_tensor_t* bias;
// experimental,
// iw: input x weight
// hw: hidden state x recurrent weight
// h: hidden state
nnom_qformat_param_t q_dec_iw, q_dec_hw, q_dec_h;
nnom_qformat_param_t oshift_iw, oshift_hw, bias_shift;
} nnom_simple_cell_t;
// RNN cells
// The shape for RNN input is (batch, timestamp, feature), where batch is always 1.
//
// SimpleCell
nnom_rnn_cell_t *simple_cell_s(const nnom_simple_cell_config_t* config);
nnom_status_t simple_cell_free(nnom_rnn_cell_t* cell);
nnom_status_t simple_cell_build(nnom_rnn_cell_t* cell);
nnom_status_t simple_cell_run(nnom_rnn_cell_t* cell);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_SIMPLE_CELL_H__ */

View File

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_SOFTMAX_H__
#define __NNOM_SOFTMAX_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
typedef struct _nnom_softmax_config_t
{
nnom_layer_config_t super;
} nnom_softmax_config_t;
// method
nnom_status_t softmax_run(nnom_layer_t *layer);
nnom_status_t softmax_build(nnom_layer_t *layer);
// API
nnom_layer_t *softmax_s(const nnom_softmax_config_t * config);
nnom_layer_t *Softmax(void);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_SOFTMAX_H__ */

View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_SUMPOOL_H__
#define __NNOM_SUMPOOL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
#include "layers/nnom_maxpool.h"
// Sum Pooling
typedef nnom_maxpool_layer_t nnom_sumpool_layer_t;
// method
nnom_status_t sumpool_build(nnom_layer_t *layer);
nnom_status_t sumpool_run(nnom_layer_t *layer);
// API
nnom_layer_t *sumpool_s(const nnom_pool_config_t * config);
nnom_layer_t *SumPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_SUMPOOL_H__ */

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_UPSAMPLE_H__
#define __NNOM_UPSAMPLE_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
// Up Sampling layer (UnPooling)
typedef struct _nnom_upsample_layer_t
{
nnom_layer_t super;
nnom_3d_shape_t kernel;
} nnom_upsample_layer_t;
typedef struct _nnom_upsample_config_t
{
nnom_layer_config_t super;
nnom_shape_data_t kernel[2];
} nnom_upsample_config_t;
// API
nnom_layer_t *upsample_s(const nnom_upsample_config_t *config);
nnom_layer_t *UpSample(nnom_3d_shape_t kernel);
// Methods
nnom_status_t upsample_build(nnom_layer_t *layer);
nnom_status_t upsample_run(nnom_layer_t *layer);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_UPSAMPLE_H__ */

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-03 Jianjia Ma The first version
*/
#ifndef __NNOM_ZERO_PADDING_H__
#define __NNOM_ZERO_PADDING_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_layers.h"
#include "nnom_local.h"
#include "nnom_tensor.h"
typedef struct _nnom_zero_padding_config_t
{
nnom_layer_config_t super;
nnom_border_t pad;
} nnom_zero_padding_config_t;
// zero padding
typedef struct _nnom_zero_padding_layer_t
{
nnom_layer_t super;
nnom_border_t pad;
} nnom_zero_padding_layer_t;
// API
nnom_layer_t *zeropadding_s(const nnom_zero_padding_config_t* config);
nnom_layer_t *ZeroPadding(nnom_border_t pad);
// method
nnom_status_t zero_padding_build(nnom_layer_t *layer);
nnom_status_t zero_padding_run(nnom_layer_t *layer);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_ZERO_PADDING_H__ */

View File

@@ -0,0 +1,413 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
* 2019-02-10 Jianjia Ma Compiler supports dense net connection
*/
#ifndef __NNOM_H__
#define __NNOM_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <stdarg.h>
#include <math.h>
#include "nnom_port.h"
#define NNOM_ALIGN (sizeof(char*)) // alignment for memory operations, equal to the size of a pointer in bytes.
#define q7_t int8_t
#define q15_t int16_t
#define q31_t int32_t
#define q63_t int64_t
/* version */
#define NNOM_MAJORVERSION 0 /**< major version number */
#define NNOM_SUBVERSION 4 /**< minor version number */
#define NNOM_REVISION 3 /**< revise version number */
#define NNOM_VERSION ((NNOM_MAJORVERSION * 10000) + (NNOM_SUBVERSION * 100) + NNOM_REVISION)
#ifdef ARM_NN_TRUNCATE
#define NNOM_TRUNCATE
#endif
#ifndef NNOM_TRUNCATE
#define NNOM_ROUND(out_shift) ((0x1 << out_shift) >> 1 )
#else
#define NNOM_ROUND(out_shift) 0
#endif
typedef enum
{
NN_SUCCESS = 0, /**< No error */
NN_ARGUMENT_ERROR = -1, /**< One or more arguments are incorrect */
NN_LENGTH_ERROR = -2, /**< Length of data buffer is incorrect */
NN_SIZE_MISMATCH = -3, /**< Size of matrices is not compatible with the operation. */
NN_NANINF = -4, /**< Not-a-number (NaN) or infinity is generated */
NN_SINGULAR = -5, /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
NN_TEST_FAILURE = -6, /**< Test Failed */
NN_NO_MEMORY = -7,
NN_MORE_TODO = -8
} nnom_status_t;
typedef enum
{
NNOM_INVALID = 0,
NNOM_BASE,
NNOM_INPUT,
NNOM_OUTPUT,
NNOM_CONV_2D,
NNOM_DW_CONV_2D,
NNOM_CONV2D_TRANS,
NNOM_BATCHNORM,
NNOM_DENSE,
NNOM_ZERO_PADDING,
NNOM_CROPPING,
NNOM_RNN,
NNOM_ACTIVATION,
NNOM_RELU,
NNOM_LEAKY_RELU,
NNOM_ADV_RELU,
NNOM_SIGMOID,
NNOM_TANH,
NNOM_SOFTMAX,
NNOM_MAXPOOL,
NNOM_GLOBAL_MAXPOOL,
NNOM_AVGPOOL,
NNOM_GLOBAL_AVGPOOL,
NNOM_SUMPOOL,
NNOM_GLOBAL_SUMPOOL,
NNOM_UPSAMPLE,
NNOM_FLATTEN,
NNOM_LAMBDA,
NNOM_CONCAT,
NNOM_ADD,
NNOM_SUB,
NNOM_MULT,
NNOM_TYPE_MAX
} nnom_layer_type_t;
#define DEFUALT_LAYER_NAMES \
{ \
"Unknown", \
"Base", \
"Input", \
"Output", \
"Conv2D", \
"DW_Conv2D", \
"Conv2DTrsp", \
"BatchNorm", \
"Dense", \
"ZeroPad", \
"Cropping", \
"RNN", \
"Activation", \
"ReLU", \
"Leaky_ReLU", \
"Adv_ReLU", \
"Sigmoid", \
"Tanh", \
"Softmax", \
"MaxPool", \
"GL_MaxPool", \
"AvgPool", \
"GL_AvgPool", \
"SumPool", \
"GL_SumPool", \
"UpSample", \
"Flatten", \
"Lambda", \
"Concat", \
"Add", \
"Sub", \
"Mult", \
}
extern const char default_layer_names[][12];
// We don't count softmax as an activation here; softmax is instantiated as a layer
typedef enum
{
ACT_UNKNOWN = 0,
ACT_RELU,
ACT_LEAKY_RELU,
ACT_ADV_RELU,
ACT_TANH,
ACT_SIGMOID,
ACT_HARD_TANH,
ACT_HARD_SIGMOID
} nnom_activation_type_t;
#define ACTIVATION_NAMES \
{ \
"Unknown", \
"ReLU", \
"LkyReLU", \
"AdvReLU", \
"TanH", \
"Sigmoid", \
"HrdTanH", \
"HrdSigd", \
}
extern const char default_activation_names[][8];
// RNN cell type
typedef enum
{
NNOM_UNKOWN_CELL = 0,
NNOM_SIMPLE_CELL,
NNOM_GRU_CELL,
NNOM_LSTM_CELL,
NNOM_CELL_TYPE_MAX
} nnom_rnn_cell_type_t;
#define DEFUALT_CELL_NAMES \
{ \
"Unknown", \
"Simple", \
"GRU", \
"LSTM", \
}
extern const char default_cell_names[][8];
// parameters
typedef enum
{
PADDING_VALID = 0,
PADDING_SAME
} nnom_padding_t;
#define NNOM_TENSOR_BUF_NULL (0) // This buffer is not in use
#define NNOM_TENSOR_BUF_TEMP (1) // The memory in IO is temporarily occupied and can be reused by other layers once the computation is done.
#define NNOM_TENSOR_BUF_RESERVED (2) // the memory is reserved for this layer only (not to be reused by other layers).
// currently used in compiling.
#define NNOM_BUF_EMPTY (0)
#define NNOM_BUF_FILLED (1)
// basic types
#define nnom_qformat_param_t int32_t // this should match the backend; a better mechanism is needed.
#define nnom_shape_data_t uint16_t
typedef struct _nnom_3d_shape_t
{
nnom_shape_data_t h, w, c;
} nnom_3d_shape_t;
typedef struct _nnom_border_t
{
nnom_shape_data_t top, bottom, left, right;
} nnom_border_t;
// the nnom_3d_shape_axis_t type provides axis[] format access to nnom_3d_shape_t
typedef union {
nnom_3d_shape_t s;
nnom_shape_data_t axis[sizeof(nnom_3d_shape_t) / sizeof(nnom_shape_data_t)];
} nnom_3d_shape_axis_t;
// tensor quantisation types
typedef enum
{
NNOM_QTYPE_PER_TENSOR = 0,
NNOM_QTYPE_PER_AXIS = 1
} nnom_qtype_t;
typedef struct _nnom_weights
{
const void *p_value;
nnom_qformat_param_t shift;
} nnom_weight_t;
typedef struct _nnom_bias
{
const void *p_value;
nnom_qformat_param_t shift;
} nnom_bias_t;
// experimental
typedef struct _nnom_tensor_t
{
void* p_data; // value
nnom_shape_data_t *dim; // dimension of this tensor
nnom_qformat_param_t *q_dec; // number of decimal bit for Q format (scale)
nnom_qformat_param_t *q_offset; // offset for each channel
nnom_qtype_t qtype; // the quantisation type
uint8_t num_dim; // the number of dimension
uint8_t bitwidth; // the data bit width; only 8-bit is supported for now
} nnom_tensor_t;
// nn wrappers
typedef struct _nnom_layer_t nnom_layer_t;
typedef struct _nnom_layer_io_t nnom_layer_io_t;
typedef struct _nnom_layer_hook_t nnom_layer_hook_t;
typedef struct _nnom_mem_block_t nnom_mem_block_t;
// activation wrapper
typedef struct _nnom_activation_t nnom_activation_t;
typedef struct _nnom_buf
{
nnom_mem_block_t *mem;
size_t size;
uint8_t type;
} nnom_buf_t;
// a memory block used to hold pre-assigned memory during compiling; it is then assigned to each tensor afterwards.
struct _nnom_mem_block_t
{
void *blk; // data block location
size_t size; // the maximum size for this block
uint8_t owners; // how many layers own this block
uint8_t state; // empty? filled? for static nn, currently only used in compiling
};
typedef struct _nnom_stat_t
{
size_t macc; //num. of mac operation
uint32_t time;
} nnom_layer_stat_t;
struct _nnom_layer_hook_t
{
nnom_layer_io_t *io; // hooked io
nnom_layer_hook_t *next; // next hook, including any secondary hooked layers
};
struct _nnom_layer_io_t
{
nnom_layer_hook_t hook; // for example: (layer->out)--hook--(layer->in)
nnom_layer_io_t *aux; // points to the auxiliary I/O (for multiple-I/O layers)
nnom_tensor_t *tensor; // experimental
nnom_mem_block_t *mem; // memory block handle, for compiling only. The memory is now passed via the tensor; this field is being phased out.
nnom_layer_t *owner; // which layer owns this io.
uint8_t type;
};
// structured configuration base type
typedef struct _nnom_layer_config_t
{
char* name; // the layer name in the pre-quantised model (the model trained by the user before conversion to NNoM)
} nnom_layer_config_t;
// layers base
struct _nnom_layer_t
{
nnom_layer_t *shortcut; // shortcut points to the next layer, applied on compiling
nnom_status_t (*run)(nnom_layer_t *layer); // run method. required
nnom_status_t (*build)(nnom_layer_t *layer); // compute output buffer shape. can be left null, will call default_build()
nnom_status_t (*free)(nnom_layer_t *layer); // a callback to free private resources (comp buf not included) can be left null
nnom_buf_t *comp; // computational buf
nnom_activation_t *actail; // I have an activation, I have a tail, wooo haaaa, act-tail!!!
nnom_layer_config_t *config; // point to the configuration of the layers. for machine api only.
nnom_layer_type_t type; // layer types
nnom_layer_io_t *in; // IO buff, last*layer, states
nnom_layer_io_t *out; // IO buff, next*layer, states
nnom_layer_stat_t stat; // stats, timing, ops
};
// activation base
struct _nnom_activation_t
{
nnom_status_t (*run)(struct _nnom_activation_t *act);
nnom_tensor_t *tensor;
nnom_activation_type_t type;
};
// local static functions when libc is not available
#ifdef NNOM_USING_STATIC_MEMORY
void nnom_set_static_buf(void* buf, size_t size);
void *nnom_malloc(size_t size);
void nnom_free(void* p);
#endif //NNOM_USING_STATIC_MEMORY
typedef struct _nnom_model nnom_model_t;
#include "nnom_tensor.h"
#include "nnom_layers.h"
#include "nnom_utils.h"
// models; I don't want to make the model class a child of the layer class yet
struct _nnom_model
{
nnom_layer_t *head;
nnom_layer_t *tail;
// model constructor
nnom_status_t (*add)(struct _nnom_model *m, nnom_layer_t *layer); // has to pass a raw value
nnom_layer_t *(*hook)(nnom_layer_t *curr, nnom_layer_t *last); // create hook between 2 layers' primary IO.
nnom_layer_t *(*merge)(nnom_layer_t *method, nnom_layer_t *in1, nnom_layer_t *in2); // an older interface for merging 2 inputs.
nnom_layer_t *(*mergex)(nnom_layer_t *method, int num, ...); // merge several layers using a multiple-input method (concat, add, ...)
nnom_layer_t *(*active)(nnom_activation_t *act, nnom_layer_t *target_layer); // add the activation to the existing layer's tail
// callback
nnom_status_t (*layer_callback)(nnom_model_t *m, nnom_layer_t *layer); // the layer callback is called after each layer (after its activation tail, actail).
// block memory for layers
nnom_mem_block_t blocks[NNOM_BLOCK_NUM];
size_t total_ops;
bool is_inited; // is this structure initialized
bool is_allocated; // is this structure allocated by nnom (not by user)
};
#define NNOM_NULL_CHECK(p) \
if ((p) == NULL) \
{ \
NNOM_LOG("Error: NULL object.\n"); \
return NN_ARGUMENT_ERROR; \
}
// utils
size_t nnom_alignto(size_t value, uint32_t alignment);
size_t nnom_io_length(nnom_layer_io_t *io);
size_t nnom_hook_length(nnom_layer_hook_t *hook);
// memory (malloc + memset 0)
void *nnom_mem(size_t size);
// get how much memory has been taken
size_t nnom_mem_stat(void);
// Model APIs
// create or init a model
nnom_model_t *new_model(nnom_model_t *m);
// compile as a sequential model
nnom_status_t sequencial_compile(nnom_model_t *m);
// compile as a functional model
nnom_status_t model_compile(nnom_model_t *m, nnom_layer_t *input, nnom_layer_t *output);
// run a prediction
nnom_status_t model_run(nnom_model_t *m);
// delete model.
void model_delete(nnom_model_t *m);
// check version
nnom_status_t check_model_version(unsigned long model_version);
// callback, called after each layer has finished the calculation.
// the callback must return NN_SUCCESS to keep the model running; otherwise, the model returns with the error code.
// this function returns NN_LENGTH_ERROR if a different callback has already been set.
nnom_status_t model_set_callback(nnom_model_t *m, nnom_status_t (*layer_callback)(nnom_model_t *m, nnom_layer_t *layer));
// delete callback.
void model_delete_callback(nnom_model_t *m);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_H__ */
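
/* Illustrative sketch, not part of the header above: building and running a
 * tiny functional model with the APIs declared in this file and in
 * nnom_layers.h. All example_* buffers, weights and shift values are
 * hypothetical placeholders; real values come from the NNoM model converter. */
static int8_t example_in[16];                               /* q7 input buffer */
static int8_t example_out[4];                               /* q7 output buffer */
static const int8_t example_w_data[4 * 16] = {0};           /* 16 inputs -> 4 units */
static const int8_t example_b_data[4]      = {0};
static const nnom_weight_t example_w = { example_w_data, 3 };
static const nnom_bias_t   example_b = { example_b_data, 2 };

static void example_build_and_run(void)
{
    nnom_model_t *m = new_model(NULL);                      /* allocate and init a model */
    nnom_layer_t *in, *x;

    in = Input(shape(1, 1, 16), example_in);                /* 1x1x16 input fed from buffer */
    x  = m->hook(Dense(4, &example_w, &example_b), in);     /* fully connected, 4 units */
    x  = m->hook(Softmax(), x);
    x  = m->hook(Output(shape(1, 1, 4), example_out), x);   /* copy result to example_out */

    model_compile(m, in, x);                                /* build graph, assign buffers */
    model_run(m);                                           /* one inference on example_in */
    model_delete(m);
}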

View File

@@ -0,0 +1,191 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
*/
#ifndef __NNOM_LAYERS_H__
#define __NNOM_LAYERS_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
// properties
nnom_3d_shape_t shape(size_t h, size_t w, size_t c);
nnom_3d_shape_t kernel(size_t h, size_t w);
nnom_3d_shape_t stride(size_t h, size_t w);
nnom_3d_shape_t dilation(size_t h, size_t w);
nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right);
//nnom_qformat_t qformat(int8_t m, int8_t n);
size_t shape_size(nnom_3d_shape_t* s);
// this function adds a new IO to the currently initialised IO
// input: the targeted IO that the new IO will be added to
// output: the new IO
nnom_layer_io_t* io_add_aux(nnom_layer_io_t* targeted_io);
nnom_layer_io_t *io_init(void *owner_layer, nnom_layer_io_t *io);
#define NN_CEILIF(x,y) (((x)+(y)-1)/(y))
#include "layers/nnom_activation.h"
#include "layers/nnom_concat.h"
#include "layers/nnom_conv2d.h"
#include "layers/nnom_cropping.h"
#include "layers/nnom_conv2d_trans.h"
#include "layers/nnom_dense.h"
#include "layers/nnom_dw_conv2d.h"
#include "layers/nnom_flatten.h"
#include "layers/nnom_global_pool.h"
#include "layers/nnom_input.h"
#include "layers/nnom_lambda.h"
#include "layers/nnom_matrix.h"
#include "layers/nnom_maxpool.h"
#include "layers/nnom_avgpool.h"
#include "layers/nnom_output.h"
#include "layers/nnom_rnn.h"
#include "layers/nnom_softmax.h"
#include "layers/nnom_sumpool.h"
#include "layers/nnom_upsample.h"
#include "layers/nnom_zero_padding.h"
#include "layers/nnom_rnn.h"
#include "layers/nnom_simple_cell.h"
#include "layers/nnom_lstm_cell.h"
#include "layers/nnom_gru_cell.h"
// Layer APIs ******
// (a summary of each individual layer's file)
// input/output
nnom_layer_t *Input(nnom_3d_shape_t input_shape, void *p_buf);
nnom_layer_t *Output(nnom_3d_shape_t output_shape, void *p_buf);
// Pooling
nnom_layer_t *MaxPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad);
nnom_layer_t *AvgPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad);
nnom_layer_t *SumPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad);
nnom_layer_t *GlobalMaxPool(void);
nnom_layer_t *GlobalAvgPool(void);
nnom_layer_t *GlobalSumPool(void);
// padding, cropping, upsample
nnom_layer_t *UpSample(nnom_3d_shape_t kernel);
nnom_layer_t *ZeroPadding(nnom_border_t pad);
nnom_layer_t *Cropping(nnom_border_t pad);
// Activation
nnom_layer_t *Activation(nnom_activation_t *act);
nnom_layer_t *ReLU(void);
nnom_layer_t *LeakyReLU(float alpha);
nnom_layer_t *Softmax(void);
nnom_layer_t *Sigmoid(int32_t dec_bit); // input dec bit
nnom_layer_t *TanH(int32_t dec_bit); // input dec bit
// Matrix
nnom_layer_t *Add(int16_t oshift); // output shift
nnom_layer_t *Sub(int16_t oshift); // output shift
nnom_layer_t *Mult(int16_t oshift); // output shift
nnom_layer_t *Flatten(void);
nnom_layer_t *Concat(int8_t axis);
// -- NN Constructors --
// conv2d
nnom_layer_t *Conv2D(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad,
const nnom_weight_t *w, const nnom_bias_t *b);
// deconv2d
nnom_layer_t *Conv2DTrans(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad,
const nnom_weight_t *w, const nnom_bias_t *b);
// depthwise_convolution
nnom_layer_t *DW_Conv2D(uint32_t multiplier, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad,
const nnom_weight_t *w, const nnom_bias_t *b);
// fully connected, dense
nnom_layer_t *Dense(size_t output_unit, const nnom_weight_t *w, const nnom_bias_t *b);
// Lambda Layers
nnom_layer_t *Lambda(nnom_status_t (*run)(nnom_layer_t *), // run method, required
nnom_status_t (*build)(nnom_layer_t *), // optional, call default_build() if left null
nnom_status_t (*free)(nnom_layer_t *), // not required if no resources needs to be deleted, can be left null.
void *parameters); // user private parameters for run method, left null if not needed.
// building methods
nnom_status_t default_build(nnom_layer_t* layer);
nnom_status_t input_build(nnom_layer_t* layer);
nnom_status_t conv2d_build(nnom_layer_t* layer);
nnom_status_t dw_conv2d_build(nnom_layer_t* layer);
nnom_status_t conv2d_trans_build(nnom_layer_t* layer);
nnom_status_t dense_build(nnom_layer_t* layer);
nnom_status_t rnn_build(nnom_layer_t* layer);
nnom_status_t upsample_build(nnom_layer_t* layer);
nnom_status_t zero_padding_build(nnom_layer_t* layer);
nnom_status_t cropping_build(nnom_layer_t* layer);
nnom_status_t maxpool_build(nnom_layer_t* layer);
nnom_status_t avgpool_build(nnom_layer_t* layer);
nnom_status_t sumpool_build(nnom_layer_t* layer);
nnom_status_t global_pool_build(nnom_layer_t* layer);
nnom_status_t flatten_build(nnom_layer_t* layer);
nnom_status_t concat_build(nnom_layer_t* layer);
// run
nnom_status_t input_run(nnom_layer_t* layer);
nnom_status_t output_run(nnom_layer_t* layer);
nnom_status_t flatten_run(nnom_layer_t* layer);
nnom_status_t default_run(nnom_layer_t* layer); // simply copy data from input to output
nnom_status_t dw_conv2d_run(nnom_layer_t* layer);
nnom_status_t conv2d_run(nnom_layer_t* layer);
nnom_status_t conv2d_trans_run(nnom_layer_t* layer);
nnom_status_t dense_run(nnom_layer_t* layer);
nnom_status_t rnn_run(nnom_layer_t* layer);
nnom_status_t upsample_run(nnom_layer_t* layer);
nnom_status_t zero_padding_run(nnom_layer_t* layer);
nnom_status_t cropping_run(nnom_layer_t* layer);
nnom_status_t activation_run(nnom_layer_t* layer);
nnom_status_t softmax_run(nnom_layer_t* layer);
nnom_status_t maxpool_run(nnom_layer_t* layer);
nnom_status_t avgpool_run(nnom_layer_t* layer);
nnom_status_t sumpool_run(nnom_layer_t* layer);
nnom_status_t concat_run(nnom_layer_t* layer);
nnom_status_t add_run(nnom_layer_t* layer);
nnom_status_t sub_run(nnom_layer_t* layer);
nnom_status_t mult_run(nnom_layer_t* layer);
// Activation APIs
// Softmax is not considered an activation in NNoM; Softmax is in the layer API.
nnom_activation_t* act_relu(void);
nnom_activation_t* act_leaky_relu(float alpha);
nnom_activation_t* act_sigmoid(int32_t dec_bit);
nnom_activation_t* act_tanh(int32_t dec_bit);
// direct API
nnom_status_t act_tensor_run(nnom_activation_t* act, nnom_tensor_t* tensor);
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_LAYERS_H__ */
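
/* Illustrative sketch, not part of the header above: using the shape helpers
 * and the activation API together. example_pool_relu is a hypothetical helper;
 * "m" is an existing model and "x" a previously hooked layer. */
static nnom_layer_t *example_pool_relu(nnom_model_t *m, nnom_layer_t *x)
{
    /* 2x2 max pooling with stride 2, then a ReLU attached to the layer's
     * activation tail via model->active(). */
    x = m->hook(MaxPool(kernel(2, 2), stride(2, 2), PADDING_VALID), x);
    x = m->active(act_relu(), x);
    return x;
}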

View File

@@ -0,0 +1,974 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Notice:
 * Code in this file includes derivative works from CMSIS, which is released under an alternative license.
 * Please check the LICENSE file for details.
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
* 2019-03-19 Jianjia Ma Local C implementation partly from CMSIS-NN
*/
#ifndef __NNOM_LOCAL_H__
#define __NNOM_LOCAL_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "stdint.h"
#include "nnom_port.h"
#ifdef ARM_NN_TRUNCATE
#define NNOM_TRUNCATE
#endif
// SSAT implementation with C code
#ifndef __NNOM_SSAT
static inline int __NNOM_SSAT(int32_t value, int32_t bit) {
int32_t min = -(1<<(bit-1));
int32_t max = (1<<(bit-1)) - 1;
if (value < min)
return min;
else if (value > max)
return max;
else
return value;
}
#endif
// USAT implementation with C code
#ifndef __NNOM_USAT
static inline int __NNOM_USAT(int32_t value, int32_t bit) {
int32_t max = (1<<(bit-1)) - 1;
if (value < 0)
return 0;
else if (value > max)
return max;
else
return value;
}
#endif
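
/* Illustrative sketch, not part of the original code: how the backends
 * typically fold a q31 accumulator back to q7, combining NNOM_ROUND (defined
 * in nnom.h) with the saturation helper above. example_requantize_q31_to_q7
 * and its shift value are hypothetical. */
static inline q7_t example_requantize_q31_to_q7(q31_t acc, uint16_t out_shift)
{
    acc += NNOM_ROUND(out_shift);                   /* half-up rounding unless NNOM_TRUNCATE */
    return (q7_t)__NNOM_SSAT(acc >> out_shift, 8);  /* saturate to the signed 8-bit range */
}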
#define MAX(A, B) ((A) > (B) ? (A) : (B))
#define MIN(A, B) ((A) < (B) ? (A) : (B))
// The functions/tables below are partially modified from the CMSIS-NN lib
// https://github.com/ARM-software/CMSIS_5
//
void local_avepool_q7_HWC(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, NULL by now
q7_t *Im_out);
void local_avepool_q7_CHW(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, NULL by now
q7_t *Im_out);
// modified from CMSIS-NN test_ref
void local_maxpool_q7_HWC(const q7_t * Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t * bufferA, // a buffer for local storage, NULL by now
q7_t * Im_out);
void local_maxpool_q7_CHW(const q7_t * Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t * bufferA, // a buffer for local storage, NULL by now
q7_t * Im_out);
void local_sumpool_q7_HWC(const q7_t * Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t * bufferA, // a buffer for local storage, size = 4*output_size
q7_t * Im_out);
void local_sumpool_q7_CHW(const q7_t * Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t * bufferA, // a buffer for local storage, size = 4*output_size
q7_t * Im_out);
// customised up sample pooling
void local_up_sampling_q7_HWC(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // NULL
q7_t *Im_out);
void local_up_sampling_q7_CHW(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // NULL
q7_t *Im_out);
void local_convolve_HWC_q7_nonsquare(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_convolve_CHW_q7_nonsquare(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_conv_trans_HWC_q7_nonsquare(const int8_t * Im_in,
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,// input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_depthwise_separable_conv_CHW_q7_nonsquare(const q7_t *Im_in,// input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_zero_padding_HWC_q7(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_zero_padding_CHW_q7(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_cropping_HWC_q7(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_cropping_CHW_q7(const q7_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q7_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_fully_connected_q7_opt(const q7_t * pV, // pointer to vector
const q7_t * pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t bias_shift, // amount of left-shift for bias
const uint16_t out_shift, // amount of right-shift for output
const q7_t * bias, q7_t * pOut, // output operand
q15_t * vec_buffer);
void local_fully_connected_q7(const q7_t * pV, // pointer to vector
const q7_t * pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t bias_shift, // amount of left-shift for bias
const uint16_t out_shift, // amount of right-shift for output
const q7_t * bias, q7_t * pOut, // output operand
q15_t * vec_buffer);
// matrix dot product
// it takes reordered weights as input (see the dense layer for detail; this is basically a dense opt without bias)
void local_dot_q7_opt(const q7_t *pV, // pointer to vector
const q7_t *pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t out_shift, // amount of right-shift for output
q7_t *pOut); // result buffer
void local_dot_q7(const q7_t *pV, // pointer to vector
const q7_t *pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t out_shift, // amount of right-shift for output
q7_t *pOut); // output operand
// softmax
void local_softmax_q7(const q7_t * vec_in, const uint32_t dim_vec, q7_t * p_out);
// sigmoid
void local_sigmoid_q7(q7_t * data, uint32_t size, int16_t int_width);
// tanh
void local_tanh_q7(q7_t * data, uint32_t size, int16_t int_width);
// relu
void local_relu_q7(q7_t * data, uint32_t size);
// leaky relu
void local_leaky_relu_q7(q7_t *data, q7_t alpha, uint32_t size);
// alpha in q7 format with dec_bit=7
// max and threshold have the same Q format as the activation
void local_adv_relu_q7(q7_t *data, q7_t alpha, q7_t max, q7_t threshold, uint32_t size);
// hard sigmoid
// y=0 if x < -2.5
// y=1 if x > 2.5
// otherwise y = 0.2 * x + 0.5 (quantised as y = 0.203125 * x + 0.5)
void local_hard_sigmoid_q7(q7_t *data, uint32_t size, int16_t dec_bit);
// hard tanh
// y=-1 if x < -1
// y=1 if x > 1
// otherwise y = x
void local_hard_tanh_q7(q7_t *data, uint32_t size, int16_t dec_bit);
// matrix ops
void local_mult_q7(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// add
void local_add_q7(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// sub
void local_sub_q7(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// take multiple blocks (>2) as input
void local_multiple_add_q7( q7_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q7_t **p_src);
void local_multiple_mult_q7( q7_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q7_t **p_src);
void local_multiple_sub_q7( q7_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q7_t **p_src);
// The tables below are credited to CMSIS
// For more info, check the CMSIS-NN lib
// https://github.com/ARM-software/CMSIS_5/blob/develop/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c
static const q7_t nnom_sigmoid_table_q7[256] = {
0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e,
0x50, 0x52, 0x53, 0x55, 0x57, 0x59, 0x5a, 0x5c,
0x5e, 0x5f, 0x61, 0x62, 0x63, 0x65, 0x66, 0x67,
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
0x71, 0x72, 0x72, 0x73, 0x74, 0x74, 0x75, 0x76,
0x76, 0x77, 0x77, 0x78, 0x78, 0x79, 0x79, 0x7a,
0x7a, 0x7a, 0x7b, 0x7b, 0x7b, 0x7c, 0x7c, 0x7c,
0x7c, 0x7c, 0x7d, 0x7d, 0x7d, 0x7d, 0x7d, 0x7e,
0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04,
0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06,
0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09,
0x0a, 0x0a, 0x0b, 0x0c, 0x0c, 0x0d, 0x0e, 0x0e,
0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
0x17, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x1f, 0x21,
0x22, 0x24, 0x26, 0x27, 0x29, 0x2b, 0x2d, 0x2e,
0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
};
static const q7_t nnom_tanh_table_q7[256] = {
0x00, 0x08, 0x10, 0x18, 0x1f, 0x27, 0x2e, 0x35,
0x3b, 0x41, 0x47, 0x4c, 0x51, 0x56, 0x5a, 0x5e,
0x61, 0x65, 0x68, 0x6a, 0x6d, 0x6f, 0x71, 0x72,
0x74, 0x75, 0x76, 0x78, 0x78, 0x79, 0x7a, 0x7b,
0x7b, 0x7c, 0x7c, 0x7d, 0x7d, 0x7e, 0x7e, 0x7e,
0x7e, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x81,
0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x82,
0x82, 0x82, 0x82, 0x82, 0x83, 0x83, 0x84, 0x84,
0x85, 0x85, 0x86, 0x87, 0x88, 0x88, 0x8a, 0x8b,
0x8c, 0x8e, 0x8f, 0x91, 0x93, 0x96, 0x98, 0x9b,
0x9f, 0xa2, 0xa6, 0xaa, 0xaf, 0xb4, 0xb9, 0xbf,
0xc5, 0xcb, 0xd2, 0xd9, 0xe1, 0xe8, 0xf0, 0xf8,
};
// ------------ 16bit ops --------------------
void local_avepool_q15_HWC(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_avepool_q15_CHW(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_maxpool_q15_HWC(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_maxpool_q15_CHW(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_sumpool_q15_HWC(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, size = 4*output_size
q15_t *Im_out);
void local_sumpool_q15_CHW(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t padding_x, // padding sizes
const uint16_t padding_y, // padding sizes
const uint16_t stride_x, // stride
const uint16_t stride_y, // stride
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
const uint16_t output_shift, // output right shift
q7_t *bufferA, // a buffer for local storage, size = 4*output_size
q15_t *Im_out);
void local_up_sampling_q15_HWC(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_up_sampling_q15_CHW(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x or W
const uint16_t dim_im_in_y, // input image dimension y or H
const uint16_t ch_im_in, // number of input image channels
const uint16_t dim_kernel_x, // window kernel size
const uint16_t dim_kernel_y, // window kernel size
const uint16_t dim_im_out_x, // output image dimension x or W
const uint16_t dim_im_out_y, // output image dimension y or H
q7_t *bufferA, // a buffer for local storage, NULL by now
q15_t *Im_out);
void local_convolve_HWC_q15_nonsquare(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_convolve_CHW_q15_nonsquare(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_conv_trans_HWC_q15_nonsquare(const int8_t * Im_in,
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_depthwise_separable_conv_HWC_q15_nonsquare(const q15_t *Im_in,// input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_depthwise_separable_conv_CHW_q15_nonsquare(const q15_t *Im_in,// input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const q7_t *wt, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const uint16_t dilation_x, // dilation x
const uint16_t dilation_y, // dilation y
const q7_t *bias, // bias
const nnom_qformat_param_t *bias_shift, // bias shifts
const nnom_qformat_param_t *out_shift, // output shift
const nnom_qtype_t q_type, // per channel or per tensor
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y, // output image dimension y
q15_t *bufferA, //buffer space for input
q7_t *bufferB //buffer space for output
);
void local_zero_padding_HWC_q15(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_zero_padding_CHW_q15(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_cropping_HWC_q15(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_cropping_CHW_q15(const q15_t *Im_in, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const uint16_t padding_top, // padding sizes y
const uint16_t padding_bottom, // padding sizes y
const uint16_t padding_left, // padding sizes x
const uint16_t padding_right, // padding sizes x
q15_t *Im_out, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y); // output image dimension y
void local_dot_q15(const q15_t *pV, // pointer to vector
const q15_t *pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t out_shift, // amount of right-shift for output
q15_t *pOut); // output operand)
void local_dot_q15_opt(const q15_t * pV,
const q15_t * pM,
const uint16_t dim_vec,
const uint16_t num_of_rows,
const uint16_t out_shift,
q15_t * pOut);
// original implementation
// this supports no bias; in that case it performs like a plain dot product.
// set `bias = NULL` to use it that way
void local_fully_connected_mat_q7_vec_q15(const q15_t * pV, // pointer to vector
const q7_t * pM, // pointer to matrix
const uint16_t dim_vec, // length of the vector
const uint16_t num_of_rows, // numCol of A
const uint16_t bias_shift, // amount of left-shift for bias
const uint16_t out_shift, // amount of right-shift for output
const q7_t * bias, // bias
q15_t * pOut, // output
q15_t * vec_buffer); // not used but to keep the interface same as the ARM's version
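// Example (sketch, dimensions and variable names are illustrative only):
// with bias = NULL and bias_shift = 0, the call degenerates to a plain dot product.
//   local_fully_connected_mat_q7_vec_q15(vec, weights, 128, 10, 0, out_shift, NULL, out, NULL);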
// works on the reordered (optimised) weight matrix
// this supports no bias; set bias = NULL to use it that way
void local_fully_connected_mat_q7_vec_q15_opt(const q15_t * pV,
const q7_t * pM,
const uint16_t dim_vec,
const uint16_t num_of_rows,
const uint16_t bias_shift,
const uint16_t out_shift,
const q7_t * bias,
q15_t * pOut,
q15_t * vec_buffer);
// matrix operation Q15
void local_multiple_add_q15( q15_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q15_t **p_src);
void local_multiple_mult_q15( q15_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q15_t **p_src);
void local_multiple_sub_q15( q15_t *p_dst,
const int16_t out_shift,
uint32_t block_size,
uint32_t num_block,
q15_t **p_src);
void local_mult_q15(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// add
void local_add_q15(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// sub
void local_sub_q15(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);
// Convert Q7 to Q15
void local_q7_to_q15_no_shift(const q7_t *src, q15_t *des, uint32_t size);
void local_q7_to_q15(const q7_t *src, q15_t *des, uint32_t size);
// q15 shift to q7
void local_q15_to_q7(const q15_t *src, q7_t *des, uint32_t shift, uint32_t size);
// y = 1 - x
void local_1_minor_z_q15(q15_t *src, q15_t *des, uint16_t dec_bit, uint32_t size);
void local_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out);
void local_hard_sigmoid_q15(q15_t *data, uint32_t size, int16_t dec_bit);
void local_hard_tanh_q15(q15_t *data, uint32_t size, int16_t dec_bit);
void local_relu_q15(q15_t *data, uint32_t size);
void local_leaky_relu_q15(q15_t *data, q7_t alpha, uint32_t size);
void local_adv_relu_q15(q15_t *data, q7_t negative_slope, q15_t max, q15_t threshold, uint32_t size);
void local_sigmoid_q15(q15_t * data, uint32_t size, uint16_t int_width);
void local_tanh_q15(q15_t * data, uint32_t size, uint16_t int_width);
static const q15_t nnom_sigmoid_table_q15[256] = {
0x4000, 0x4200, 0x43ff, 0x45fc, 0x47f5, 0x49eb, 0x4bdc, 0x4dc8,
0x4fad, 0x518a, 0x5360, 0x552c, 0x56ef, 0x58a8, 0x5a57, 0x5bfb,
0x5d93, 0x5f20, 0x60a1, 0x6216, 0x637f, 0x64db, 0x662b, 0x676f,
0x68a6, 0x69d2, 0x6af1, 0x6c05, 0x6d0d, 0x6e09, 0x6efb, 0x6fe2,
0x70be, 0x7190, 0x7258, 0x7316, 0x73cc, 0x7478, 0x751b, 0x75b7,
0x764a, 0x76d6, 0x775b, 0x77d8, 0x784f, 0x78c0, 0x792a, 0x798f,
0x79ee, 0x7a48, 0x7a9d, 0x7aed, 0x7b39, 0x7b80, 0x7bc4, 0x7c03,
0x7c3f, 0x7c78, 0x7cad, 0x7ce0, 0x7d0f, 0x7d3c, 0x7d66, 0x7d8d,
0x7db3, 0x7dd6, 0x7df7, 0x7e16, 0x7e33, 0x7e4f, 0x7e69, 0x7e81,
0x7e98, 0x7eae, 0x7ec2, 0x7ed5, 0x7ee7, 0x7ef8, 0x7f08, 0x7f17,
0x7f25, 0x7f32, 0x7f3e, 0x7f4a, 0x7f55, 0x7f5f, 0x7f69, 0x7f72,
0x7f7b, 0x7f83, 0x7f8a, 0x7f91, 0x7f98, 0x7f9e, 0x7fa4, 0x7faa,
0x7faf, 0x7fb4, 0x7fb8, 0x7fbd, 0x7fc1, 0x7fc5, 0x7fc8, 0x7fcc,
0x7fcf, 0x7fd2, 0x7fd5, 0x7fd7, 0x7fda, 0x7fdc, 0x7fde, 0x7fe0,
0x7fe2, 0x7fe4, 0x7fe6, 0x7fe7, 0x7fe9, 0x7fea, 0x7feb, 0x7fed,
0x7fee, 0x7fef, 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff4,
0x000b, 0x000c, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011,
0x0012, 0x0013, 0x0015, 0x0016, 0x0017, 0x0019, 0x001a, 0x001c,
0x001e, 0x0020, 0x0022, 0x0024, 0x0026, 0x0029, 0x002b, 0x002e,
0x0031, 0x0034, 0x0038, 0x003b, 0x003f, 0x0043, 0x0048, 0x004c,
0x0051, 0x0056, 0x005c, 0x0062, 0x0068, 0x006f, 0x0076, 0x007d,
0x0085, 0x008e, 0x0097, 0x00a1, 0x00ab, 0x00b6, 0x00c2, 0x00ce,
0x00db, 0x00e9, 0x00f8, 0x0108, 0x0119, 0x012b, 0x013e, 0x0152,
0x0168, 0x017f, 0x0197, 0x01b1, 0x01cd, 0x01ea, 0x0209, 0x022a,
0x024d, 0x0273, 0x029a, 0x02c4, 0x02f1, 0x0320, 0x0353, 0x0388,
0x03c1, 0x03fd, 0x043c, 0x0480, 0x04c7, 0x0513, 0x0563, 0x05b8,
0x0612, 0x0671, 0x06d6, 0x0740, 0x07b1, 0x0828, 0x08a5, 0x092a,
0x09b6, 0x0a49, 0x0ae5, 0x0b88, 0x0c34, 0x0cea, 0x0da8, 0x0e70,
0x0f42, 0x101e, 0x1105, 0x11f7, 0x12f3, 0x13fb, 0x150f, 0x162e,
0x175a, 0x1891, 0x19d5, 0x1b25, 0x1c81, 0x1dea, 0x1f5f, 0x20e0,
0x226d, 0x2405, 0x25a9, 0x2758, 0x2911, 0x2ad4, 0x2ca0, 0x2e76,
0x3053, 0x3238, 0x3424, 0x3615, 0x380b, 0x3a04, 0x3c01, 0x3e00,
};
static const q15_t nnom_tanh_table_q15[256] = {
0x0000, 0x07fd, 0x0feb, 0x17b9, 0x1f59, 0x26bf, 0x2ddf, 0x34ae,
0x3b27, 0x4142, 0x46fd, 0x4c56, 0x514d, 0x55e2, 0x5a1a, 0x5df6,
0x617c, 0x64b0, 0x6797, 0x6a37, 0x6c95, 0x6eb5, 0x709e, 0x7254,
0x73dc, 0x753a, 0x7672, 0x7788, 0x787f, 0x795b, 0x7a1e, 0x7acb,
0x7b65, 0x7bee, 0x7c66, 0x7cd1, 0x7d30, 0x7d84, 0x7dce, 0x7e0f,
0x7e49, 0x7e7d, 0x7eaa, 0x7ed2, 0x7ef5, 0x7f14, 0x7f30, 0x7f48,
0x7f5e, 0x7f71, 0x7f82, 0x7f91, 0x7f9e, 0x7fa9, 0x7fb3, 0x7fbc,
0x7fc4, 0x7fcb, 0x7fd1, 0x7fd7, 0x7fdc, 0x7fe0, 0x7fe4, 0x7fe7,
0x7fea, 0x7fed, 0x7fef, 0x7ff1, 0x7ff3, 0x7ff4, 0x7ff6, 0x7ff7,
0x7ff8, 0x7ff9, 0x7ffa, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffc, 0x7ffd,
0x7ffd, 0x7ffd, 0x7ffe, 0x7ffe, 0x7ffe, 0x7ffe, 0x7fff, 0x7fff,
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001,
0x8001, 0x8001, 0x8001, 0x8002, 0x8002, 0x8002, 0x8002, 0x8003,
0x8003, 0x8003, 0x8004, 0x8004, 0x8005, 0x8006, 0x8006, 0x8007,
0x8008, 0x8009, 0x800a, 0x800c, 0x800d, 0x800f, 0x8011, 0x8013,
0x8016, 0x8019, 0x801c, 0x8020, 0x8024, 0x8029, 0x802f, 0x8035,
0x803c, 0x8044, 0x804d, 0x8057, 0x8062, 0x806f, 0x807e, 0x808f,
0x80a2, 0x80b8, 0x80d0, 0x80ec, 0x810b, 0x812e, 0x8156, 0x8183,
0x81b7, 0x81f1, 0x8232, 0x827c, 0x82d0, 0x832f, 0x839a, 0x8412,
0x849b, 0x8535, 0x85e2, 0x86a5, 0x8781, 0x8878, 0x898e, 0x8ac6,
0x8c24, 0x8dac, 0x8f62, 0x914b, 0x936b, 0x95c9, 0x9869, 0x9b50,
0x9e84, 0xa20a, 0xa5e6, 0xaa1e, 0xaeb3, 0xb3aa, 0xb903, 0xbebe,
0xc4d9, 0xcb52, 0xd221, 0xd941, 0xe0a7, 0xe847, 0xf015, 0xf803,
};
#ifdef __cplusplus
}
#endif
#endif /* __NNOM_LOCAL_H__ */

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
* 2019-02-10 Jianjia Ma Compiler supports dense net connection
*/
#ifndef __NNOM_TENSOR_H__
#define __NNOM_TENSOR_H__
#ifdef __cplusplus
extern "C" {
#endif
#include "nnom.h"
void delete_tensor(nnom_tensor_t* t);
nnom_tensor_t* new_tensor(nnom_qtype_t type, uint32_t num_dim, uint32_t num_channel);
// set tensor by value
// for tensor with quantized type NNOM_QTYPE_PER_TENSOR
nnom_tensor_t* tensor_set_attr_v(nnom_tensor_t* t,
nnom_qformat_param_t dec_bit, nnom_qformat_param_t offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth);
nnom_tensor_t* tensor_set_attr(nnom_tensor_t* t,
nnom_qformat_param_t*dec_bit, nnom_qformat_param_t *offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth);
nnom_tensor_t* tensor_cpy_attr(nnom_tensor_t* des, nnom_tensor_t* src);
size_t tensor_get_num_channel(nnom_tensor_t* t);
size_t tensor_size(nnom_tensor_t* t);
size_t tensor_size_byte(nnom_tensor_t* t);
// only support 3d tensor
// change format from HWC to CHW
// the shape of the data, input data, output data
void tensor_hwc2chw_q7(nnom_tensor_t* des, nnom_tensor_t* src);
// change format from CHW to HWC
// the shape of the data, input data, output data
void tensor_chw2hwc_q7(nnom_tensor_t* des, nnom_tensor_t* src);
// deprecated.
void hwc2chw_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out);
void chw2hwc_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out);
#ifdef __cplusplus
}
#endif
#endif /*__NNOM_TENSOR_H__ */

View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
*/
#ifndef __NNOM_UTILS_H__
#define __NNOM_UTILS_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
typedef struct _nnom_predict_t
{
uint16_t *confusion_mat; // confusion matrix
uint32_t *top_k; // stores the number of predictions that fall in each rank k, e.g. Top-2 = top_k[0]+top_k[1]
nnom_model_t *model; // the model to run
int8_t *buf_prediction; // the pointer to the output of the softmax layer (normally the end of the classifier).
// setting
uint32_t label_num; // number of classes in the classification
uint32_t top_k_size; // number of top-k ranks to record
// running
uint32_t predict_count; // how many predictions have been done
//timing
uint32_t t_run_total; // total running time
uint32_t t_predict_start; // timestamp when the prediction test started
uint32_t t_predict_total; // total time of the whole test
} nnom_predict_t;
// create a prediction
// inputs: the model, a buffer pointer to the softmax output (temporary, this could be extracted from the model),
// the size of the softmax output (the number of labels),
// and the top k to record.
nnom_predict_t *prediction_create(nnom_model_t *m, int8_t *buf_prediction, size_t label_num, size_t top_k_size); // currently int8_t
// call after new data has been set in the input
// feeds the data to the prediction
// pass the current true label (range from 0 to the total number of labels - 1)
// (the current input data must be set manually by the user into the model's input buffer.)
// return NN_ARGUMENT_ERROR if a parameter is invalid
nnom_status_t prediction_run(nnom_predict_t *pre, uint32_t true_label, uint32_t* predict_label, float* prob);
// to mark prediction finished
void prediction_end(nnom_predict_t *pre);
// free all resources
void prediction_delete(nnom_predict_t *pre);
// print matrix
void prediction_matrix(nnom_predict_t *pre);
// print top-k
void prediction_top_k(nnom_predict_t *pre);
// this function prints the summary
void prediction_summary(nnom_predict_t *pre);
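// Usage sketch (illustrative only; buffer names and sizes are assumptions, not part of this API):
//   nnom_predict_t *pre = prediction_create(model, output_buf, 10, 4); // 10 labels, record top-4
//   for each test sample:
//       /* copy the sample into the model's input buffer */
//       prediction_run(pre, true_label, &predict_label, &prob);
//   prediction_end(pre);
//   prediction_summary(pre);   // prints timing, top-k and the confusion matrix
//   prediction_delete(pre);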
// -------------------------------
// stand alone prediction API
// this API tests one set of data and returns the prediction
// returns the predicted label and probability
// return NN_ARGUMENT_ERROR if a parameter is invalid
nnom_status_t nnom_predict(nnom_model_t *m, uint32_t *label, float *prob);
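// Example (sketch): one-shot prediction after the model's input buffer has been filled.
//   uint32_t label; float prob;
//   if (nnom_predict(model, &label, &prob) == NN_SUCCESS)
//       NNOM_LOG("label %d, prob %d%%\n", (int)label, (int)(prob * 100));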
void model_stat(nnom_model_t *m);
void model_io_format(nnom_model_t *m);
#ifdef __cplusplus
}
#endif
#endif /*__NNOM_UTILS_H__ */

View File

@@ -0,0 +1,61 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
* 2021-09-08 derekduke add tos support
*/
#ifndef __NNOM_PORT_H__
#define __NNOM_PORT_H__
#include <stdlib.h>
#include <stdio.h>
/* use static memory */
#define NNOM_USING_STATIC_MEMORY // enable to use the built-in memory allocation on a large static memory block
// must set buf using "nnom_set_static_buf()" before creating a model.
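// Example (sketch, the pool size is only an illustration; the real size depends on the generated model):
//   static uint8_t nnom_pool[32 * 1024];
//   nnom_set_static_buf(nnom_pool, sizeof(nnom_pool)); // call before creating the model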
/* dynamic memory interfaces */
/* when libc is not available, you shall implement the below memory interfaces (libc equivalents). */
#ifndef NNOM_USING_STATIC_MEMORY
//#define nnom_malloc(n) malloc(n)
//#define nnom_free(p) free(p)
#define nnom_malloc(n) tos_mmheap_alloc(n)
#define nnom_free(n) tos_mmheap_free(n)
#endif
/* memory interface */
/* when libc is not available, you shall implement your equivalent functions here */
#define nnom_memset(p,v,s) memset(p,v,s)
#define nnom_memcpy(dst,src,len) memcpy(dst,src,len)
/* runtime & debug */
#define nnom_us_get() 0 // return a microsecond timestamp
#define nnom_ms_get() 0 // return a millisecond timestamp
#define NNOM_LOG(...) printf(__VA_ARGS__)
/* NNoM configuration */
#define NNOM_BLOCK_NUM (16) // maximum number of memory blocks; increase it if the model compiling log requests more.
#define DENSE_WEIGHT_OPT (1) // set to 1 if the fully connected layers use optimised (reordered) weights.
//#define NNOM_TRUNCATE // disable: backend ops use round to the nearest int (default). enable: floor
/* Backend format configuration */
//#define NNOM_USING_CHW // uncomment if using CHW format. otherwise using default HWC format.
// Note: CHW is incompatible with CMSIS-NN.
// CHW must be used with hardware accelerators such as the KPU in the K210 chip
/* Backend selection */
//#define NNOM_USING_CMSIS_NN // uncomment to use CMSIS-NN for optimisation
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,83 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
size_t shape_size(nnom_3d_shape_t *s)
{
if (s == NULL)
return 0;
return s->h * s->w * s->c;
}
nnom_3d_shape_t shape(size_t h, size_t w, size_t c)
{
nnom_3d_shape_t s;
s.h = h;
s.w = w;
s.c = c;
return s;
}
nnom_3d_shape_t kernel(size_t h, size_t w)
{
return shape(h, w, 1);
}
nnom_3d_shape_t stride(size_t h, size_t w)
{
return shape(h, w, 1);
}
nnom_3d_shape_t dilation(size_t h, size_t w)
{
return shape(h, w, 1);
}
nnom_border_t border(size_t top, size_t bottom, size_t left, size_t right)
{
nnom_border_t b;
b.top = top;
b.bottom = bottom;
b.left = left;
b.right = right;
return b;
}
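// Example (sketch): the helpers above simply pack sizes into nnom_3d_shape_t / nnom_border_t.
//   nnom_3d_shape_t k = kernel(3, 3);       // {h=3, w=3, c=1}
//   nnom_3d_shape_t s = stride(2, 2);       // {h=2, w=2, c=1}
//   nnom_border_t   b = border(1, 1, 1, 1); // 1-pixel padding on all four sides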
// this function has to be used when assigning an io to a layer,
// because the io needs to know which layer owns it.
nnom_layer_io_t *io_init(void *owner_layer, nnom_layer_io_t *io)
{
io->owner = (nnom_layer_t *)owner_layer;
return io;
}
// this function adds a new IO to a currently initialised IO
// input: the targeted IO that the new IO will be attached to
// output: the new IO
nnom_layer_io_t *io_add_aux(nnom_layer_io_t *targeted_io)
{
nnom_layer_io_t *new_io;
// check if the targeted io is inited, and its aux = NULL
if (targeted_io == NULL || targeted_io->owner == NULL || targeted_io->aux != NULL)
return NULL;
// create new io, init it
new_io = nnom_mem(sizeof(nnom_layer_io_t));
if (new_io == NULL)
return NULL;
// add to aux
targeted_io->aux = new_io;
return io_init(targeted_io->owner, new_io);
}

View File

@@ -0,0 +1,245 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
* 2019-02-14 Jianjia Ma Add layer.free() method.
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <stdarg.h>
#include "nnom.h"
#include "nnom_tensor.h"
// tensor size
size_t tensor_size(nnom_tensor_t* t)
{
size_t size = 0;
if (t != NULL)
{
size = t->dim[0];
for (int i = 1; i < t->num_dim; i++)
size *= t->dim[i];
}
return size;
}
size_t tensor_size_byte(nnom_tensor_t* t)
{
return tensor_size(t)*t->bitwidth/8;
}
size_t tensor_get_num_channel(nnom_tensor_t* t)
{
// this will need to be changed to support batch.
#ifdef NNOM_USING_CHW
// channel first
//return t->dim[0];
return t->dim[t->num_dim -1]; // we always use HWC to describe the shape, even when the data is stored in CHW
#else
// channel last
return t->dim[t->num_dim -1];
#endif
}
// initialise/create new tensor
nnom_tensor_t* new_tensor(nnom_qtype_t type, uint32_t num_dim, uint32_t num_channel)
{
nnom_tensor_t* t = NULL;
uint32_t q_len;
if(type == NNOM_QTYPE_PER_AXIS)
{
q_len = num_channel;
}
else if (type == NNOM_QTYPE_PER_TENSOR)
{
q_len = 1;
}
else
{
NNOM_LOG("ERROR: tensor type not specified\n");
return NULL;
}
t = nnom_mem(nnom_alignto(sizeof(nnom_tensor_t), NNOM_ALIGN)
+ num_dim*sizeof(nnom_shape_data_t)
+ q_len*sizeof(nnom_qformat_param_t)*2);
if(t == NULL)
return t;
t->dim = (nnom_shape_data_t*)((uint8_t*)t + sizeof(nnom_tensor_t)); // should add alignment
t->q_dec = (nnom_qformat_param_t*)((uint8_t*)t->dim + num_dim*sizeof(nnom_shape_data_t));
t->q_offset = (nnom_qformat_param_t*)((uint8_t*)t->q_dec + q_len*sizeof(nnom_qformat_param_t));
t->num_dim = num_dim;
t->qtype = type;
return t;
}
void delete_tensor(nnom_tensor_t* t)
{
if (t)
nnom_free(t);
}
// set tensor by value
// for tensor with quantized type NNOM_QTYPE_PER_TENSOR
nnom_tensor_t* tensor_set_attr_v(nnom_tensor_t* t,
nnom_qformat_param_t dec_bit, nnom_qformat_param_t offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth)
{
// copy dim
t->num_dim = num_dim;
nnom_memcpy(t->dim, dim, sizeof(nnom_shape_data_t) * num_dim);
// bitwidth
t->bitwidth = bitwidth;
// copy the offset and q format
*(t->q_dec) = dec_bit;
*(t->q_offset) = offset;
return t;
}
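// Example (sketch): a 4x4x8 q7 tensor quantised per-tensor with 3 fractional bits (Q4.3).
//   nnom_shape_data_t dim[3] = {4, 4, 8};
//   nnom_tensor_t *t = new_tensor(NNOM_QTYPE_PER_TENSOR, 3, 8);
//   if (t) tensor_set_attr_v(t, 3 /*dec_bit*/, 0 /*offset*/, dim, 3, 8 /*bitwidth*/);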
// set tensor by pointer
// for tensor with quantized type NNOM_QTYPE_PER_AXIS
nnom_tensor_t* tensor_set_attr(nnom_tensor_t* t,
nnom_qformat_param_t*dec_bit, nnom_qformat_param_t *offset, nnom_shape_data_t* dim, uint32_t num_dim, uint8_t bitwidth)
{
size_t size;
// copy dim
t->num_dim = num_dim;
nnom_memcpy(t->dim, dim, sizeof(nnom_shape_data_t) * num_dim);
// get the q format data size
if(t->qtype == NNOM_QTYPE_PER_AXIS)
size = sizeof(nnom_qformat_param_t) * tensor_get_num_channel(t);
else
size = sizeof(nnom_qformat_param_t);
// bitwidth
t->bitwidth = bitwidth;
// copy the offset and q format
nnom_memcpy(t->q_dec, dec_bit, size);
nnom_memcpy(t->q_offset, offset, size);
return t;
}
// this method copies the attributes of a tensor to a new tensor
// before calling, the src and des tensors must already have their QTYPE and NUM_OF_DIM set.
// Note: the tensors must have the same length. this method won't copy the memory pointer data (memory is assigned later, after building)
nnom_tensor_t* tensor_cpy_attr(nnom_tensor_t* des, nnom_tensor_t* src)
{
size_t size;
if(src->qtype != des->qtype || src->num_dim != des->num_dim)
return NULL;
if(src->qtype == NNOM_QTYPE_PER_AXIS)
size = sizeof(nnom_qformat_param_t) * tensor_get_num_channel(src);
else
size = sizeof(nnom_qformat_param_t);
// bit
des->bitwidth = src->bitwidth;
// copy quantisation parameters
nnom_memcpy(des->q_dec, src->q_dec, size);
nnom_memcpy(des->q_offset, src->q_offset, size);
// copy number of dimension
des->num_dim = src->num_dim;
nnom_memcpy(des->dim, src->dim, src->num_dim * sizeof(nnom_shape_data_t));
return des;
}
// change format from HWC to CHW
// the shape of the data, input data, output data
void tensor_hwc2chw_q7(nnom_tensor_t* des, nnom_tensor_t* src)
{
q7_t* p_out = des->p_data;
q7_t* p_in = src->p_data;
for (int c = 0; c < src->dim[2]; c++)
{
for (int h = 0; h < src->dim[0]; h++)
{
for (int w = 0; w < src->dim[1]; w++)
{
*p_out = p_in[(h * src->dim[1] + w) * src->dim[2] + c];
p_out++;
}
}
}
}
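// Worked example (sketch): for a 2x2x2 tensor (H=W=C=2), the HWC element (h,w,c) at index
// (h*W + w)*C + c moves to CHW index c*H*W + h*W + w, e.g. (h=1, w=0, c=1): HWC index 5 -> CHW index 6.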
// only support 3d tensor
// change format from CHW to HWC
void tensor_chw2hwc_q7(nnom_tensor_t* des, nnom_tensor_t* src)
{
q7_t* p_out = des->p_data;
q7_t* p_in = src->p_data;
int im_size;
int h_step;
im_size = src->dim[0] * src->dim[1]; // H*W
for (int h = 0; h < src->dim[0]; h++)
{
h_step = src->dim[1] * h;
for (int w = 0; w < src->dim[1]; w++)
{
for (int c = 0; c < src->dim[2]; c++)
{
*p_out = p_in[im_size * c + h_step + w];
p_out++;
}
}
}
}
// (deprecated by tensor_hwc2chw version)
// change format from HWC to CHW
// the shape of the data, input data, output data
void hwc2chw_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out)
{
for (int c = 0; c < shape.c; c++)
{
for (int h = 0; h < shape.h; h++)
{
for (int w = 0; w < shape.w; w++)
{
*p_out = p_in[(h * shape.w + w) * shape.c + c];
p_out++;
}
}
}
}
// (deprecated)
// change format from CHW to HWC
// the shape of the data, input data, output data
void chw2hwc_q7(nnom_3d_shape_t shape, q7_t* p_in, q7_t* p_out)
{
int im_size = shape.w * shape.h;
int h_step;
for (int h = 0; h < shape.h; h++)
{
h_step = shape.w * h;
for (int w = 0; w < shape.w; w++)
{
for (int c = 0; c < shape.c; c++)
{
*p_out = p_in[im_size * c + h_step + w];
p_out++;
}
}
}
}

View File

@@ -0,0 +1,417 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-02-05 Jianjia Ma The first version
*/
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_utils.h"
static nnom_predict_t *_predict_create_instance(nnom_model_t *m, size_t label_num, size_t top_k_size)
{
nnom_predict_t *pre;
// allocate memory
pre = (nnom_predict_t *)nnom_malloc(sizeof(nnom_predict_t));
if(pre == NULL)
return NULL;
pre->top_k = (uint32_t *)nnom_malloc(top_k_size * sizeof(uint32_t));
pre->confusion_mat = (uint16_t *)nnom_malloc(label_num * label_num * sizeof(uint16_t));
if(pre->top_k == NULL || pre->confusion_mat == NULL)
{
nnom_free(pre->top_k); nnom_free(pre->confusion_mat); nnom_free(pre);
return NULL;
}
nnom_memset(pre->top_k, 0, top_k_size * sizeof(uint32_t));
nnom_memset(pre->confusion_mat, 0, label_num * label_num * sizeof(uint16_t));
// config
pre->label_num = label_num;
pre->top_k_size = top_k_size;
pre->predict_count = 0;
// run
pre->model = m;
pre->t_run_total = 0; // model running time in total
pre->t_predict_start = 0; // timestamp when the prediction test started
pre->t_predict_total = 0; // total time of the whole test
return pre;
}
static void _predict_delete_instance(nnom_predict_t *pre)
{
if(pre == NULL)
return;
nnom_free(pre->top_k);
nnom_free(pre->confusion_mat);
nnom_free(pre);
}
// create a prediction
// inputs: the model, a buffer pointer to the softmax output (temporary, this could be extracted from the model),
// the size of the softmax output (the number of labels),
// and the top k to record.
nnom_predict_t *prediction_create(nnom_model_t *m, int8_t *buf_prediction, size_t label_num, size_t top_k_size)
{
nnom_predict_t *pre = _predict_create_instance(m, label_num, top_k_size);
if (!pre)
return NULL;
if (!m)
{
_predict_delete_instance(pre);
return NULL;
}
// set the output buffer of model to the prediction instance
pre->buf_prediction = buf_prediction;
// mark start time.
pre->t_predict_start = nnom_ms_get();
return pre;
}
// call after new data has been set in the input
// feeds the data to the prediction
// pass the current true label (range from 0 to the total number of labels - 1)
// (the current input data must be set manually by the user into the model's input buffer.)
nnom_status_t prediction_run(nnom_predict_t *pre, uint32_t true_label, uint32_t*predict_label, float* prob)
{
int max_val;
int max_index;
uint32_t true_ranking = 0;
uint32_t start;
uint32_t sum = 0;
if (!pre)
return NN_ARGUMENT_ERROR;
// now run model
start = nnom_ms_get();
model_run(pre->model);
pre->t_run_total += nnom_ms_get() - start;
// only draw the matrix and top-k when the number of labels > 1
if (pre->label_num > 1)
{
// find how many predictions are ranked above the ground truth.
// Ranking rules are the same as TensorFlow's; however, on an MCU predictions are more likely to have equal probability since fixed-point is used.
// if the ranking is 1, 2, =2(true), 4, 5, 6, the result is top 3.
// if the ranking is 1, 2(true), =2, 4, 5, 6, the result is top 2.
// find the ranking of the predicted label.
for (uint32_t j = 0; j < pre->label_num; j++)
{
if (j == true_label)
continue;
if (pre->buf_prediction[true_label] < pre->buf_prediction[j])
true_ranking++;
// when value[true_label] == value[j], true_label ranks after j only if j < true_label
else if (pre->buf_prediction[true_label] == pre->buf_prediction[j] && j < true_label)
true_ranking++;
}
if (true_ranking < pre->top_k_size)
pre->top_k[true_ranking]++;
// Find top 1 and return the current prediction.
// If there are several maximum predictions, return the first one.
max_val = pre->buf_prediction[0];
max_index = 0;
for (uint32_t j = 1; j < pre->label_num; j++)
{
if (pre->buf_prediction[j] > max_val)
{
max_val = pre->buf_prediction[j];
max_index = j;
}
sum += pre->buf_prediction[j];
}
// result
if (max_val != 0)
*prob = (float)max_val / 127.f;
else
*prob = 0;
*predict_label = max_index;
// fill confusion matrix
pre->confusion_mat[true_label * pre->label_num + max_index] += 1;
}
// only one neuron as output.
else
{
*prob = (float)pre->buf_prediction[0] / 127.f;
if (*prob >= 0.5f)
*predict_label = 1;
else
*predict_label = 0;
}
// prediction count
pre->predict_count++;
// return the prediction
return NN_SUCCESS;
}
void prediction_end(nnom_predict_t *pre)
{
if (!pre)
return;
pre->t_predict_total = nnom_ms_get() - pre->t_predict_start;
}
void prediction_delete(nnom_predict_t *pre)
{
_predict_delete_instance(pre);
}
void prediction_matrix(nnom_predict_t *pre)
{
if (!pre)
return;
// print titles
NNOM_LOG("\nConfusion matrix:\n");
NNOM_LOG("predict");
for (int i = 0; i < pre->label_num; i++)
{
NNOM_LOG("%6d", i);
}
NNOM_LOG("\n");
NNOM_LOG("actual\n");
// print the matrix
for (int i = 0; i < pre->label_num; i++)
{
uint32_t row_total = 0;
NNOM_LOG(" %3d | ", i);
for (int j = 0; j < pre->label_num; j++)
{
row_total += pre->confusion_mat[i * pre->label_num + j];
NNOM_LOG("%6d", pre->confusion_mat[i * pre->label_num + j]);
}
NNOM_LOG(" |%4d%%\n", pre->confusion_mat[i * pre->label_num + i] * 100 / row_total);
row_total = 0;
}
NNOM_LOG("\n");
}
// top-k
void prediction_top_k(nnom_predict_t *pre)
{
uint32_t top = 0;
if (!pre)
return;
for (int i = 0; i < pre->top_k_size; i++)
{
top += pre->top_k[i];
if (top != pre->predict_count)
NNOM_LOG("Top %d Accuracy: %d.%02d%% \n", i + 1, (top * 100) / pre->predict_count,
((top * 100 * 100) / pre->predict_count)%100);
else
NNOM_LOG("Top %d Accuracy: 100%% \n", i + 1);
}
}
// this function prints the summary
void prediction_summary(nnom_predict_t *pre)
{
if (!pre)
return;
// summary
NNOM_LOG("\nPrediction summary:\n");
NNOM_LOG("Test frames: %d\n", pre->predict_count);
NNOM_LOG("Test running time: %d sec\n", pre->t_predict_total / 1000);
NNOM_LOG("Model running time: %d ms\n", pre->t_run_total);
if(pre->predict_count !=0)
NNOM_LOG("Average prediction time: %d us\n", (pre->t_run_total * 1000) / pre->predict_count);
if(pre->t_run_total != 0)
NNOM_LOG("Average effeciency: %d.%02d ops/us\n", (int)(((uint64_t)pre->model->total_ops * pre->predict_count) / (pre->t_run_total * 1000)),
(int)(((uint64_t)pre->model->total_ops * pre->predict_count)*100 / (pre->t_run_total * 1000))%100);
if(pre->t_run_total !=0 && pre->predict_count !=0)
NNOM_LOG("Average frame rate: %d.%d Hz\n", 1000 / (pre->t_run_total / pre->predict_count),
(1000*10 / (pre->t_run_total / pre->predict_count))%10);
// only valid for multiple labels
if(pre->label_num > 1)
{
// print top-k
prediction_top_k(pre);
// print confusion matrix
prediction_matrix(pre);
}
}
// stand alone prediction API
// this API tests one set of data and returns the prediction
nnom_status_t nnom_predict(nnom_model_t *m, uint32_t *label, float *prob)
{
int32_t max_val, max_index, sum;
int8_t *output;
if (!m)
return NN_ARGUMENT_ERROR;
model_run(m);
// get the output memory
output = m->tail->out->tensor->p_data;
// multiple neuron outputs
if (tensor_size(m->tail->out->tensor) > 1)
{
// Top 1
max_val = output[0];
max_index = 0;
sum = max_val;
for (uint32_t i = 1; i < tensor_size(m->tail->out->tensor); i++)
{
if (output[i] > max_val)
{
max_val = output[i];
max_index = i;
}
sum += output[i];
}
// send results
*label = max_index;
if(max_val !=0)
*prob = (float)max_val/127.f;
else
*prob = 0;
}
// single neuron output
else
{
*prob = (float)output[0] / 127.f;
if (*prob >= 0.5f)
*label = 1;
else
*label = 0;
}
return NN_SUCCESS;
}
static void layer_stat(nnom_layer_t *layer)
{
// layer stat
if(layer->type != NNOM_RNN)
NNOM_LOG("%-10s - ", default_layer_names[layer->type]);
else
{
NNOM_LOG("%-3s/", default_layer_names[layer->type]);
NNOM_LOG("%-6s - ", default_cell_names[((nnom_rnn_layer_t*)layer)->cell->type]);
}
NNOM_LOG(" %8d ", layer->stat.time);
// MAC operation
if(layer->stat.macc == 0)
NNOM_LOG(" ");
else if (layer->stat.macc < 10000)
NNOM_LOG("%7d ", (uint32_t)layer->stat.macc);
else if (layer->stat.macc < 1000*1000)
NNOM_LOG("%6dk ", (uint32_t)(layer->stat.macc/1000));
else if (layer->stat.macc < 1000*1000*1000)
NNOM_LOG("%3d.%02dM ", (uint32_t)(layer->stat.macc/(1000*1000)), (uint32_t)(layer->stat.macc%(1000*1000)/(10*1000))); // xxx.xx M
else
NNOM_LOG("%3d.%02dG ", (uint32_t)(layer->stat.macc/(1000*1000*1000)), (uint32_t)(layer->stat.macc%(1000*1000*1000)/(10*1000*1000))); // xxx.xx G
// layer efficiency
if (layer->stat.macc != 0 && layer->stat.time != 0)
NNOM_LOG("%d.%02d\n", (uint32_t)(layer->stat.macc / layer->stat.time), (uint32_t)((layer->stat.macc * 100) / (layer->stat.time) % 100));
else
NNOM_LOG("\n");
}
void model_stat(nnom_model_t *m)
{
size_t total_ops = 0;
size_t total_time = 0;
nnom_layer_t *layer;
uint32_t run_num = 0;
if (!m)
return;
layer = m->head;
NNOM_LOG("\nPrint running stat..\n");
NNOM_LOG("Layer(#) - Time(us) ops(MACs) ops/us \n");
NNOM_LOG("--------------------------------------------------------\n");
while (layer)
{
run_num++;
NNOM_LOG("#%-3d", run_num);
total_ops += layer->stat.macc;
total_time += layer->stat.time;
layer_stat(layer);
if (layer->shortcut == NULL)
break;
layer = layer->shortcut;
}
NNOM_LOG("\nSummary:\n");
NNOM_LOG("Total ops (MAC): %d", (uint32_t)(total_ops));
NNOM_LOG("(%d.%02dM)\n", (uint32_t) (total_ops/(1000*1000)), (uint32_t)(total_ops%(1000*1000)/(10000)));
NNOM_LOG("Prediction time :%dus\n", (uint32_t)total_time);
if(total_time != 0)
NNOM_LOG("Efficiency %d.%02d ops/us\n",
(uint32_t)(total_ops / total_time),
(uint32_t)((total_ops * 100) / (total_time) % 100));
NNOM_LOG("Total memory:%d\n", (uint32_t)nnom_mem_stat());
}
void model_io_format(nnom_model_t *m)
{
nnom_layer_t *layer;
uint32_t run_num = 0;
if (!m)
return;
layer = m->head;
NNOM_LOG("\nPrint layer input/output..\n");
NNOM_LOG("Layer(#) - Input(Qnm) Output(Qnm) Oshape \n");
NNOM_LOG("----------------------------------------------------------\n");
while (layer)
{
run_num++;
NNOM_LOG("#%-3d", run_num);
if(layer->type != NNOM_RNN)
NNOM_LOG("%-10s - ", default_layer_names[layer->type]);
else
{
NNOM_LOG("%-3s/", default_layer_names[layer->type]);
NNOM_LOG("%-6s - ", default_cell_names[((nnom_rnn_layer_t*)layer)->cell->type]);
}
NNOM_LOG(" %2d.%2d", 7-layer->in->tensor->q_dec[0], layer->in->tensor->q_dec[0]);
NNOM_LOG(" %2d.%2d", 7-layer->out->tensor->q_dec[0], layer->out->tensor->q_dec[0]);
NNOM_LOG(" (");
for (int i = 0; i < 3; i++)
{
if (layer->out->tensor->num_dim > i)
NNOM_LOG("%4d,", layer->out->tensor->dim[i]);
else
NNOM_LOG(" ");
}
NNOM_LOG(")\n");
if (layer->shortcut == NULL)
break;
layer = layer->shortcut;
}
}

View File

@@ -0,0 +1,369 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_activation.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
nnom_layer_t *Activation(nnom_activation_t *act)
{
nnom_activation_layer_t *layer;
nnom_layer_io_t *in, *out;
// allocate one block of memory for all the sub handles.
size_t mem_size = sizeof(nnom_activation_layer_t) + sizeof(nnom_layer_io_t) * 2;
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to the sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_activation_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_ACTIVATION;
layer->super.run = activation_run;
layer->super.build = default_build;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_NULL; // when a layer's io is set to NULL, both will point to same mem.
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
// set activation to layer
layer->act = act;
// set free method
layer->super.free = activation_free;
return (nnom_layer_t *)layer;
}
nnom_layer_t *ReLU(void)
{
nnom_layer_t *layer = Activation(act_relu());
if (layer == NULL)
return NULL;
// set type in layer parent
layer->type = NNOM_RELU;
return layer;
}
nnom_layer_t *LeakyReLU(float alpha)
{
nnom_layer_t *layer = Activation(act_leaky_relu(alpha));
if (layer == NULL)
return NULL;
// set type in layer parent
layer->type = NNOM_LEAKY_RELU;
return layer;
}
nnom_layer_t *AdvReLU(float alpha, float max, float threshold)
{
nnom_layer_t *layer = Activation(act_adv_relu(alpha, max, threshold));
if (layer == NULL)
return NULL;
// set type in layer parent
layer->type = NNOM_ADV_RELU;
return layer;
}
nnom_layer_t *Sigmoid(int32_t dec_bit)
{
nnom_layer_t *layer = Activation(act_sigmoid(dec_bit));
if (layer == NULL)
return NULL;
// set type in layer parent
layer->type = NNOM_SIGMOID;
return layer;
}
nnom_layer_t *TanH(int32_t dec_bit)
{
nnom_layer_t *layer = Activation(act_tanh(dec_bit));
if (layer == NULL)
return NULL;
// set type in layer parent
layer->type = NNOM_TANH;
return layer;
}
void act_delete(nnom_activation_t* act){
nnom_free(act);
}
// the activation layer takes an act instance created elsewhere; therefore, the act must be freed when the activation layer is deleted.
// this is the callback in layer->free
nnom_status_t activation_free(nnom_layer_t *layer)
{
if(layer)
act_delete(((nnom_activation_layer_t *)layer)->act);
return NN_SUCCESS;
}
nnom_status_t activation_run(nnom_layer_t *layer)
{
nnom_activation_layer_t *cl = (nnom_activation_layer_t *)layer;
return act_tensor_run(cl->act, layer->in->tensor);
}
// porting
static nnom_status_t relu_run(nnom_activation_t* act)
{
if(act->tensor->bitwidth == 16)
{
#ifdef NNOM_USING_CMSIS_NN
arm_relu_q15(act->tensor->p_data, tensor_size(act->tensor));
#else
local_relu_q15(act->tensor->p_data, tensor_size(act->tensor));
#endif
}
else
{
#ifdef NNOM_USING_CMSIS_NN
arm_relu_q7(act->tensor->p_data, tensor_size(act->tensor));
#else
local_relu_q7(act->tensor->p_data, tensor_size(act->tensor));
#endif
}
return NN_SUCCESS;
}
// leaky relu
static nnom_status_t leaky_relu_run(nnom_activation_t* act)
{
nnom_activation_leaky_relu_t* a = (nnom_activation_leaky_relu_t*) act;
if(act->tensor->bitwidth == 16)
local_leaky_relu_q15(act->tensor->p_data, a->alpha, tensor_size(act->tensor));
else
local_leaky_relu_q7(act->tensor->p_data, a->alpha, tensor_size(act->tensor));
return NN_SUCCESS;
}
// advance relu
static nnom_status_t adv_relu_run(nnom_activation_t* act)
{
nnom_activation_adv_relu_t* a = (nnom_activation_adv_relu_t*) act;
// we need to convert float to fixed-point at runtime, when the tensor's q format is known
if(act->tensor->bitwidth == 16)
{
q15_t max = 32767;
q15_t threshold = MIN(a->threshold * (1 << (15 - act->tensor->q_dec[0])), 32767);
q7_t max_scale = (1 << (15 - act->tensor->q_dec[0]));
if(a->max != INFINITY && a->max != 0x7fc00000)
if(a->max * max_scale < max)
max = a->max * max_scale;
local_adv_relu_q15(act->tensor->p_data, a->negative_slope, max, threshold, tensor_size(act->tensor));
}
// 8bit
else
{
q7_t max = 127;
q7_t threshold = MIN(a->threshold * (1 << (7 - act->tensor->q_dec[0])), 127);
q7_t max_scale = (1 << (7 - act->tensor->q_dec[0]));
if(a->max != INFINITY && a->max != 0x7fc00000) // QNAN 0x7fc00000 also represents infinity in script 0.4.1
if(a->max * max_scale < max)
max = a->max * max_scale;
local_adv_relu_q7(act->tensor->p_data, a->negative_slope, max, threshold, tensor_size(act->tensor));
}
return NN_SUCCESS;
}
static nnom_status_t tanh_run(nnom_activation_t* act)
{
nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
// 16 bit
if(act->tensor->bitwidth == 16)
{
uint8_t int_bit = 15 - a->dec_bit;
#ifdef NNOM_USING_CMSIS_NN
arm_nn_activations_direct_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_TANH);
#else
local_tanh_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit);
#endif
}
else // 8bit
{
uint8_t int_bit = 7 - a->dec_bit;
// arm version cannot handle int_bit > 3
#ifdef NNOM_USING_CMSIS_NN
if(act->tensor->q_dec[0] <= 3)
arm_nn_activations_direct_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_TANH);
else
#endif
local_tanh_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit);
}
return NN_SUCCESS;
}
static nnom_status_t sigmoid_run( nnom_activation_t* act)
{
nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
// 16 bit
if(act->tensor->bitwidth == 16)
{
uint8_t int_bit = 15 - a->dec_bit;
#ifdef NNOM_USING_CMSIS_NN
arm_nn_activations_direct_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_SIGMOID);
#else
local_sigmoid_q15(act->tensor->p_data, tensor_size(act->tensor), int_bit);
#endif
}
else // 8bit
{
uint8_t int_bit = 7 - a->dec_bit;
// arm version cannot handle int_bit > 3
#ifdef NNOM_USING_CMSIS_NN
if(act->tensor->q_dec[0] <= 3)
arm_nn_activations_direct_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit, ARM_SIGMOID);
else
#endif
local_sigmoid_q7(act->tensor->p_data, tensor_size(act->tensor), int_bit);
}
return NN_SUCCESS;
}
static nnom_status_t hard_tanh_run( nnom_activation_t* act)
{
nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
if(act->tensor->bitwidth == 16)
local_hard_tanh_q15(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit + 8); // a->dec is based on 8 bit.
else
local_hard_tanh_q7(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit);
return NN_SUCCESS;
}
static nnom_status_t hard_sigmoid_run( nnom_activation_t* act)
{
nnom_activation_fixed_q_t * a = (nnom_activation_fixed_q_t*)act;
if(act->tensor->bitwidth == 16)
local_hard_sigmoid_q15(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit + 8); // a->dec is based on 8 bit.
else
local_hard_sigmoid_q7(act->tensor->p_data, tensor_size(act->tensor), a->dec_bit);
return NN_SUCCESS;
}
//
nnom_activation_t* act_relu(void)
{
nnom_activation_t* act = nnom_mem(sizeof(nnom_activation_t));
act->run = relu_run;
act->type = ACT_RELU;
return act;
}
nnom_activation_t* act_leaky_relu(float alpha)
{
nnom_activation_leaky_relu_t* act = nnom_mem(sizeof(nnom_activation_leaky_relu_t));
act->super.run = leaky_relu_run;
act->super.type = ACT_LEAKY_RELU;
act->alpha = (q7_t)(alpha*128);
return (nnom_activation_t* )act;
}
nnom_activation_t* act_adv_relu(float negative_slope, float max, float threshold)
{
nnom_activation_adv_relu_t* act = nnom_mem(sizeof(nnom_activation_adv_relu_t));
act->super.run = adv_relu_run;
act->super.type = ACT_ADV_RELU;
act->negative_slope = (q7_t)(negative_slope*128);
act->max = max;
act->threshold = threshold;
return (nnom_activation_t* )act;
}
nnom_activation_t* act_tanh(int32_t dec_bit)
{
nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));
act->super.run = tanh_run;
act->super.type = ACT_TANH;
act->dec_bit = dec_bit;
return (nnom_activation_t*)act;
}
nnom_activation_t* act_sigmoid(int32_t dec_bit)
{
nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));
act->super.run = sigmoid_run;
act->super.type = ACT_SIGMOID;
act->dec_bit = dec_bit;
return (nnom_activation_t*)act;
}
nnom_activation_t* act_hard_tanh(int32_t dec_bit)
{
nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));
act->super.run = hard_tanh_run;
act->super.type = ACT_HARD_TANH;
act->dec_bit = dec_bit;
return (nnom_activation_t*)act;
}
nnom_activation_t* act_hard_sigmoid(int32_t dec_bit)
{
nnom_activation_fixed_q_t* act = nnom_mem(sizeof(nnom_activation_fixed_q_t));
act->super.run = hard_sigmoid_run;
act->super.type = ACT_HARD_SIGMOID;
act->dec_bit = dec_bit;
return (nnom_activation_t*)act;
}
// return the decimal bit if the activation will change the q format of the layer.
int32_t act_get_dec_bit(nnom_activation_type_t type, int32_t dec_bit)
{
switch(type)
{
case ACT_RELU:
case ACT_LEAKY_RELU:
case ACT_ADV_RELU:
break;
case ACT_TANH:
case ACT_HARD_TANH:
case ACT_SIGMOID:
case ACT_HARD_SIGMOID:
dec_bit = 7;
default:break;
}
return dec_bit;
}
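// Example (sketch): how the decimal bit propagates through an activation.
//   int32_t dec = act_get_dec_bit(ACT_RELU, 3);    // -> 3, ReLU keeps the input Q format (Q4.3)
//   dec = act_get_dec_bit(ACT_SIGMOID, 3);         // -> 7, output bounded to [0, 1], i.e. Q0.7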
// a direct API to run an activation on a tensor
nnom_status_t act_tensor_run(nnom_activation_t* act, nnom_tensor_t* tensor)
{
act->tensor = tensor;
return act->run(act);
}

View File

@@ -0,0 +1,167 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_avgpool.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
nnom_layer_t *avgpool_s(const nnom_pool_config_t * config)
{
nnom_avgpool_layer_t *cl;
if(config->num_dim == 1)
{
cl = (nnom_avgpool_layer_t *)AvgPool(kernel(1, config->kernel_size[0]),
stride(1, config->stride_size[0]),
config->padding_type);
}
else
{
cl = (nnom_avgpool_layer_t *)AvgPool(kernel(config->kernel_size[0], config->kernel_size[1]),
stride(config->stride_size[0], config->stride_size[1]),
config->padding_type);
}
if(cl)
{
cl->super.config = (void*) config;
cl->output_shift = config->output_shift; // no idea if we need it
}
return (nnom_layer_t *)cl;
}
nnom_layer_t *AvgPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type)
{
nnom_layer_t *layer = MaxPool(k, s, pad_type);
if (layer != NULL)
{
layer->type = NNOM_AVGPOOL;
layer->run = avgpool_run;
layer->build = avgpool_build;
}
return (nnom_layer_t *)layer;
}
nnom_status_t avgpool_build(nnom_layer_t *layer)
{
uint32_t size;
// avg pooling shares the same output shape, stride and padding settings as max pooling.
maxpool_build(layer);
#ifdef NNOM_USING_CMSIS_NN
// however, avg pooling requires a computational buffer.
// bufferA size: 2*dim_im_out*ch_im_in
size = layer->out->tensor->dim[1] > layer->out->tensor->dim[0] ?
layer->out->tensor->dim[1] : layer->out->tensor->dim[0];
layer->comp->size = 2 * size * layer->in->tensor->dim[2];
#endif
return NN_SUCCESS;
}
nnom_status_t avgpool_run(nnom_layer_t *layer)
{
nnom_avgpool_layer_t *cl = (nnom_avgpool_layer_t *)(layer);
uint16_t out_x, out_y;
// if global pooling
if(layer->out->tensor->num_dim == 1)
{
out_x = 1; out_y = 1;
}
else // normal pooling.
{
out_x = layer->out->tensor->dim[1]; //W
out_y = layer->out->tensor->dim[0]; //h
}
// 16 bit
if(layer->in->tensor->bitwidth == 16)
{
#ifdef NNOM_USING_CHW
local_avepool_q15_CHW(layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->kernel.w, cl->kernel.h,
cl->pad.w, cl->pad.h,
cl->stride.w, cl->stride.h,
out_x, out_y,
cl->output_shift,
NULL,
layer->out->tensor->p_data);
#else
local_avepool_q15_HWC(layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->kernel.w, cl->kernel.h,
cl->pad.w, cl->pad.h,
cl->stride.w, cl->stride.h,
out_x, out_y,
cl->output_shift,
NULL,
layer->out->tensor->p_data);
#endif
}
// 8bit
else{
#ifdef NNOM_USING_CHW
local_avepool_q7_CHW(layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->kernel.w, cl->kernel.h,
cl->pad.w, cl->pad.h,
cl->stride.w, cl->stride.h,
out_x, out_y,
cl->output_shift,
NULL,
layer->out->tensor->p_data);
#else //end of CHW
#ifdef NNOM_USING_CMSIS_NN
// 2D, square
if (layer->in->tensor->dim[1] == layer->in->tensor->dim[0] &&
layer->out->tensor->dim[1] == layer->out->tensor->dim[0] &&
cl->output_shift == 0)
{
arm_avepool_q7_HWC(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[2],
cl->kernel.w, cl->pad.w, cl->stride.w,
layer->out->tensor->dim[1],
layer->comp->mem->blk,
layer->out->tensor->p_data);
}
// non-square 2D, or 1D
else
#endif
{
// CMSIS-NN does not support non-square pooling, so we have to use the local implementation
local_avepool_q7_HWC(layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->kernel.w, cl->kernel.h,
cl->pad.w, cl->pad.h,
cl->stride.w, cl->stride.h,
out_x, out_y,
cl->output_shift,
NULL,
layer->out->tensor->p_data);
}
#endif
}
return NN_SUCCESS;
}

View File

@@ -0,0 +1,90 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_baselayer.h"
// this layer copies the input to the output
nnom_layer_t *baselayer_s(const nnom_layer_config_t * config)
{
nnom_layer_t *layer = BaseLayer();
if(layer)
layer->config = (void*) config;
return layer;
}
nnom_layer_t *BaseLayer()
{
nnom_io_layer_t *layer;
nnom_layer_io_t *in, *out;
// allocate a block of memory for all the sub handles.
size_t mem_size = sizeof(nnom_io_layer_t) + sizeof(nnom_layer_io_t) * 2;
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to the sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_io_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_BASE;
layer->super.run = default_run;
layer->super.build = default_build;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_NULL;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
return (nnom_layer_t *)layer;
}
// this is called when the output shape is not defined.
// it sets the output shape to be the same as the input shape, and it only sets the primary IO.
// it cannot be used as the first layer, of course...
nnom_status_t default_build(nnom_layer_t *layer)
{
// get the last layer's output as input shape
layer->in->tensor = layer->in->hook.io->tensor;
// output tensor
// 1. allocate a new tensor for output
// 2. set the same dim, qfmt to the new tensor.
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR,layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// now this build has passed the input tensors (shapes, formats) to the new tensors.
return NN_SUCCESS;
}
// simply copy input to output
nnom_status_t default_run(nnom_layer_t *layer)
{
if(layer->out->type != NNOM_TENSOR_BUF_NULL)
{
nnom_memcpy(layer->out->tensor->p_data, layer->in->tensor->p_data, tensor_size_byte(layer->in->tensor));
}
return NN_SUCCESS;
}

View File

@@ -0,0 +1,223 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_concat.h"
nnom_layer_t *concat_s(const nnom_concat_config_t *config)
{
nnom_layer_t* layer = Concat(config->axis);
if(layer)
layer->config = (void*) config;
return layer;
}
// concatenate method
// concatenate requires more than one input module. aux inputs will be allocated in model.merge()
nnom_layer_t *Concat(int8_t axis)
{
nnom_concat_layer_t *layer;
nnom_layer_io_t *in, *out;
size_t mem_size;
// allocate a block of memory for all the sub handles.
mem_size = sizeof(nnom_concat_layer_t) + sizeof(nnom_layer_io_t) * 2;
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to the sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_concat_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_CONCAT;
layer->super.run = concat_run;
layer->super.build = concat_build;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
// axis
layer->axis = axis;
return (nnom_layer_t *)layer;
}
nnom_status_t concat_build(nnom_layer_t *layer)
{
nnom_concat_layer_t *cl = (nnom_concat_layer_t *)layer;
nnom_layer_io_t *in;
uint32_t in_num = 0;
int32_t num_dim;
// for each input module, copy the shape from the output of last layer
in = layer->in;
while (in != NULL)
{
//get the last layer's output as input shape
in->tensor = in->hook.io->tensor;
in = in->aux;
in_num++;
}
// allocate a new tensor for output, keeping the same number of dimensions
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// convert the axis.
if (cl->axis < 0)
cl->axis = (layer->in->tensor->num_dim + cl->axis);
else if (cl->axis >0)
cl->axis = cl->axis -1; // Keras uses axes starting from 1; we use 0, 1, 2 (check?)
// find out the concatenated axis
num_dim = layer->in->tensor->num_dim;
for (uint32_t i = 0; i < num_dim; i ++)
{
// exclude the concat axis
if (i == cl->axis)
{
layer->out->tensor->dim[i] = 0;
// add the same axis from all input up.
in = layer->in;
while (in != NULL)
{
layer->out->tensor->dim[i] += in->tensor->dim[i];
in = in->aux;
}
continue;
}
// check the others; all other dims must have the same shape
in = layer->in;
while (in != NULL && in->aux != NULL)
{
if (in->tensor->dim[i] != in->aux->tensor->dim[i])
return NN_ARGUMENT_ERROR;
in = in->aux;
}
// now set other axis
layer->out->tensor->dim[i] = layer->in->tensor->dim[i];
}
return NN_SUCCESS;
}
#ifdef NNOM_USING_CHW
// axis index converter between HWC and CHW
static inline int chw_i(int hwc, int num_dim)
{
num_dim = num_dim -1;
hwc = hwc + 1;
if(hwc>num_dim)
hwc = 0;
return hwc;
}
static inline int hwc_i(int chw, int num_dim)
{
num_dim = num_dim -1;
chw = chw - 1;
if(chw < 0)
chw = num_dim;
return chw;
}
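// Illustrative example (not from the original code): with num_dim = 3, chw_i() maps the
// HWC axes (H=0, W=1, C=2) to the CHW axes (1, 2, 0), and hwc_i() is the inverse mapping.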
#endif
nnom_status_t concat_run(nnom_layer_t *layer)
{
// by default, the concat layer has multiple (>=2) inputs and 1 output.
nnom_concat_layer_t *cl = (nnom_concat_layer_t *)layer;
nnom_layer_io_t *in;
uint32_t dwidth = layer->in->tensor->bitwidth/8; // data width in byte
#ifdef NNOM_USING_CHW
// Concatenate for CHW
uint8_t *pin;
uint8_t *pout = layer->out->tensor->p_data;
uint32_t block_size;
uint32_t n_block;
uint8_t num_dim = layer->in->tensor->num_dim;
// calculate the number of blocks to concat (the product of the dims before the concat axis)
n_block = 1;
for(int i= 0; i< chw_i(cl->axis, num_dim); i++)
{
n_block *= layer->in->tensor->dim[hwc_i(i, num_dim)];
}
// concat all input layers
for(int i=0; i<n_block; i++)
{
in = layer->in;
while (in != NULL)
{
// the block size of concat data in this layer
block_size = dwidth;
for(int j= num_dim-1; j >= chw_i(cl->axis, num_dim); j--)
block_size *= in->tensor->dim[hwc_i(j, num_dim)];
// concat
pin = (uint8_t *)in->tensor->p_data + i * block_size;
nnom_memcpy(pout, pin, block_size);
pout += block_size;
in = in->aux;
}
}
#else // end of CHW concatenation
// Concatenate for HWC
uint8_t* pin;
uint8_t* pout = layer->out->tensor->p_data;
uint32_t block_size;
uint32_t n_block;
uint8_t num_dim = layer->in->tensor->num_dim;
// calculate the number of blocks to concat (the product of the dims before the concat axis)
n_block = 1;
for (int i = 0; i < cl->axis; i++)
n_block *= layer->in->tensor->dim[i];
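// Illustrative example (shapes chosen for illustration): concatenating two q7 HWC inputs of
// 4 x 6 x 8 and 4 x 6 x 16 on the channel axis gives n_block = 4 * 6 = 24; each iteration
// below then copies a block of 8 bytes from the first input and 16 bytes from the second.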
// concat all input layers
for (int i = 0; i < n_block; i++)
{
in = layer->in;
while (in != NULL)
{
// the block size of concat data in this layer
block_size = dwidth;
for (int j = cl->axis; j < num_dim; j++)
block_size *= in->tensor->dim[j];
// concat
pin = (uint8_t*)in->tensor->p_data + i * block_size;
nnom_memcpy(pout, pin, block_size);
pout += block_size;
in = in->aux;
}
}
#endif
return NN_SUCCESS;
}

View File

@@ -0,0 +1,434 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_conv2d.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
// a machine friendly api, with suffix _s for structured configuration.
nnom_layer_t *conv2d_s(const nnom_conv2d_config_t *config)
{
nnom_conv2d_layer_t *layer;
nnom_buf_t *comp;
nnom_layer_io_t *in, *out;
size_t mem_size;
// allocate a block of memory for all the sub handles and shifts.
mem_size = sizeof(nnom_conv2d_layer_t) + sizeof(nnom_layer_io_t) * 2 + sizeof(nnom_buf_t);
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to the sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_conv2d_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_CONV_2D;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
comp->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
#ifdef NNOM_USING_CMSIS_NN
layer->super.comp = comp;
#endif
// set run method & output shape
layer->super.run = conv2d_run;
layer->super.build = conv2d_build;
layer->super.free = conv2d_free;
// save the config
layer->super.config = (void*) config;
// get the private parameters
// test: for 1d input, expand h = 1
if(config->weight->num_dim == 3)
{
layer->kernel = kernel(1, config->kernel_size[0]);
layer->stride = stride(1, config->stride_size[0]);
layer->dilation = dilation(1, config->dilation_size[0]);
}
else
{
layer->kernel = kernel(config->kernel_size[0], config->kernel_size[1]);
layer->stride = stride(config->stride_size[0], config->stride_size[1]);
layer->dilation = dilation(config->dilation_size[0], config->dilation_size[1]);
}
layer->filter_mult = config->filter_size; // for convs, this means filter number
layer->padding_type = config->padding_type;
// get bias and weight tensor, this should be created by script.
layer->weight = config->weight;
layer->bias = config->bias;
// get shifts
layer->output_rshift = (nnom_qformat_param_t *)config->output_shift;
layer->bias_lshift = (nnom_qformat_param_t *)config->bias_shift;
// padding
if (layer->padding_type == PADDING_SAME)
{
layer->pad.h = layer->dilation.h * (layer->kernel.h - 1) / 2;
layer->pad.w = layer->dilation.w * (layer->kernel.w - 1) / 2;
layer->pad.c = (1 - 1) / 2;
}
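// Illustrative example (values chosen for illustration): a 3x3 kernel with dilation 1
// gives pad.h = pad.w = 1; a 5x5 kernel with dilation 2 gives pad.h = pad.w = 4.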
return (nnom_layer_t *)layer;
}
// Conv2D
// multiplier of (output/input channel),
// shape of kernel, shape of strides, weight struct, bias struct
nnom_layer_t *Conv2D(uint32_t filters, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
const nnom_weight_t *w, const nnom_bias_t *b)
{
nnom_conv2d_layer_t *layer;
nnom_buf_t *comp;
nnom_layer_io_t *in, *out;
// allocate a block of memory for all the sub handles.
size_t mem_size = sizeof(nnom_conv2d_layer_t) + sizeof(nnom_layer_io_t) * 2 + sizeof(nnom_buf_t);
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to the sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_conv2d_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_CONV_2D;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
comp->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
#ifdef NNOM_USING_CMSIS_NN
layer->super.comp = comp;
#endif
// set run method & output shape
layer->super.run = conv2d_run;
layer->super.build = conv2d_build;
// get the private parameters
layer->kernel = k;
layer->stride = s;
layer->dilation = d;
layer->filter_mult = filters; // for convs, this means filter number
layer->padding_type = pad_type;
// create weight and bias tensor
layer->weight = new_tensor(NNOM_QTYPE_PER_TENSOR, 4, filters);
layer->bias = new_tensor(NNOM_QTYPE_PER_TENSOR, 1, filters);
// configure weight tensor manually to support new tensor based backends.
// needs to be very careful
{
// config weight
nnom_shape_data_t dim[4] = {k.h, k.w, k.c, filters};
*(layer->weight->q_offset) = 0; // we have no support of offset here
*(layer->weight->q_dec) = 0; // not using it
layer->weight->p_data = (void*)w->p_value;
layer->weight->bitwidth = 8;
layer->weight->qtype = NNOM_QTYPE_PER_TENSOR;
nnom_memcpy(layer->weight->dim, dim, layer->weight->num_dim * sizeof(nnom_shape_data_t));
// config bias
dim[0] = filters;
*(layer->bias->q_offset) = 0; // we have no support of offset here
*(layer->bias->q_dec) = 0; // not using it
layer->bias->p_data = (void*) b->p_value;
layer->bias->bitwidth = 8;
layer->bias->qtype = NNOM_QTYPE_PER_TENSOR;
nnom_memcpy(layer->bias->dim, dim, layer->bias->num_dim * sizeof(nnom_shape_data_t));
// output shift and bias shift
layer->output_rshift = (nnom_qformat_param_t *)&w->shift;
layer->bias_lshift = (nnom_qformat_param_t *)&b->shift;
}
return (nnom_layer_t *)layer;
}
// keras's implementation.
// source: https://github.com/keras-team/keras/blob/7a39b6c62d43c25472b2c2476bd2a8983ae4f682/keras/utils/conv_utils.py#L85
uint32_t conv_output_length(uint32_t input_length, uint32_t filter_size, nnom_padding_t padding, uint32_t stride, uint32_t dilation)
{
if (input_length == 0)
return 0;
uint32_t dilated_filter_size = (filter_size - 1) * dilation + 1;
uint32_t output_length;
if(padding == PADDING_SAME)
output_length = input_length;
else
output_length = input_length - dilated_filter_size + 1;
return (output_length + stride - 1) / stride;
}
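// Worked example (values chosen for illustration): input_length = 28, filter_size = 3,
// stride = 2, dilation = 1 gives dilated_filter_size = 3;
//   PADDING_VALID: output_length = 28 - 3 + 1 = 26  ->  (26 + 2 - 1) / 2 = 13
//   PADDING_SAME : output_length = 28               ->  (28 + 2 - 1) / 2 = 14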
nnom_status_t conv2d_build(nnom_layer_t *layer)
{
nnom_conv2d_layer_t *cl = (nnom_conv2d_layer_t *)layer;
// get the tensor from last layer's output
layer->in->tensor = layer->in->hook.io->tensor;
// create new tensor for the output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, cl->filter_mult);
// copy then change later.
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// calculate the output tensor q format, only support per tensor quantise now
layer->out->tensor->q_dec[0] = layer->in->tensor->q_dec[0] + cl->weight->q_dec[0] - cl->output_rshift[0]; // need some modification for 16bit.
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// now we set up the tensor shape, always HWC format
layer->out->tensor->dim[0] = conv_output_length(layer->in->tensor->dim[0], cl->kernel.h, cl->padding_type, cl->stride.h, cl->dilation.h);
layer->out->tensor->dim[1] = conv_output_length(layer->in->tensor->dim[1], cl->kernel.w, cl->padding_type, cl->stride.w, cl->dilation.w);
layer->out->tensor->dim[2] = cl->filter_mult; // output channel = number of filters
// fill padding
if (cl->padding_type == PADDING_SAME)
{
cl->pad.w = cl->dilation.w * (cl->kernel.w - 1) / 2;
cl->pad.h = cl->dilation.h * (cl->kernel.h - 1) / 2;
cl->pad.c = 0;
}
#ifdef NNOM_USING_CMSIS_NN
// bufferA size: (1D shape)
// 2*ch_im_in*dim_kernel*dim_kernel
layer->comp->size = 2 * 2 * layer->in->tensor->dim[2] * cl->kernel.w * cl->kernel.h;
#endif
// computational cost: K x K x Cin x Hout x Wout x Cout
layer->stat.macc = cl->kernel.w * cl->kernel.h * layer->in->tensor->dim[2] * tensor_size(layer->out->tensor);
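// Illustrative example (values chosen for illustration): a 3x3 kernel, 16 input channels and a
// 32 x 32 x 32 output gives macc = 3 * 3 * 16 * (32 * 32 * 32) = 4,718,592 MAC operations.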
return NN_SUCCESS;
}
nnom_status_t conv2d_free(nnom_layer_t *layer)
{
// free weight and bias tensor when we are not initialised from structured configuration.
if(!layer->config)
{
nnom_conv2d_layer_t* cl = (nnom_conv2d_layer_t*)layer;
delete_tensor(cl->weight);
delete_tensor(cl->bias);
}
return NN_SUCCESS;
}
nnom_status_t conv2d_run(nnom_layer_t *layer)
{
nnom_conv2d_layer_t *cl = (nnom_conv2d_layer_t *)layer;
#ifdef NNOM_USING_CHW
// CHW format
if(layer->in->tensor->bitwidth == 16)
local_convolve_CHW_q15_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data, layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
else
local_convolve_CHW_q7_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data, layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
return NN_SUCCESS;
#else
// HWC format
#ifdef NNOM_USING_CMSIS_NN
// current cmsis nn does not support dilation
if(cl->dilation.w == 1 && cl->dilation.h == 1 && cl->weight->qtype == NNOM_QTYPE_PER_TENSOR)
{
// 8 bit cmsis nn
if(layer->in->tensor->bitwidth == 8)
{
//RGB
// ch_im_in = 3, w = h
if (layer->in->tensor->dim[2] == 3 && layer->in->tensor->dim[0] == layer->in->tensor->dim[1])
// squared
if((cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
return (nnom_status_t)arm_convolve_HWC_q7_RGB(
layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
cl->weight->p_data,
layer->out->tensor->dim[2],
cl->kernel.w, cl->pad.w, cl->stride.w,
cl->bias->p_data, cl->bias_lshift[0],
cl->output_rshift[0], layer->out->tensor->p_data, layer->out->tensor->dim[1],
(q15_t *)(layer->comp->mem->blk), NULL);
// check if we can use the optimized function
// ch_im_in is a multiple of 4
// ch_im_out is a multiple of 2
if ((layer->in->tensor->dim[2] % 4 == 0) && (layer->out->tensor->dim[2] % 2 == 0))
{
// squared
if((layer->in->tensor->dim[0] == layer->in->tensor->dim[1])
&& (layer->out->tensor->dim[0] == layer->out->tensor->dim[1])
&& (cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
{
// 1x1 fast
if (cl->kernel.w == 1 && cl->kernel.h == 1 && cl->stride.w == 1 && cl->stride.h == 1 && cl->pad.w == 0 && cl->pad.h == 0)
return (nnom_status_t)arm_convolve_1x1_HWC_q7_fast_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data,
layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
cl->bias->p_data, cl->bias_lshift[0],
cl->output_rshift[0], layer->out->tensor->p_data, layer->out->tensor->dim[1], layer->out->tensor->dim[0],
(q15_t *)(layer->comp->mem->blk), NULL);
// opt square shape
else
return (nnom_status_t)arm_convolve_HWC_q7_fast(
layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
cl->weight->p_data,
layer->out->tensor->dim[2], cl->kernel.w, cl->pad.w, cl->stride.w,
cl->bias->p_data, cl->bias_lshift[0],
cl->output_rshift[0], layer->out->tensor->p_data,
layer->out->tensor->dim[1], (q15_t *)(layer->comp->mem->blk), NULL);
}
// opt non-square shape
else
return (nnom_status_t)arm_convolve_HWC_q7_fast_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data, layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
cl->bias->p_data, cl->bias_lshift[0], cl->output_rshift[0],
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], (q15_t *)(layer->comp->mem->blk), NULL);
}
// non-optimized
else
{
// non-opt square shape
if ((layer->in->tensor->dim[0] == layer->in->tensor->dim[1] &&
layer->out->tensor->dim[0] == layer->out->tensor->dim[1]) &&
(cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
return (nnom_status_t)arm_convolve_HWC_q7_basic(
layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
cl->weight->p_data,
layer->out->tensor->dim[2], cl->kernel.w, cl->pad.w, cl->stride.w,
cl->bias->p_data, cl->bias_lshift[0],
cl->output_rshift[0], layer->out->tensor->p_data,
layer->out->tensor->dim[1], (q15_t *)(layer->comp->mem->blk), NULL);
// non-opt non-square shape
else
return (nnom_status_t)arm_convolve_HWC_q7_basic_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data, layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
cl->bias->p_data, cl->bias_lshift[0], cl->output_rshift[0],
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], (q15_t *)(layer->comp->mem->blk), NULL);
} //end of cmsis-nn non-opt
} //end of 8 bit cmsis-nn
else if (layer->in->tensor->bitwidth == 16)
{
// fast opt
if ((layer->in->tensor->dim[2] % 2 == 0) && (layer->out->tensor->dim[2] % 2 == 0))
{
if((layer->in->tensor->dim[0] == layer->in->tensor->dim[1])
&& (layer->out->tensor->dim[0] == layer->out->tensor->dim[1])
&& (cl->kernel.w == cl->kernel.h) && (cl->pad.w == cl->pad.h) && (cl->stride.w == cl->stride.h))
return (nnom_status_t)arm_convolve_HWC_q15_fast(
layer->in->tensor->p_data, layer->in->tensor->dim[1], layer->in->tensor->dim[2],
cl->weight->p_data,
layer->out->tensor->dim[2], cl->kernel.w, cl->pad.w, cl->stride.w,
cl->bias->p_data, cl->bias_lshift[0],
cl->output_rshift[0], layer->out->tensor->p_data,
layer->out->tensor->dim[1], (q15_t *)(layer->comp->mem->blk), NULL);
else
return (nnom_status_t)arm_convolve_HWC_q15_fast_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data, layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h,
cl->bias->p_data, cl->bias_lshift[0], cl->output_rshift[0],
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], (q15_t *)(layer->comp->mem->blk), NULL);
}
// non-opt basic
else
{
local_convolve_HWC_q15_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data, layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
return NN_SUCCESS;
}
} // end of 16 bit cmsis-nn
} // end of dilation == 1
else
#endif // NNOM_USING_CMSIS_NN
{
if(layer->in->tensor->bitwidth == 16)
local_convolve_HWC_q15_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data, layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
else
local_convolve_HWC_q7_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data, layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
cl->bias->p_data, cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
return NN_SUCCESS;
}
#endif // end of CHW/HWC
return NN_SUCCESS;
}

View File

@@ -0,0 +1,131 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-05-31 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_conv2d_trans.h"
nnom_layer_t *conv2d_trans_s(const nnom_conv2d_config_t *config)
{
nnom_layer_t *layer;
layer = conv2d_s(config);
if (layer)
{
layer->type = NNOM_CONV2D_TRANS;
layer->run = conv2d_trans_run;
layer->build = conv2d_trans_build;
}
return layer;
}
nnom_layer_t *Conv2DTrans(uint32_t multiplier, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
const nnom_weight_t *w, const nnom_bias_t *b)
{
nnom_layer_t *layer = Conv2D(multiplier, k, s, d, pad_type, w, b);
if (layer != NULL)
{
layer->type = NNOM_CONV2D_TRANS;
layer->run = conv2d_trans_run;
layer->build = conv2d_trans_build;
}
return layer;
}
// utils, keras method
// https://github.com/keras-team/keras/blob/7a39b6c62d43c25472b2c2476bd2a8983ae4f682/keras/utils/conv_utils.py#L114
// https://github.com/tensorflow/tensorflow/blob/2b96f3662bd776e277f86997659e61046b56c315/tensorflow/python/layers/utils.py#L156
uint32_t conv_trans_output_length(uint32_t input_length, uint32_t kernel_size, nnom_padding_t padding, uint32_t stride_size, uint32_t dilation)
{
input_length *= stride_size;
if (padding == PADDING_VALID)
input_length += MAX(kernel_size - stride_size, 0);
return input_length;
}
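// Worked example (values chosen for illustration): input_length = 13, kernel_size = 3, stride_size = 2;
//   PADDING_SAME : 13 * 2 = 26
//   PADDING_VALID: 13 * 2 + max(3 - 2, 0) = 27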
nnom_status_t conv2d_trans_build(nnom_layer_t *layer)
{
nnom_conv2d_trans_layer_t *cl = (nnom_conv2d_trans_layer_t *)layer;
// get the tensor from last layer's output
layer->in->tensor = layer->in->hook.io->tensor;
// create new tensor for the output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, cl->filter_mult);
// copy then change later.
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// calculate the output tensor q format, only support per tensor quantise now
layer->out->tensor->q_dec[0] = layer->in->tensor->q_dec[0] + cl->weight->q_dec[0] - cl->output_rshift[0];
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// now we set up the tensor shape, always HWC format
layer->out->tensor->dim[0] = conv_trans_output_length(layer->in->tensor->dim[0], cl->kernel.h, cl->padding_type, cl->stride.h, cl->dilation.h);
layer->out->tensor->dim[1] = conv_trans_output_length(layer->in->tensor->dim[1], cl->kernel.w, cl->padding_type, cl->stride.w, cl->dilation.w);
layer->out->tensor->dim[2] = cl->filter_mult; // output channel = number of filters
// fill the correct padding
if(cl->padding_type == PADDING_SAME)
{
cl->pad.h = (cl->kernel.h - cl->stride.h) / 2; // the padding to the output.
cl->pad.w = (cl->kernel.w - cl->stride.w) / 2;
// cl->pad.h = (cl->kernel.h - 1)/2; // the padding to the output.
// cl->pad.w = (cl->kernel.w - 1)/2;
cl->pad.c = 0;
}
else
{
cl->pad.h = 0;
cl->pad.w = 0;
cl->pad.c = 0;
}
// bufferA size: (1D shape)
// 2*ch_im_in*dim_kernel*dim_kernel
//layer->comp->size = 2 * 2 * layer->in->tensor->dim[2] * cl->kernel.w * cl->kernel.h;
// computational cost: K x K x Cin x Hout x Wout x Cout
layer->stat.macc = cl->kernel.w * cl->kernel.h * layer->in->tensor->dim[2] * tensor_size(layer->out->tensor);
return NN_SUCCESS;
}
nnom_status_t conv2d_trans_run(nnom_layer_t *layer)
{
nnom_conv2d_trans_layer_t *cl = (nnom_conv2d_trans_layer_t *)layer;
#ifdef NNOM_USING_CHW
// no support for CHW yet
return NN_ARGUMENT_ERROR;
#else
//return conv2d_run(layer);
local_conv_trans_HWC_q7_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data, layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h, cl->pad.w, cl->pad.h, cl->stride.w, cl->stride.h, cl->dilation.w, cl->dilation.h,
cl->bias->p_data, cl->bias_lshift[0], cl->output_rshift[0],
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
return NN_SUCCESS;
#endif
}

View File

@@ -0,0 +1,88 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_cropping.h"
nnom_layer_t * cropping_s(const nnom_cropping_config_t *config)
{
nnom_layer_t *layer = Cropping(config->pad);
if(layer)
layer->config = (void*) config;
return layer;
}
// Cropping layer
nnom_layer_t *Cropping(nnom_border_t pad)
{
nnom_layer_t *layer;
// most settings are the same as zero padding
layer = ZeroPadding(pad);
// now change to cropping
layer->type = NNOM_CROPPING;
layer->run = cropping_run;
layer->build = cropping_build;
return layer;
}
nnom_status_t cropping_build(nnom_layer_t* layer)
{
nnom_cropping_layer_t *cl = (nnom_cropping_layer_t *)layer;
// get the tensor from last layer's output
layer->in->tensor = layer->in->hook.io->tensor;
// create new tensor for output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
// copy then change later.
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// output shape
if(layer->in->tensor->dim[1] <= (cl->pad.left + cl->pad.right) ||
layer->in->tensor->dim[0] <= (cl->pad.top + cl->pad.bottom))
return NN_ARGUMENT_ERROR;
layer->out->tensor->dim[0] = layer->in->tensor->dim[0] - (cl->pad.top + cl->pad.bottom);
layer->out->tensor->dim[1] = layer->in->tensor->dim[1] - (cl->pad.left + cl->pad.right);
layer->out->tensor->dim[2] = layer->in->tensor->dim[2];
return NN_SUCCESS;
}
nnom_status_t cropping_run(nnom_layer_t * layer)
{
nnom_cropping_layer_t *cl = (nnom_cropping_layer_t*)layer;
#ifdef NNOM_USING_CHW
local_cropping_CHW_q7(
#else
local_cropping_HWC_q7(
#endif
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->pad.top,
cl->pad.bottom,
cl->pad.left,
cl->pad.right,
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0]);
return NN_SUCCESS;
}

View File

@@ -0,0 +1,207 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_dense.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
nnom_layer_t *dense_s(const nnom_dense_config_t *config)
{
nnom_dense_layer_t *layer;
nnom_buf_t *comp;
nnom_layer_io_t *in, *out;
// allocate a block of memory for all the sub handles.
size_t mem_size = sizeof(nnom_dense_layer_t) + sizeof(nnom_layer_io_t) * 2 + sizeof(nnom_buf_t);
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to the sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_dense_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_DENSE;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
comp->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
layer->super.comp = comp;
// set run and outshape methods
layer->super.run = dense_run;
layer->super.build = dense_build;
layer->super.free = dense_free;
// set parameters
layer->output_unit = tensor_get_num_channel(config->weight);
layer->bias = config->bias;
layer->weight = config->weight;
// set shifts
layer->output_rshift = (nnom_qformat_param_t *)config->output_shift;
layer->bias_lshift = (nnom_qformat_param_t *)config->bias_shift;
// set config
layer->super.config = (void*) config;
return (nnom_layer_t *)layer;
}
nnom_layer_t *Dense(size_t output_unit, const nnom_weight_t *w, const nnom_bias_t *b)
{
nnom_dense_layer_t *layer;
nnom_buf_t *comp;
nnom_layer_io_t *in, *out;
// allocate a block of memory for all the sub handles.
size_t mem_size = sizeof(nnom_dense_layer_t) + sizeof(nnom_layer_io_t) * 2 + sizeof(nnom_buf_t);
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to the sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_dense_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_DENSE;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
comp->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
layer->super.comp = comp;
// set run and outshape methods
layer->super.run = dense_run;
layer->super.build = dense_build;
// set parameters
layer->output_unit = output_unit; // this is no longer needed. the information is contained in the weight tensor.
layer->weight = new_tensor(NNOM_QTYPE_PER_TENSOR, 2, output_unit);
layer->bias = new_tensor(NNOM_QTYPE_PER_TENSOR, 1, output_unit);
// configure weight tensor manually to support new tensor-based backends.
// needs to be very careful
{
// config weight
nnom_shape_data_t dim[2] = {0, output_unit}; // the first dim doesn't matter here; it will be filled in later.
*(layer->weight->q_offset) = 0; // we have no support of offset here
*(layer->weight->q_dec) = 0; // this is not even correct
layer->weight->p_data = (void*)w->p_value;
layer->weight->bitwidth = 8;
layer->weight->qtype = NNOM_QTYPE_PER_TENSOR;
nnom_memcpy(layer->weight->dim, dim, layer->weight->num_dim * sizeof(nnom_shape_data_t));
// config bias
dim[0] = output_unit;
*(layer->bias->q_offset) = 0; // we have no support of offset here
*(layer->bias->q_dec) = 0; // this is not even correct
layer->bias->p_data = (void*)b->p_value;
layer->bias->bitwidth = 8;
layer->bias->qtype = NNOM_QTYPE_PER_TENSOR;
nnom_memcpy(layer->bias->dim, dim, layer->bias->num_dim * sizeof(nnom_shape_data_t));
}
// set output shifts
layer->output_rshift = (nnom_qformat_param_t *)&w->shift;
layer->bias_lshift = (nnom_qformat_param_t *)&b->shift;
return (nnom_layer_t *)layer;
}
nnom_status_t dense_build(nnom_layer_t *layer)
{
nnom_dense_layer_t *cl = (nnom_dense_layer_t *)layer;
// get the tensor from last layer's output
layer->in->tensor = layer->in->hook.io->tensor;
// create new tensor for output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, 1, tensor_get_num_channel(layer->in->tensor));
// setup new tensor
nnom_shape_data_t dim[1] = {cl->output_unit};
tensor_set_attr(layer->out->tensor, cl->weight->q_dec, cl->weight->q_offset, dim, 1, 8); // test, this is not correct
// calculate the output tensor q format, only support per tensor quantise now
layer->out->tensor->q_dec[0] = layer->in->tensor->q_dec[0] + cl->weight->q_dec[0] - cl->output_rshift[0];
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// vec_buffer size: dim_vec (*2, q7->q15) ? I am not sure this is right
layer->comp->size = tensor_size(layer->in->tensor)*2;
// computational cost: In * out
layer->stat.macc = tensor_size(layer->in->tensor) * tensor_size(layer->out->tensor);
return NN_SUCCESS;
}
nnom_status_t dense_free(nnom_layer_t *layer)
{
// free weight and bias tensor when we are not initialised from structured configuration.
if(!layer->config)
{
nnom_dense_layer_t* cl = (nnom_dense_layer_t*)layer;
delete_tensor(cl->weight);
delete_tensor(cl->bias);
}
return NN_SUCCESS;
}
nnom_status_t dense_run(nnom_layer_t *layer)
{
nnom_status_t result = NN_SUCCESS;
nnom_dense_layer_t *cl = (nnom_dense_layer_t *)(layer);
nnom_qformat_param_t bias_shift = cl->bias_lshift[0]; // this is not correct, but a temporary fix for backward compatibility.
nnom_qformat_param_t output_shift = cl->output_rshift[0];
#if !(DENSE_WEIGHT_OPT)
#ifdef NNOM_USING_CMSIS_NN
result = (nnom_status_t)arm_fully_connected_q7(
#else
local_fully_connected_q7(
#endif
#else
#ifdef NNOM_USING_CMSIS_NN
result = (nnom_status_t)arm_fully_connected_q7_opt(
#else
local_fully_connected_q7_opt(
#endif
#endif
layer->in->tensor->p_data,
cl->weight->p_data,
tensor_size(layer->in->tensor), layer->out->tensor->dim[0],
bias_shift, output_shift,
cl->bias->p_data,
layer->out->tensor->p_data, (q15_t *)(layer->comp->mem->blk));
return result;
}

View File

@@ -0,0 +1,140 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_dw_conv2d.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
nnom_layer_t *dw_conv2d_s(const nnom_conv2d_config_t *config)
{
nnom_layer_t *layer;
layer = conv2d_s(config);
if (layer)
{
layer->type = NNOM_DW_CONV_2D;
layer->run = dw_conv2d_run;
layer->build = dw_conv2d_build;
}
return layer;
}
nnom_layer_t *DW_Conv2D(uint32_t multiplier, nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_3d_shape_t d, nnom_padding_t pad_type,
const nnom_weight_t *w, const nnom_bias_t *b)
{
nnom_layer_t *layer = Conv2D(multiplier, k, s, d, pad_type, w, b); // passing the multiplier in.
if (layer != NULL)
{
layer->type = NNOM_DW_CONV_2D;
layer->run = dw_conv2d_run;
layer->build = dw_conv2d_build;
}
return layer;
}
nnom_status_t dw_conv2d_build(nnom_layer_t *layer)
{
nnom_conv2d_layer_t *cl = (nnom_conv2d_layer_t *)layer;
// get the tensor from last layer's output
layer->in->tensor = layer->in->hook.io->tensor;
// create new tensor for output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor) * cl->filter_mult);
// copy then change later.
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// calculate the output tensor q format, only support per tensor quantise now
layer->out->tensor->q_dec[0] = layer->in->tensor->q_dec[0] + cl->weight->q_dec[0] - cl->output_rshift[0];
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// now we set up the tensor shape, always HWC format
layer->out->tensor->dim[0] = conv_output_length(layer->in->tensor->dim[0], cl->kernel.h, cl->padding_type, cl->stride.h, cl->dilation.h);
layer->out->tensor->dim[1] = conv_output_length(layer->in->tensor->dim[1], cl->kernel.w, cl->padding_type, cl->stride.w, cl->dilation.w);
layer->out->tensor->dim[2] = layer->in->tensor->dim[2] * cl->filter_mult; // output channel = input channel x multiplier
// fill padding
if (cl->padding_type == PADDING_SAME)
{
cl->pad.w = cl->dilation.w * (cl->kernel.w - 1) / 2;
cl->pad.h = cl->dilation.h * (cl->kernel.h - 1) / 2;
cl->pad.c = 0;
}
// bufferA size:
#ifdef NNOM_USING_CMSIS_NN
layer->comp->size = 2 * 2 * (layer->in->tensor->dim[2] / cl->filter_mult) * cl->kernel.w * cl->kernel.h;
#endif
// computational cost: K x K x Cin x Hout x Wout x Multiplier
// or : K x K x Cout x Hout x Wout
layer->stat.macc = cl->kernel.w * cl->kernel.h * tensor_size(layer->out->tensor);
return NN_SUCCESS;
}
nnom_status_t dw_conv2d_run(nnom_layer_t *layer)
{
nnom_status_t result = NN_SUCCESS;
nnom_conv2d_layer_t *cl = (nnom_conv2d_layer_t *)layer;
#ifndef NNOM_USING_CHW
#ifdef NNOM_USING_CMSIS_NN
// Current CMSIS-NN does not support dilation
if(cl->dilation.w ==1 && cl->dilation.h == 1 && cl->weight->qtype == NNOM_QTYPE_PER_TENSOR && cl->filter_mult == 1)
{
// CMSIS-NN only supports a multiplier of 1 in depthwise conv
if (layer->in->tensor->dim[2] % 2 != 0 || layer->out->tensor->dim[2] % 2)
return NN_ARGUMENT_ERROR;
result = (nnom_status_t)arm_depthwise_separable_conv_HWC_q7_nonsquare(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data,
layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h,
cl->pad.w, cl->pad.h,
cl->stride.w, cl->stride.h,
cl->bias->p_data,
cl->bias_lshift[0], cl->output_rshift[0],
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], (q15_t *)(layer->comp->mem->blk), NULL);
}
else
#endif
local_depthwise_separable_conv_HWC_q7_nonsquare(
#else
local_depthwise_separable_conv_CHW_q7_nonsquare(
#endif
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->weight->p_data,
layer->out->tensor->dim[2],
cl->kernel.w, cl->kernel.h,
cl->pad.w, cl->pad.h,
cl->stride.w, cl->stride.h,
cl->dilation.w, cl->dilation.h,
cl->bias->p_data,
cl->bias_lshift, cl->output_rshift, cl->weight->qtype,
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0], NULL, NULL);
return result;
}

View File

@@ -0,0 +1,84 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_flatten.h"
nnom_layer_t *flatten_s(const nnom_flatten_config_t *config)
{
nnom_layer_t *layer = Flatten();
if(layer)
layer->config = (void*) config;
return layer;
}
nnom_layer_t *Flatten(void)
{
nnom_layer_t *layer;
nnom_layer_io_t *in, *out;
// allocate a block of memory for all the sub handles.
size_t mem_size = sizeof(nnom_layer_t) + sizeof(nnom_layer_io_t) * 2;
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to the sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->type = NNOM_FLATTEN;
layer->run = flatten_run;
layer->build = flatten_build;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
#ifdef NNOM_USING_CHW
out->type = NNOM_TENSOR_BUF_TEMP; // test for CHW format
#else
out->type = NNOM_TENSOR_BUF_NULL;
#endif
// put in & out on the layer.
layer->in = io_init(layer, in);
layer->out = io_init(layer, out);
return layer;
}
nnom_status_t flatten_build(nnom_layer_t *layer)
{
// get the tensor from last layer's output
layer->in->tensor = layer->in->hook.io->tensor;
// create new tensor for output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
// setup new tensor
nnom_shape_data_t dim[1] = {tensor_size(layer->in->tensor)};
tensor_set_attr(layer->out->tensor, layer->in->tensor->q_dec, layer->in->tensor->q_offset, dim, 1, 8);
return NN_SUCCESS;
}
nnom_status_t flatten_run(nnom_layer_t *layer)
{
#ifdef NNOM_USING_CHW
// CHW format must be reordered to HWC for the dense layer and all other 1D layers (?)
tensor_chw2hwc_q7(layer->out->tensor, layer->in->tensor);
#endif
return NN_SUCCESS;
}

View File

@@ -0,0 +1,145 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_global_pool.h"
nnom_layer_t * global_maxpool_s(const nnom_global_pool_config_t *config)
{
nnom_maxpool_layer_t * cl = (nnom_maxpool_layer_t *)GlobalMaxPool();
if(cl)
{
cl->super.config = (void*) config;
cl->output_shift = config->output_shift;
}
return (nnom_layer_t *)cl;
}
nnom_layer_t * global_avgpool_s(const nnom_global_pool_config_t *config)
{
nnom_maxpool_layer_t * cl = (nnom_maxpool_layer_t *)GlobalAvgPool();
if(cl)
{
cl->super.config = (void*) config;
cl->output_shift = config->output_shift;
}
return (nnom_layer_t *)cl;
}
nnom_layer_t * global_sumpool_s(const nnom_global_pool_config_t *config)
{
nnom_maxpool_layer_t * cl = (nnom_maxpool_layer_t *)GlobalSumPool();
if(cl)
{
cl->super.config = (void*) config;
cl->output_shift = config->output_shift;
}
return (nnom_layer_t *)cl;
}
nnom_layer_t *GlobalMaxPool(void)
{
// create the normal pooling layer; the parameters are left empty to be filled in later.
// parameters will be filled in by global_pool_build()
nnom_layer_t *layer = MaxPool(kernel(0, 0), stride(0, 0), PADDING_VALID);
// change to global max pool
if (layer != NULL)
{
layer->type = NNOM_GLOBAL_MAXPOOL;
layer->build = global_pool_build;
}
return (nnom_layer_t *)layer;
}
nnom_layer_t *GlobalAvgPool(void)
{
// create the normal pooling layer; the parameters are left empty to be filled in later.
// parameters will be filled in by global_pool_build()
nnom_layer_t *layer = MaxPool(kernel(0, 0), stride(0, 0), PADDING_VALID);
// change some parameters to be recognised as avg pooling
if (layer != NULL)
{
layer->type = NNOM_GLOBAL_AVGPOOL;
layer->run = avgpool_run; // global and basic pooling share the same runner
layer->build = global_pool_build;
}
return (nnom_layer_t *)layer;
}
nnom_layer_t *GlobalSumPool(void)
{
// create the normal pooling layer; the parameters are left empty to be filled in later.
// parameters will be filled in by global_pool_build()
nnom_layer_t *layer = MaxPool(kernel(0, 0), stride(0, 0), PADDING_VALID);
// change some parameters to be recognised as sum pooling
if (layer != NULL)
{
layer->type = NNOM_GLOBAL_SUMPOOL;
layer->run = sumpool_run; // global and basic pooling share the same runner
layer->build = global_pool_build;
}
return (nnom_layer_t *)layer;
}
nnom_status_t global_pool_build(nnom_layer_t *layer)
{
nnom_maxpool_layer_t *cl = (nnom_maxpool_layer_t *)layer;
// get the tensor from last layer's output
layer->in->tensor = layer->in->hook.io->tensor;
// create new tensor for output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, 1, tensor_get_num_channel(layer->in->tensor));
nnom_shape_data_t dim[1] = {tensor_get_num_channel(layer->in->tensor)}; // fill the first 2 dim later
tensor_set_attr_v(layer->out->tensor, layer->in->tensor->q_dec[0], 0, dim, sizeof(dim)/sizeof(nnom_shape_data_t), 8);
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// different from other *_build(), the kernel/stride/padding left empty by the layer API need to be set in here,
// because the *_run() methods of global pooling reuse the normal pooling's.
// fill in the parameters left by the layer APIs (GlobalMaxPool, GlobalAvgPool and GlobalSumPool)
cl->kernel = shape(layer->in->tensor->dim[0], layer->in->tensor->dim[1], 1);
cl->stride = shape(1, 1, 1);
cl->pad = shape(0, 0, 0);
cl->padding_type = PADDING_VALID;
// additionally, avg pooling requires a computational buffer, which is 2*dim_im_out*ch_im_in
if (layer->type == NNOM_AVGPOOL || layer->type == NNOM_GLOBAL_AVGPOOL)
{
// bufferA size: 2*dim_im_out*ch_im_in
layer->comp->size = 2 * layer->out->tensor->dim[0] * layer->in->tensor->dim[2];
}
// additional for sumpool
if (layer->type == NNOM_SUMPOOL || layer->type == NNOM_GLOBAL_SUMPOOL)
layer->comp->size = 4 * tensor_size(layer->out->tensor);
return NN_SUCCESS;
}

View File

@@ -0,0 +1,338 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-08-24 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_gru_cell.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
nnom_rnn_cell_t *gru_cell_s(const nnom_gru_cell_config_t* config)
{
nnom_gru_cell_t *cell;
cell = nnom_mem(sizeof(nnom_gru_cell_t));
if (cell == NULL)
return NULL;
// set methods
cell->super.run = gru_cell_run;
cell->super.build = gru_cell_build;
cell->super.free = gru_cell_free;
cell->super.config = (void*) config;
cell->super.units = config->units;
cell->super.type = NNOM_GRU_CELL;
// set parameters
cell->bias = config->bias;
cell->weights = config->weights;
cell->recurrent_weights = config->recurrent_weights;
// q format for intermediate calculation
cell->q_dec_h = config->q_dec_h;
cell->q_dec_z = config->q_dec_z;
return (nnom_rnn_cell_t *)cell;
}
nnom_status_t gru_cell_free(nnom_rnn_cell_t* cell)
{
return NN_SUCCESS;
}
// the state buffer and computational buffer shape of the cell
nnom_status_t gru_cell_build(nnom_rnn_cell_t* cell)
{
nnom_layer_t *layer = cell->layer;
nnom_gru_cell_t *c = (nnom_gru_cell_t *)cell;
// calculate the output shifts for the 2 calculations.
// hw = the product of hidden x weight, iw = the product of input x weight
// because they are added together, they must have the same q format.
// that is -> c->q_dec_z;
// for the dot products in the cell: output shift = input_dec + weight_dec - output_dec
c->oshift_hw = c->q_dec_h + c->recurrent_weights->q_dec[0] - c->q_dec_z;
c->oshift_iw = layer->in->tensor->q_dec[0] + c->weights->q_dec[0] - c->q_dec_z;
// bias shift = bias_dec - out_dec
c->bias_shift = layer->in->tensor->q_dec[0] + c->weights->q_dec[0] - c->bias->q_dec[0];
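// Illustrative example (q formats chosen for illustration): with q_dec_h = 7, recurrent weight
// q_dec = 7 and q_dec_z = 3, oshift_hw = 7 + 7 - 3 = 11; the same rule applies to oshift_iw.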
// state size = one timestep's output size.
cell->state_size = cell->units * 2; // Q15
// comp buffer size
cell->comp_buf_size = cell->units * (3*3) * 2 + cell->feature_size * 2; //q15 + input q7->q15 buffer.
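// Illustrative example (sizes chosen for illustration): with units = 16 and feature_size = 8,
// comp_buf_size = 16 * 9 * 2 + 8 * 2 = 304 bytes
// (three q15 buffers of units*3 elements each, plus a q15 copy of the q7 input).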
// finally, calculate the MACC info for each timestep
cell->macc = cell->feature_size * cell->units *3 // input: feature * state * 3 gates
+ cell->units * cell->units *8 // recurrent, state * output_unit * (5 gate + 3 mult)
+ cell->units * (3 + 3 + 5); // 3 gates, 3 mult, 5 addition
return NN_SUCCESS;
}
// keras implementation as below.
/*
def step(cell_inputs, cell_states):
"""Step function that will be used by Keras RNN backend."""
h_tm1 = cell_states[0]
# inputs projected by all gate matrices at once
matrix_x = K.dot(cell_inputs, kernel)
matrix_x = K.bias_add(matrix_x, input_bias)
x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=1)
# hidden state projected by all gate matrices at once
matrix_inner = K.dot(h_tm1, recurrent_kernel)
matrix_inner = K.bias_add(matrix_inner, recurrent_bias)
recurrent_z, recurrent_r, recurrent_h = array_ops.split(matrix_inner, 3,
axis=1)
z = nn.sigmoid(x_z + recurrent_z)
r = nn.sigmoid(x_r + recurrent_r)
hh = nn.tanh(x_h + r * recurrent_h)
# previous and candidate state mixed by update gate
h = z * h_tm1 + (1 - z) * hh
return h, [h]
*/
//
nnom_status_t gru_cell_run(nnom_rnn_cell_t* cell)
{
nnom_layer_t *layer = cell->layer;
nnom_gru_cell_t* c = (nnom_gru_cell_t*) cell;
int act_int_bit = 7 - c->q_dec_z;
// gate data
q15_t* x_z, *x_r, *x_h;
q15_t* recurrent_z, *recurrent_r, *recurrent_h;
q15_t* temp[3];
// bias
q7_t* bias = (q7_t*)c->bias->p_data;
q7_t* recurrent_bias = (q7_t*)c->bias->p_data + cell->units*3;
// state buffer
q15_t* h_tm1 = (q15_t*)cell->in_state;
q15_t* h_t = (q15_t*)cell->out_state;
// computing buffer
// low |-- buf0 --|-- buf1 --|-- buf2 --|-- input_q15 --|
q15_t *buf[3];
buf[0] = (q15_t*)layer->comp->mem->blk;
buf[1] = (q15_t*)layer->comp->mem->blk + cell->units*3;
buf[2] = (q15_t*)layer->comp->mem->blk + cell->units*6;
q15_t *in_q15_buf = (q15_t*)layer->comp->mem->blk + cell->units*9;
// input q7 cast to q15
local_q7_to_q15(cell->in_data, in_q15_buf, cell->feature_size);
// matrix_x = K.dot(cell_inputs, kernel) + bias --> buf0
#ifdef NNOM_USING_CMSIS_NN
arm_fully_connected_mat_q7_vec_q15_opt
#else
local_fully_connected_mat_q7_vec_q15_opt
#endif
(in_q15_buf, c->weights->p_data, cell->feature_size,
cell->units*3, c->bias_shift + 8, c->oshift_iw, bias, buf[0], NULL);
// matrix_inner = K.dot(h_tm1, recurrent_kernel) + bias -> buf1
#ifdef NNOM_USING_CMSIS_NN
arm_fully_connected_mat_q7_vec_q15_opt
#else
local_fully_connected_mat_q7_vec_q15_opt
#endif
(h_tm1, c->recurrent_weights->p_data, cell->units,
cell->units*3, c->bias_shift + 8, c->oshift_hw, recurrent_bias, buf[1], NULL);
// split to each gate
x_z = buf[0];
x_r = buf[0] + cell->units;
x_h = buf[0] + cell->units*2;
recurrent_z = buf[1];
recurrent_r = buf[1] + cell->units;
recurrent_h = buf[1] + cell->units*2;
// buffers
temp[0] = buf[2];
temp[1] = buf[2] + cell->units;
temp[2] = buf[2] + cell->units*2;
/* z = nn.sigmoid(x_z + recurrent_z) */
// 1. z1 = x_z + recurrent_z ---> temp[0]
local_add_q15(x_z, recurrent_z, temp[0], 0, cell->units);
// 2. z = sigmoid(z1)
local_sigmoid_q15(temp[0], cell->units, act_int_bit);
/* r = nn.sigmoid(x_r + recurrent_r) */
// 1. r1 = x_r + recurrent_r ---> temp[1]
local_add_q15(x_r, recurrent_r, temp[1], 0, cell->units);
// 2. r = sigmoid(r1)
local_sigmoid_q15(temp[1], cell->units, act_int_bit);
/* hh = nn.tanh(x_h + r * recurrent_h) */
// 1. hh1 = r * recurrent_h ---> temp[2]
local_mult_q15(temp[1], recurrent_h, temp[2], 15, cell->units);
// 2. hh2 = x_h + hh1 ---> temp[1]
local_add_q15(x_h, temp[2], temp[1], 0, cell->units);
// 3. hh = tanh(h2) ---> temp[1]
local_tanh_q15(temp[1], cell->units, act_int_bit);
/* h = z * h_tm1 + (1 - z) * hh */
// 1. h1 = z*h_tm1 ---> temp[2]
local_mult_q15(temp[0], h_tm1, temp[2], 15, cell->units);
// 2. h2 = 1 - z ---> h_t state buff
local_1_minor_z_q15(temp[0], h_t, 15, cell->units);
// 3. h3 = h2 * hh ---> temp[0]
local_mult_q15(h_t, temp[1], temp[0], 15, cell->units);
// h = h1 + h3
local_add_q15(temp[2], temp[0], h_t, 0, cell->units);
// finally, copy and convert state to output
local_q15_to_q7(h_t, cell->out_data, 8, cell->units);
return NN_SUCCESS;
}
// Reserved for debugging: printing the intermediate variables/data.
#if 0
// delete after testing completed
static void print_variable_q15(q15_t *data,char*name, int dec_bit, int size)
{
printf("\n\n");
printf("%s", name);
for(int i = 0; i < size; i++)
{
if(i%8==0)
printf("\n");
printf("%f\t", (float) data[i] / (1 << dec_bit));
}
printf("\n");
}
//
nnom_status_t gru_cell_run(nnom_rnn_cell_t* cell)
{
nnom_layer_t *layer = cell->layer;
nnom_gru_cell_t* c = (nnom_gru_cell_t*) cell;
int act_int_bit = 7 - c->q_dec_z;
// gate data
q15_t* x_z, *x_r, *x_h;
q15_t* recurrent_z, *recurrent_r, *recurrent_h;
q15_t* temp[3];
// test
//nnom_memset(cell->in_data, 5 * (1<<layer->in->tensor->q_dec[0]), cell->feature_size);
// bias
q7_t* bias = (q7_t*)c->bias->p_data;
q7_t* recurrent_bias = (q7_t*)c->bias->p_data + cell->units*3;
// state buffer
q15_t* h_tm1 = (q15_t*)cell->in_state;
q15_t* h_t = (q15_t*)cell->out_state;
// computing buffer
// low |-- buf0 --|-- buf1 --|-- buf2 --|-- input_q15 --|
q15_t *buf[3];
buf[0] = (q15_t*)layer->comp->mem->blk;
buf[1] = (q15_t*)layer->comp->mem->blk + cell->units*3;
buf[2] = (q15_t*)layer->comp->mem->blk + cell->units*6;
q15_t *in_q15_buf = (q15_t*)layer->comp->mem->blk + cell->units*9;
// input q7 cast to q15
local_q7_to_q15(cell->in_data, in_q15_buf, cell->feature_size);
// matrix_x = K.dot(cell_inputs, kernel) + bias --> buf0
#ifdef NNOM_USING_CMSIS_NN
arm_fully_connected_mat_q7_vec_q15_opt
#else
local_fully_connected_mat_q7_vec_q15_opt
#endif
(in_q15_buf, c->weights->p_data, cell->feature_size,
cell->units*3, c->bias_shift + 8, c->oshift_iw, bias, buf[0], NULL);
// matrix_intter = K.dot(h_tm1, recurrent_kernel) + bias -> buf1
#ifdef NNOM_USING_CMSIS_NN
arm_fully_connected_mat_q7_vec_q15_opt
#else
local_fully_connected_mat_q7_vec_q15_opt
#endif
(h_tm1, c->recurrent_weights->p_data, cell->units,
cell->units*3, c->bias_shift + 8, c->oshift_hw, recurrent_bias, buf[1], NULL);
print_variable_q15(in_q15_buf, "input", layer->in->tensor->q_dec[0]+8, cell->feature_size);
print_variable_q15(buf[0], "matrix_x", c->q_dec_z+8, cell->units*3);
print_variable_q15(buf[1], "matrix_recurrent", c->q_dec_z+8, cell->units*3);
// split to each gate
x_z = buf[0];
x_r = buf[0] + cell->units;
x_h = buf[0] + cell->units*2;
recurrent_z = buf[1];
recurrent_r = buf[1] + cell->units;
recurrent_h = buf[1] + cell->units*2;
// buffers
temp[0] = buf[2];
temp[1] = buf[2] + cell->units;
temp[2] = buf[2] + cell->units*2;
// z = nn.sigmoid(x_z + recurrent_z)
// 1. z1 = x_z + recurrent_z ---> temp[0]
local_add_q15(x_z, recurrent_z, temp[0], 0, cell->units);
// 2. z = sigmoid(z1)
local_sigmoid_q15(temp[0], cell->units, act_int_bit);
print_variable_q15(temp[0], "z", 15, cell->units);
// r = nn.sigmoid(x_r + recurrent_r)
// 1. r1 = x_r + recurrent_r ---> temp[1]
local_add_q15(x_r, recurrent_r, temp[1], 0, cell->units);
// 2. r = sigmoid(r1)
local_sigmoid_q15(temp[1], cell->units, act_int_bit);
print_variable_q15(temp[1], "r", 15, cell->units);
// hh = nn.tanh(x_h + r * recurrent_h)
// 1. hh1 = r * recurrent_h ---> temp[2]
local_mult_q15(temp[1], recurrent_h, temp[2], 15, cell->units);
// 2. hh2 = x_h + h1 ---> temp[1]
local_add_q15(x_h, temp[2], temp[1], 0, cell->units);
// 3. hh = tanh(h2) ---> temp[1]
local_tanh_q15(temp[1], cell->units, act_int_bit);
print_variable_q15(temp[1], "hh", 15, cell->units);
// h = z * h_tm1 + (1 - z) * hh
// 1. h1 = z*h_tm1 ---> temp[2]
local_mult_q15(temp[0], h_tm1, temp[2], 15, cell->units);
print_variable_q15( temp[2], "h1", 15, cell->units);
// 2. h2 = 1 - z ---> h_t state buff
local_1_minor_z_q15(temp[0], h_t, 15, cell->units);
print_variable_q15( h_t, "h2", 15, cell->units);
// 3. h3 = h2 * hh ---> temp[0]
local_mult_q15(h_t, temp[1], temp[0], 15, cell->units);
print_variable_q15( temp[0], "h3", 15, cell->units);
// h = h1 + h3
local_add_q15(temp[2], temp[0], h_t, 0, cell->units);
print_variable_q15(h_t, "h", 15, cell->units);
// finally, copy and convert state to output
local_q15_to_q7(h_t, cell->out_data, 8, cell->units);
return NN_SUCCESS;
}
#endif

View File

@@ -0,0 +1,145 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_input.h"
nnom_layer_t *input_s(const nnom_io_config_t* config)
{
nnom_io_layer_t *layer;
nnom_layer_io_t *in, *out;
// apply a block memory for all the sub handles.
layer = nnom_mem(sizeof(nnom_io_layer_t) + sizeof(nnom_layer_io_t) * 2);
if (layer == NULL)
return NULL;
// distribute the memory to sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_io_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_INPUT;
layer->super.run = input_run;
layer->super.build = input_build;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_NULL;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
/*
// some other layers (Conv, pooling) do not support 1-D/2-D input, so we still expand the 1 and 2 dimension cases to 3
// test -> native support 1,2,3 D input.
layer->super.in->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, config->tensor->num_dim, tensor_get_num_channel(config->tensor));
tensor_cpy_attr(layer->super.in->tensor, config->tensor);
layer->buf = config->tensor->p_data;
layer->dec_bit = config->tensor->q_dec[0];
*/
// set parameters
if(config->tensor->num_dim == 1) // test for 1d input, expand h = 1
layer->shape = shape(1, 1, config->tensor->dim[0]);
else if (config->tensor->num_dim == 2) // test for 2d input, expand h = 1
layer->shape = shape(1, config->tensor->dim[0], config->tensor->dim[1]);
else
layer->shape = shape(config->tensor->dim[0], config->tensor->dim[1], config->tensor->dim[2]);
layer->buf = config->tensor->p_data;
layer->dec_bit = config->tensor->q_dec[0];
// experimental: fixed input dim to 3
// input normally doesn't have a tensor, so we create one to store the initial data.
nnom_shape_data_t dim[3] = {layer->shape.h, layer->shape.w, layer->shape.c};
layer->super.in->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, 3, tensor_get_num_channel(config->tensor));
tensor_set_attr_v(layer->super.in->tensor, layer->dec_bit, 0, dim, sizeof(dim)/sizeof(nnom_shape_data_t), 8);
return (nnom_layer_t *)layer;
}
nnom_layer_t *Input(nnom_3d_shape_t input_shape, void *p_buf)
{
nnom_io_layer_t *layer;
nnom_layer_io_t *in, *out;
// apply a block memory for all the sub handles.
layer = nnom_mem(sizeof(nnom_io_layer_t) + sizeof(nnom_layer_io_t) * 2);
if (layer == NULL)
return NULL;
// distribute the memory to sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_io_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_INPUT;
layer->super.run = input_run;
layer->super.build = input_build;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_NULL;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
// set parameters
layer->shape = input_shape;
layer->buf = p_buf;
layer->dec_bit = 7;
// experimental: fixed input dim to 3
// input normally doesn't have a tensor, so we create one to store the initial data.
nnom_shape_data_t dim[3] = { input_shape.h, input_shape.w, input_shape.c };
layer->super.in->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, 3, input_shape.c);
tensor_set_attr_v(layer->super.in->tensor, layer->dec_bit, 0, dim, sizeof(dim)/sizeof(nnom_shape_data_t), 8);
return (nnom_layer_t *)layer;
}
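/* Usage sketch (illustrative only; buffer name and image size are hypothetical):
   feed a 28x28x1 q7 image buffer into the graph through the legacy Input() API,
   with shape() packing (h, w, c) as used elsewhere in this file.

   static int8_t input_buf[28 * 28 * 1];
   nnom_layer_t *in = Input(shape(28, 28, 1), input_buf);
*/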
nnom_status_t input_build(nnom_layer_t* layer)
{
// the input tensor of the input layer has been assigned previously
// output tensor
// 1. allocate a new tensor for output
// 2. set the same dim, qfmt to the new tensor.
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// now this build has passed the input tensors (shapes, formats) to the new tensors.
return NN_SUCCESS;
}
nnom_status_t input_run(nnom_layer_t *layer)
{
nnom_io_layer_t *cl = (nnom_io_layer_t *)layer;
#ifdef NNOM_USING_CHW
if(layer->in->tensor->num_dim == 3)
{
nnom_3d_shape_t shape = {layer->in->tensor->dim[0], layer->in->tensor->dim[1], layer->in->tensor->dim[2]};
hwc2chw_q7(shape, cl->buf, layer->in->tensor->p_data);
}
else if (layer->in->tensor->num_dim == 2)
{
nnom_3d_shape_t shape = {1, layer->in->tensor->dim[0], layer->in->tensor->dim[1]};
hwc2chw_q7(shape, cl->buf, layer->in->tensor->p_data);
}
else
#endif
nnom_memcpy(layer->in->tensor->p_data, cl->buf, tensor_size(layer->in->tensor));
return NN_SUCCESS;
}

View File

@@ -0,0 +1,81 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_lambda.h"
nnom_layer_t *lambda_s(const nnom_lambda_config_t * config)
{
nnom_lambda_layer_t *cl = (nnom_lambda_layer_t *)Lambda(
config->run_func_name,
config->build_func_name,
config->free_func_name,
config->parameters);
if(cl)
cl->super.config = (void*) config;
return (nnom_layer_t *)cl;
}
// TODO: extend to multiple IO layers
nnom_layer_t *Lambda(nnom_status_t (*run)(nnom_layer_t *),
nnom_status_t (*build)(nnom_layer_t *),
nnom_status_t (*free)(nnom_layer_t *),
void *parameters)
{
nnom_lambda_layer_t *layer;
nnom_layer_io_t *in, *out;
// apply a block memory for all the sub handles.
size_t mem_size = sizeof(nnom_lambda_layer_t) + sizeof(nnom_layer_io_t) * 2;
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_lambda_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
// set buf type.
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
// set io modules to the layer
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
// layer type
layer->super.type = NNOM_LAMBDA;
// user parameters
layer->parameters = parameters;
// free method
layer->super.free = free;
// output shape method. Passing NULL will use the default build method, which sets the output shape the same as the input shape.
if (build == NULL)
layer->super.build = default_build;
else
layer->super.build = build;
// run method. default_run() will simply copy data from input tensor to output tensor.
if(run == NULL)
layer->super.run = default_run;
else
layer->super.run = run;
return (nnom_layer_t *)layer;
}
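/* Usage sketch (illustrative only, names are hypothetical): a minimal lambda layer
   whose run method negates the input tensor into the output tensor; build/free are
   left NULL so the default build (output shape = input shape) is used.

   static nnom_status_t negate_run(nnom_layer_t *layer)
   {
       int8_t *in = layer->in->tensor->p_data;
       int8_t *out = layer->out->tensor->p_data;
       for (uint32_t i = 0; i < tensor_size(layer->in->tensor); i++)
           out[i] = (int8_t)(-in[i]);   // element-wise negation, q7 in, q7 out
       return NN_SUCCESS;
   }

   nnom_layer_t *l = Lambda(negate_run, NULL, NULL, NULL);
*/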

View File

@@ -0,0 +1,334 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-08-24 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_lstm_cell.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
// LSTM RNN
// unit = output shape
// type of activation
nnom_rnn_cell_t *lstm_cell_s(const nnom_lstm_cell_config_t* config)
{
nnom_lstm_cell_t *cell;
cell = nnom_mem(sizeof(nnom_lstm_cell_t));
if (cell == NULL)
return NULL;
// set methods
cell->super.run = lstm_cell_q7_q15_run;
cell->super.build = lstm_cell_q7_q15_build;
cell->super.free = lstm_cell_free;
cell->super.config = (void*) config;
cell->super.units = config->units;
cell->super.type = NNOM_LSTM_CELL;
// set parameters
cell->bias = config->bias;
cell->weights = config->weights;
cell->recurrent_weights = config->recurrent_weights;
// q format for intermediate calculation
cell->q_dec_c = config->q_dec_c;
cell->q_dec_h = config->q_dec_h;
cell->q_dec_z = config->q_dec_z;
return (nnom_rnn_cell_t *)cell;
}
nnom_status_t lstm_cell_free(nnom_rnn_cell_t* cell)
{
return NN_SUCCESS;
}
// The Keras implementation is shown below.
/*
def step(cell_inputs, cell_states):
"""Step function that will be used by Keras RNN backend."""
h_tm1 = cell_states[0] # previous memory state
c_tm1 = cell_states[1] # previous carry state
z = K.dot(cell_inputs, kernel) -> q_iw
z += K.dot(h_tm1, recurrent_kernel) -> q_hw
z = K.bias_add(z, bias)
z0, z1, z2, z3 = array_ops.split(z, 4, axis=1)
i = nn.sigmoid(z0)
f = nn.sigmoid(z1)
c = f * c_tm1 + i * nn.tanh(z2)
o = nn.sigmoid(z3)
h = o * nn.tanh(c)
return h, [h, c]
*/
// the state buffer and computational buffer shape of the cell
nnom_status_t lstm_cell_q7_q15_build(nnom_rnn_cell_t* cell)
{
nnom_layer_t *layer = cell->layer;
nnom_lstm_cell_t *c = (nnom_lstm_cell_t *)cell;
// calculate the output shift for the 2 calculations.
// hw = the product of hidden x weight, iw = the product of input x weight
// due to the addition of them, they must have same q format.
// that is -> c->q_dec_z;
// for the dots in cell: output shift = input_dec + weight_dec - output_dec
c->oshift_hw = c->q_dec_h + c->recurrent_weights->q_dec[0] - c->q_dec_z;
c->oshift_iw = layer->in->tensor->q_dec[0] + c->weights->q_dec[0] - c->q_dec_z;
// bias shift = bias_dec - out_dec
c->bias_shift = layer->in->tensor->q_dec[0] + c->weights->q_dec[0] - c->bias->q_dec[0];
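// Worked example (hypothetical Q formats, for illustration only): with input q_dec = 7,
// weight q_dec = 7, q_dec_z = 3 and bias q_dec = 7, oshift_iw = 7 + 7 - 3 = 11 and
// bias_shift = 7 + 7 - 7 = 7, i.e. the q7 x q7 products are shifted right by 11 bits
// to land in the shared z format.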
// state size = one timestamp's hidden + carry state, kept in q15.
cell->state_size = cell->units * 2 * 2; // Q15
// comp buffer size: 3 working buffers (units*4 q15 each) + the q7->q15 input buffer.
cell->comp_buf_size = cell->units * 12 * 2 + cell->feature_size * 2; //q15 + input q7->q15 buffer.
// finally, calculate the MAC for info (for each timestamp)
cell->macc = cell->feature_size * cell->units *4 // input: feature * state * 4 gates
+ cell->units * cell->units *4 // recurrent, state
+ cell->units *10; // output_unit * (5 gate + 3 mult + 2 addition)
return NN_SUCCESS;
}
// Q7 input output
// Q7 weights
// Q15 states and intermediate buffer
nnom_status_t lstm_cell_q7_q15_run(nnom_rnn_cell_t* cell)
{
nnom_layer_t *layer = cell->layer;
nnom_lstm_cell_t* c = (nnom_lstm_cell_t*) cell;
int act_int_bit = 7 - c->q_dec_z;
// state buffer
// low |-- hidden --|-- carry --| high
q15_t* h_tm1 = (q15_t*)cell->in_state;
q15_t* c_tm1 = (q15_t*)cell->in_state + cell->units;
q15_t* o_state[2];
o_state[0] = (q15_t*)cell->out_state;
o_state[1] = (q15_t*)cell->out_state + cell->units;
// computing buffer
// low |-- buf0 --|-- buf1 --|-- buf2 --|-- input q15 --|
q15_t* z[4];
q15_t *buf0, *buf1, *buf2, *in_q15_buf;
buf0 = (q15_t*)layer->comp->mem->blk;
buf1 = (q15_t*)layer->comp->mem->blk + cell->units*4;
buf2 = (q15_t*)layer->comp->mem->blk + cell->units*8;
in_q15_buf = (q15_t*)layer->comp->mem->blk + cell->units*12;
// input q7 -> q15
local_q7_to_q15(cell->in_data, in_q15_buf, cell->feature_size);
// z1 = K.dot(cell_inputs, kernel) + bias -> buf1
#ifdef NNOM_USING_CMSIS_NN
arm_fully_connected_mat_q7_vec_q15_opt
#else
local_fully_connected_mat_q7_vec_q15_opt
#endif
(in_q15_buf, c->weights->p_data, cell->feature_size, cell->units*4, c->bias_shift + 8, c->oshift_iw, c->bias->p_data, buf1, NULL);
// z2 = K.dot(h_tm1, recurrent_kernel) -> buf2
// --- arm version must use bias, so we have to use local implementation
local_fully_connected_mat_q7_vec_q15_opt(h_tm1, c->recurrent_weights->p_data,
cell->units, cell->units*4, 0, c->oshift_hw, NULL, buf2, NULL);
// z = z1 + z2 -> buf0
local_add_q15(buf1, buf2, buf0, 0, cell->units*4);
// split the data to each gate
z[0] = buf0;
z[1] = buf0 + cell->units;
z[2] = buf0 + cell->units*2;
z[3] = buf0 + cell->units*3;
// i = nn.sigmoid(z0)
local_sigmoid_q15(z[0], cell->units, act_int_bit);
// f = nn.sigmoid(z1)
local_sigmoid_q15(z[1], cell->units, act_int_bit);
// o = nn.sigmoid(z3)
local_sigmoid_q15(z[3], cell->units, act_int_bit);
/* c = f * c_tm1 + i * nn.tanh(z2) for the step 1-3. */
// 1. i * tanh(z2) -> buf1
local_tanh_q15(z[2], cell->units, act_int_bit);
local_mult_q15(z[0], z[2], buf1, 30 - (c->q_dec_c+8), cell->units);
// 2. f * c_tm1 -> o_state[0]
local_mult_q15(z[1], c_tm1, o_state[0], 15, cell->units);
// 3. c = i*tanh + f*c_tm1 -> o_state[1] ** fill the upper state (carry)
local_add_q15(buf1, o_state[0], o_state[1], 0, cell->units);
/* h = o * nn.tanh(c) -> o_state[0] for the step 1-2 */
// 1. tanh(c) -> buf2 --- first copy then activate.
nnom_memcpy(buf2, o_state[1], cell->units*2);
local_tanh_q15(buf2, cell->units, 7 - c->q_dec_c); // this int bit is under 8bit
// 2. h = o*tanh(c) -> o_state[0] ** fill the lower state (memory, hidden)
local_mult_q15(z[3], buf2, o_state[0], 15, cell->units);
// copy and shift q15 to q7 ** (copy hidden to output)
local_q15_to_q7(o_state[0], cell->out_data, 8, cell->units);
return NN_SUCCESS;
}
// Reserved for debugging: prints the intermediate products and variables.
#if 0
static void print_variable(q7_t* data,char*name, int dec_bit, int size)
{
printf("\n");
printf("%s\n", name);
for(int i = 0; i < size; i++)
{
if(i%8==0)
printf("\n");
printf("%f\t", (float) data[i] / (1 << dec_bit));
}
printf("\n");
}
static void print_variable_q15(q15_t *data,char*name, int dec_bit, int size)
{
printf("\n\n");
printf("%s", name);
for(int i = 0; i < size; i++)
{
if(i%8==0)
printf("\n");
printf("%f\t", (float) data[i] / (1 << dec_bit));
}
printf("\n");
}
// Q7 input output
// Q7 weights
// Q15 states and intermediate buffer
nnom_status_t lstm_cell_q7_q15_run(nnom_rnn_cell_t* cell)
{
nnom_layer_t *layer = cell->layer;
nnom_rnn_layer_t* cl = (nnom_rnn_layer_t *) layer;
nnom_lstm_cell_t* c = (nnom_lstm_cell_t*) cell;
int act_int_bit = 7 - c->q_dec_z;
// test
//nnom_memset(cell->in_data, 32, cell->feature_size);
// state buffer
// low |-- hidden --|-- carry --| high
q15_t* h_tm1 = (q15_t*)cell->in_state;
q15_t* c_tm1 = (q15_t*)cell->in_state + cell->units;
q15_t* o_state[2];
o_state[0] = (q15_t*)cell->out_state;
o_state[1] = (q15_t*)cell->out_state + cell->units;
// computing buffer
// low |-- buf0 --|-- buf1 --|-- buf2 --|-- input q15 --|
q15_t* z[4];
q15_t *buf0, *buf1, *buf2, *in_q15_buf;
buf0 = (q15_t*)layer->comp->mem->blk;
buf1 = (q15_t*)layer->comp->mem->blk + cell->units*4;
buf2 = (q15_t*)layer->comp->mem->blk + cell->units*8;
in_q15_buf = (q15_t*)layer->comp->mem->blk + cell->units*12;
// input q7 -> q15
//local_q7_to_q15_no_shift(cell->in_data, in_q15_buf, cell->feature_size);
local_q7_to_q15(cell->in_data, in_q15_buf, cell->feature_size);
print_variable_q15(in_q15_buf, "input", layer->in->tensor->q_dec[0] + 8, cell->feature_size);
print_variable_q15(h_tm1, "h_tml", 15, cell->units);
print_variable_q15(c_tm1, "c_tml", c->q_dec_c + 8, cell->units);
// z1 = K.dot(cell_inputs, kernel) + bias -> buf1
#ifdef NNOM_USING_CMSIS_NN
arm_fully_connected_mat_q7_vec_q15_opt
#else
local_fully_connected_mat_q7_vec_q15_opt
#endif
(in_q15_buf, c->weights->p_data, cell->feature_size, cell->units*4, c->bias_shift + 8, c->oshift_iw, c->bias->p_data, buf1, NULL);
// z2 = K.dot(h_tm1, recurrent_kernel) -> buf2
// arm version must use bias, so we have to use local implementation
local_fully_connected_mat_q7_vec_q15_opt(h_tm1, c->recurrent_weights->p_data,
cell->units, cell->units*4, 0, c->oshift_hw, NULL, buf2, NULL);
// z = z1 + z2 -> buf0
local_add_q15(buf1, buf2, buf0, 0, cell->units*4);
print_variable_q15(buf0, "z", c->q_dec_z + 8, cell->units*4);
print_variable_q15(buf1, "z1", c->q_dec_z + 8, cell->units*4);
print_variable_q15(buf2, "z2", c->q_dec_z + 8, cell->units*4);
// split the data to each gate
z[0] = buf0;
z[1] = buf0 + cell->units;
z[2] = buf0 + cell->units*2;
z[3] = buf0 + cell->units*3;
// i = nn.sigmoid(z0)
local_sigmoid_q15(z[0], cell->units, act_int_bit);
// f = nn.sigmoid(z1)
local_sigmoid_q15(z[1], cell->units, act_int_bit);
// o = nn.sigmoid(z3)
local_sigmoid_q15(z[3], cell->units, act_int_bit);
print_variable_q15(z[0], "z[0] - i", 15, cell->units);
print_variable_q15(z[1], "z[1] - f", 15, cell->units);
print_variable_q15(z[3], "z[3] - o", 15, cell->units);
/* c = f * c_tm1 + i * nn.tanh(z2) for the step 1-3. */
// 1. i * tanh(z2) -> buf1
local_tanh_q15(z[2], cell->units, act_int_bit);
print_variable_q15(z[2], "z[2] - ?", 15, cell->units);
local_mult_q15(z[0], z[2], buf1, 30 - (c->q_dec_c+8), cell->units); //q0.15 * q0.15 >> (shift) = (q_c + 8) // i am not very sure
print_variable_q15(buf1, "c2: i * tanh(z2) ", c->q_dec_c+8, cell->units);
// 2. f * c_tm1 -> o_state[0]
local_mult_q15(z[1], c_tm1, o_state[0], 15, cell->units);
print_variable_q15(o_state[0], "c1: f * c_tm1", c->q_dec_c+8, cell->units);
// 3. c = i*tanh + f*c_tm1 -> o_state[1] ** fill the upper state (carry)
local_add_q15(buf1, o_state[0], o_state[1], 0, cell->units);
print_variable_q15(o_state[1], "c = c1+c2", c->q_dec_c+8, cell->units);
/* h = o * nn.tanh(c) -> o_state[0] for the step 1-2 */
// 1. tanh(c) -> buf2 --- first copy then activate.
nnom_memcpy(buf2, o_state[1], cell->units*2);
local_tanh_q15(buf2, cell->units, 7 - c->q_dec_c); // this int bit is under 8bit
print_variable_q15(buf2, "tanh(c)", 15, cell->units);
// 2. h = o*tanh(c) -> o_state[0] ** fill the lower state (memory, hidden)
local_mult_q15(z[3], buf2, o_state[0], 15, cell->units);
print_variable_q15(o_state[0], "h = o*tanh(c)", 15, cell->units);
// copy and shift q15 to q7 ** (copy hidden to output)
local_q15_to_q7(o_state[0], cell->out_data, 8, cell->units);
print_variable(cell->out_data, "q7 output", 7, cell->units);
return NN_SUCCESS;
}
#endif

View File

@@ -0,0 +1,239 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_matrix.h"
// TODO: completely change this file to the local version
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
nnom_status_t matrix_build(nnom_layer_t *layer);
nnom_layer_t *add_s(const nnom_matrix_config_t * config)
{
nnom_matrix_layer_t *cl = (nnom_matrix_layer_t *) Add(config->output_shift);
if(cl)
cl->super.config = (void*) config;
return (nnom_layer_t *)cl;
}
nnom_layer_t *sub_s(const nnom_matrix_config_t * config)
{
nnom_matrix_layer_t *cl = (nnom_matrix_layer_t *) Sub(config->output_shift);
if(cl)
cl->super.config = (void*) config;
return (nnom_layer_t *)cl;
}
nnom_layer_t *mult_s(const nnom_matrix_config_t * config)
{
nnom_matrix_layer_t *cl = (nnom_matrix_layer_t *) Mult(config->output_shift);
if(cl)
cl->super.config = (void*) config;
return (nnom_layer_t *)cl;
}
nnom_layer_t *Add(int16_t oshift)
{
nnom_matrix_layer_t *cl = (nnom_matrix_layer_t *)_same_shape_matrix_layer();
if (cl == NULL)
return NULL;
// set type in layer parent
cl->super.type = NNOM_ADD;
cl->super.run = add_run;
cl->oshift = oshift;
return (nnom_layer_t *)cl;
}
nnom_layer_t *Sub(int16_t oshift)
{
nnom_matrix_layer_t *cl = (nnom_matrix_layer_t *)_same_shape_matrix_layer();
if (cl == NULL)
return NULL;
// set type in layer parent
cl->super.type = NNOM_SUB;
cl->super.run = sub_run;
cl->oshift = oshift;
return (nnom_layer_t *)cl;
}
nnom_layer_t *Mult(int16_t oshift)
{
nnom_matrix_layer_t *cl = (nnom_matrix_layer_t *)_same_shape_matrix_layer();
if (cl == NULL)
return NULL;
// set type in layer parent
cl->super.type = NNOM_MULT;
cl->super.run = mult_run;
cl->oshift = oshift;
return (nnom_layer_t *)cl;
}
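// Example (illustrative): multiplying two q0.7 tensors gives q0.14 products, so
// Mult(7) (output_shift = 7) rescales the element-wise result back to q0.7.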
// init a base layer instance with same shape 1 in 1 out. More IO can be added later
// mainly used by matrix calculation (add, mult, sub)
nnom_layer_t *_same_shape_matrix_layer()
{
nnom_matrix_layer_t *layer;
nnom_layer_io_t *in, *out;
//nnom_buf_t *comp;
size_t mem_size;
// apply a block memory for all the sub handles.
mem_size = sizeof(nnom_matrix_layer_t) + sizeof(nnom_layer_io_t) * 2;
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_matrix_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
//comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.build = matrix_build;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
//comp->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
//layer->super.comp = comp;
return (nnom_layer_t*)layer;
}
nnom_status_t matrix_build(nnom_layer_t *layer)
{
// get the last layer's output as input shape (if more than one)
nnom_layer_io_t *in = layer->in;
while(in)
{
in->tensor = in->hook.io->tensor;
in = in->aux;
}
// output tensor
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR,layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// now this build has passed the input tensors (shapes, formats) to the new tensors.
return NN_SUCCESS;
}
nnom_status_t add_run(nnom_layer_t *layer)
{
nnom_matrix_layer_t* cl = (nnom_matrix_layer_t*)layer;
nnom_layer_io_t *in = layer->in;
size_t t_size = tensor_size(layer->out->tensor);
int32_t oshift = cl->oshift;
size_t num_input = nnom_io_length(layer->in);
q7_t *input_mem_blk[MAX_INPUT_LAYER];
// if there are only 2 matrices
if(num_input == 2)
{
#ifdef NNOM_USING_CMSIS_NN
if(oshift == 0)
arm_add_q7(layer->in->tensor->p_data, layer->in->aux->tensor->p_data, layer->out->tensor->p_data, t_size);
else
#endif
local_add_q7(layer->in->tensor->p_data, layer->in->aux->tensor->p_data, layer->out->tensor->p_data, oshift, t_size);
}
else
{
for(int i = 0; i < num_input; i++)
{
input_mem_blk[i] = in->tensor->p_data;
in = in->aux;
}
local_multiple_add_q7(layer->out->tensor->p_data, oshift, t_size, num_input, input_mem_blk);
}
return NN_SUCCESS;
}
nnom_status_t sub_run(nnom_layer_t *layer)
{
nnom_matrix_layer_t* cl = (nnom_matrix_layer_t*)layer;
nnom_layer_io_t *in = layer->in;
size_t t_size = tensor_size(layer->out->tensor);
int32_t oshift = cl->oshift;
size_t num_input = nnom_io_length(layer->in);
q7_t *input_mem_blk[MAX_INPUT_LAYER];
// if there are only 2 matrices
if(num_input == 2)
{
// the first 2 matrices
#ifdef NNOM_USING_CMSIS_NN
if(oshift == 0)
arm_sub_q7(layer->in->tensor->p_data, layer->in->aux->tensor->p_data, layer->out->tensor->p_data, t_size);
else
#endif
local_sub_q7(layer->in->tensor->p_data, layer->in->aux->tensor->p_data, layer->out->tensor->p_data, oshift, t_size);
}
else
{
for(int i = 0; i < num_input; i++)
{
input_mem_blk[i] = in->tensor->p_data;
in = in->aux;
}
local_multiple_sub_q7(layer->out->tensor->p_data, oshift, t_size, num_input, input_mem_blk);
}
return NN_SUCCESS;
}
nnom_status_t mult_run(nnom_layer_t *layer)
{
nnom_matrix_layer_t* cl = (nnom_matrix_layer_t*)layer;
nnom_layer_io_t *in = layer->in;
size_t t_size = tensor_size(layer->out->tensor);
int32_t oshift = cl->oshift;
size_t num_input = nnom_io_length(layer->in);
q7_t *input_mem_blk[MAX_INPUT_LAYER];
// if there are only 2 matrices
if(num_input == 2)
{
// the first 2 matrices
#ifdef NNOM_USING_CMSIS_NN
if(oshift == 0)
arm_mult_q7(layer->in->tensor->p_data, layer->in->aux->tensor->p_data, layer->out->tensor->p_data, t_size);
else
#endif
local_mult_q7(layer->in->tensor->p_data, layer->in->aux->tensor->p_data, layer->out->tensor->p_data, oshift, t_size);
}
else
{
for(int i = 0; i < num_input; i++)
{
input_mem_blk[i] = in->tensor->p_data;
in = in->aux;
}
local_multiple_mult_q7(layer->out->tensor->p_data, oshift, t_size, num_input, input_mem_blk);
}
return NN_SUCCESS;
}

View File

@@ -0,0 +1,191 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_maxpool.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
nnom_layer_t *maxpool_s(const nnom_pool_config_t * config)
{
nnom_layer_t *layer;
// test, to accommodate 1d and 2d input
if(config->num_dim == 1)
{
layer = MaxPool(kernel(1, config->kernel_size[0]),
stride(1, config->stride_size[0]),
config->padding_type);
}
else
{
layer = MaxPool(kernel(config->kernel_size[0], config->kernel_size[1]),
stride(config->stride_size[0], config->stride_size[1]),
config->padding_type);
}
if(layer)
layer->config = (void*) config;
return layer;
}
nnom_layer_t *MaxPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type)
{
nnom_maxpool_layer_t *layer;
nnom_buf_t *comp;
nnom_layer_io_t *in, *out;
// apply a block memory for all the sub handles.
size_t mem_size = sizeof(nnom_maxpool_layer_t) + sizeof(nnom_layer_io_t) * 2 + sizeof(nnom_buf_t);
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_maxpool_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_MAXPOOL;
layer->super.run = maxpool_run;
layer->super.build = maxpool_build;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
comp->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
layer->super.comp = comp;
// set parameters
layer->kernel = k;
layer->stride = s;
layer->padding_type = pad_type;
// padding
if (layer->padding_type == PADDING_SAME)
{
layer->pad.h = (k.h - 1) / 2;
layer->pad.w = (k.w - 1) / 2;
layer->pad.c = 1; // no meaning
}
else
{
layer->pad.h = 0;
layer->pad.w = 0;
layer->pad.c = 0;
}
return (nnom_layer_t *)layer;
}
nnom_status_t maxpool_build(nnom_layer_t *layer)
{
nnom_maxpool_layer_t *cl = (nnom_maxpool_layer_t *)layer;
// get the tensor from last layer's output
layer->in->tensor = layer->in->hook.io->tensor;
// create new tensor for output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
// copy then change later.
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// now we set up the tensor shape, always HWC format
if (cl->padding_type == PADDING_SAME)
{
layer->out->tensor->dim[0] = NN_CEILIF(layer->in->tensor->dim[0], cl->stride.h);
layer->out->tensor->dim[1] = NN_CEILIF(layer->in->tensor->dim[1], cl->stride.w);
layer->out->tensor->dim[2] = layer->in->tensor->dim[2]; // channel stays the same
}
else
{
layer->out->tensor->dim[0] = NN_CEILIF(layer->in->tensor->dim[0] - cl->kernel.h + 1, cl->stride.h);
layer->out->tensor->dim[1] = NN_CEILIF(layer->in->tensor->dim[1] - cl->kernel.w + 1, cl->stride.w);
layer->out->tensor->dim[2] = layer->in->tensor->dim[2];
}
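// Worked example (illustration): a 28x28 input with a 2x2 kernel and stride 2 gives
// ceil(28/2) = 14 for PADDING_SAME and ceil((28-2+1)/2) = ceil(27/2) = 14 for
// PADDING_VALID, so both paddings produce a 14x14 output in this case.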
return NN_SUCCESS;
}
nnom_status_t maxpool_run(nnom_layer_t *layer)
{
nnom_maxpool_layer_t *cl = (nnom_maxpool_layer_t *)(layer);
uint16_t out_x, out_y;
// if global pooling
if(layer->out->tensor->num_dim == 1)
{
out_x = 1; out_y = 1;
}
else // normal pooling.
{
out_x = layer->out->tensor->dim[1]; //W
out_y = layer->out->tensor->dim[0]; //h
}
#ifdef NNOM_USING_CHW
local_maxpool_q7_CHW(layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->kernel.w, cl->kernel.h,
cl->pad.w, cl->pad.h,
cl->stride.w, cl->stride.h,
out_x, out_y,
NULL,
layer->out->tensor->p_data);
#else //end of CHW
// HWC
#ifdef NNOM_USING_CMSIS_NN
// 2D, square
if (layer->in->tensor->dim[1] == layer->in->tensor->dim[0] &&
layer->out->tensor->dim[1] == layer->out->tensor->dim[0])
{
arm_maxpool_q7_HWC(
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[2],
cl->kernel.w, cl->pad.w, cl->stride.w,
layer->out->tensor->dim[1],
NULL,
layer->out->tensor->p_data);
}
// non-square 2D, or 1D
else
#endif
{
// CMSIS-NN does not support non-square pooling, so we have to use the local implementation
local_maxpool_q7_HWC(layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->kernel.w, cl->kernel.h,
cl->pad.w, cl->pad.h,
cl->stride.w, cl->stride.h,
out_x, out_y,
NULL,
layer->out->tensor->p_data);
}
#endif // CHW/HWC
return NN_SUCCESS;
}

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_output.h"
nnom_layer_t *output_s(const nnom_io_config_t* config)
{
nnom_layer_t *layer = input_s(config);
if(layer)
{
layer->config = (void*) config;
layer->type = NNOM_OUTPUT;
layer->run = output_run;
layer->build = default_build;
}
return layer;
}
nnom_layer_t *Output(nnom_3d_shape_t output_shape, void *p_buf)
{
// they are actually the same, except for the type being defined
nnom_layer_t *layer = Input(output_shape, p_buf);
if (layer != NULL)
{
layer->type = NNOM_OUTPUT;
layer->run = output_run;
layer->build = default_build;
}
return layer;
}
nnom_status_t output_run(nnom_layer_t *layer)
{
nnom_io_layer_t *cl = (nnom_io_layer_t *)layer;
nnom_memcpy(cl->buf, layer->in->tensor->p_data, tensor_size(layer->out->tensor)); // in->memory -> user memory
return NN_SUCCESS;
}

View File

@@ -0,0 +1,191 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_rnn.h"
nnom_status_t rnn_build(nnom_layer_t *layer);
nnom_status_t rnn_run(nnom_layer_t *layer);
nnom_status_t rnn_free(nnom_layer_t* layer);
// RNN
nnom_layer_t *rnn_s(nnom_rnn_cell_t *cell, const nnom_rnn_config_t* config)
{
nnom_rnn_layer_t *layer;
nnom_buf_t *comp;
nnom_layer_io_t *in, *out;
// apply a block memory for all the sub handles.
size_t mem_size = sizeof(nnom_rnn_layer_t) + sizeof(nnom_layer_io_t) * 2 + sizeof(nnom_buf_t);
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_rnn_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
comp = (void *)((uint8_t*)out + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_RNN;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
comp->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
layer->super.comp = comp;
// set run and outshape methods
layer->super.run = rnn_run;
layer->super.build = rnn_build;
layer->super.free = rnn_free;
// rnn parameters.
layer->return_sequence = config->return_sequence;
layer->stateful = config->stateful;
layer->go_backwards = config->go_backwards;
layer->super.config = (void*)config;
layer->cell = cell;
// set this layer to the cell
layer->cell->layer = (nnom_layer_t *)layer;
return (nnom_layer_t *)layer;
}
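/* Usage sketch (illustrative only; in practice the configs are produced by the NNoM
   model-conversion script, and the names below are hypothetical):

   nnom_layer_t *rnn = rnn_s(lstm_cell_s(&lstm_cell_cfg), &rnn_cfg);

   where lstm_cell_cfg holds units/weights/bias/q_dec_* and rnn_cfg holds
   return_sequence / stateful / go_backwards, as read in rnn_s() above.
*/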
nnom_status_t rnn_free(nnom_layer_t* layer)
{
nnom_rnn_layer_t* cl = (nnom_rnn_layer_t*)layer;
// free cell
if(cl->cell->free)
cl->cell->free(cl->cell);
// free state buffer
nnom_free(cl->state_buf);
return NN_SUCCESS;
}
nnom_status_t rnn_build(nnom_layer_t* layer)
{
nnom_rnn_layer_t *cl = (nnom_rnn_layer_t *)layer;
// get the tensor from last layer's output
layer->in->tensor = layer->in->hook.io->tensor;
// timestamp size
cl->timestamp_size = layer->in->tensor->num_dim > 2 ? layer->in->tensor->dim[1] : layer->in->tensor->dim[0];
if(cl->return_sequence)
{
// create new tensor for the output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, 2, 0);
// shape: timestamp, units
layer->out->tensor->dim[0] = cl->timestamp_size;
layer->out->tensor->dim[1] = cl->cell->units;
}
else
{
// create new tensor for the output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, 1, 0);
// shape: units
layer->out->tensor->dim[0] = cl->cell->units;
}
// output q format - the output of the available activations is q0.7 (q0.15 for 16-bit data).
layer->out->tensor->q_dec[0] = layer->in->tensor->bitwidth==16? 15: 7;
layer->out->tensor->bitwidth = layer->in->tensor->bitwidth;
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// get feature size from input tensor
cl->cell->feature_size = tensor_get_num_channel(layer->in->tensor); // vector (feature) size
// call cell builder to build the cell
cl->cell->build(cl->cell);
// get the size of the computational buffer
cl->super.comp->size = cl->cell->comp_buf_size; // size of intermediate buffer required by the cell.
cl->state_buf = nnom_mem(cl->cell->state_size * 2); // allocate state buf for upper/lower state buffer.
if(!cl->state_buf)
return NN_NO_MEMORY;
// get the computational cost provided by Cell
layer->stat.macc = cl->cell->macc * cl->timestamp_size;
return NN_SUCCESS;
}
nnom_status_t rnn_run(nnom_layer_t* layer)
{
nnom_status_t result;
nnom_rnn_layer_t* cl = (nnom_rnn_layer_t*)(layer);
size_t timestamps_size = layer->in->tensor->dim[layer->in->tensor->num_dim-2];
size_t feature_size = tensor_get_num_channel(layer->in->tensor); // feature size = last dimension.
size_t state_size = cl->cell->state_size;
size_t output_growth;
void* upper_state = (q7_t*)cl->state_buf + state_size;
void* lower_state = (q7_t*)cl->state_buf;
// reset state buffer if not in stateful
if (!cl->stateful)
nnom_memset(cl->state_buf, 0, state_size * 2);
// set output data
output_growth = cl->return_sequence ? cl->cell->units : 0;
// run timestamp by timestamp
for (uint32_t round = 0; round < timestamps_size; round++)
{
if(cl->go_backwards)
{
// set input data
cl->cell->in_data = (q7_t*)layer->in->tensor->p_data + feature_size*(timestamps_size - 1 - round);
// set output data
cl->cell->out_data = (q7_t*)layer->out->tensor->p_data + output_growth*(timestamps_size - 1 - round);
}
else
{
// set input data
cl->cell->in_data = (q7_t*)layer->in->tensor->p_data + feature_size*round;
// set output data
cl->cell->out_data = (q7_t*)layer->out->tensor->p_data + output_growth*round;
}
// switch upper/lower state buffer
if(cl->cell->in_state != lower_state)
{
cl->cell->in_state = lower_state;
cl->cell->out_state = upper_state;
}
else
{
cl->cell->in_state = upper_state;
cl->cell->out_state = lower_state;
}
// run it
result = cl->cell->run(cl->cell);
if(result != NN_SUCCESS)
return result;
}
return NN_SUCCESS;
}

View File

@@ -0,0 +1,142 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2020-08-21 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_simple_cell.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
// Simple RNN
// unit = output shape
// type of activation
nnom_rnn_cell_t *simple_cell_s(const nnom_simple_cell_config_t* config)
{
nnom_simple_cell_t *cell;
cell = nnom_mem(sizeof(nnom_simple_cell_t));
if (cell == NULL)
return NULL;
// set methods
cell->super.run = simple_cell_run;
cell->super.build = simple_cell_build;
cell->super.free = simple_cell_free;
cell->super.config = (void*) config;
cell->super.units = config->units;
cell->super.type = NNOM_SIMPLE_CELL;
// set parameters
cell->bias = config->bias;
cell->weights = config->weights;
cell->recurrent_weights = config->recurrent_weights;
cell->act_type = config->act_type;
// q format for intermediate products
cell->q_dec_iw = config->q_dec_iw;
cell->q_dec_hw = config->q_dec_hw;
cell->q_dec_h = config->q_dec_h;
return (nnom_rnn_cell_t *)cell;
}
nnom_status_t simple_cell_free(nnom_rnn_cell_t* cell)
{
return NN_SUCCESS;
}
// the state buffer and computational buffer shape of the cell
nnom_status_t simple_cell_build(nnom_rnn_cell_t* cell)
{
nnom_layer_t *layer = cell->layer;
nnom_simple_cell_t *c = (nnom_simple_cell_t *)cell;
nnom_simple_cell_config_t *config = (nnom_simple_cell_config_t *)cell->config;
int q_hw_iw;
// activation, check if activation is supported
if(config->act_type != ACT_SIGMOID && config->act_type != ACT_TANH)
return NN_ARGUMENT_ERROR;
// calculate the output shift for the 2 calculations.
// hw = the product of hidden x weight, iw = the product of input x weight
// due to the addition of them, they must have same q format.
q_hw_iw = MIN(c->q_dec_hw, c->q_dec_iw);
// for the 2 dot in cell: output shift = input_dec + weight_dec - output_dec
c->oshift_hw = c->q_dec_h + c->recurrent_weights->q_dec[0] - q_hw_iw;
c->oshift_iw = layer->in->tensor->q_dec[0] + c->weights->q_dec[0] - q_hw_iw;
// bias shift = bias_dec - out_dec
c->bias_shift = layer->in->tensor->q_dec[0] + c->weights->q_dec[0] - c->bias->q_dec[0];
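// Worked example (hypothetical Q formats): with q_dec_iw = 3, q_dec_hw = 4, q_dec_h = 7
// and input/weight/recurrent-weight q_dec = 7, q_hw_iw = MIN(4, 3) = 3, so
// oshift_hw = 7 + 7 - 3 = 11 and oshift_iw = 7 + 7 - 3 = 11. Taking the smaller q_dec
// gives the larger integer range, so the two products can be added without overflow.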
// state size = one timestamp output size.
cell->state_size = cell->units;
// comp buffer size: not required
cell->comp_buf_size = 0;
// finally, calculate the MAC for info
cell->macc = cell->feature_size * cell->units // input: feature * state
+ cell->units * cell->units; // recurrent, state * output_unit
return NN_SUCCESS;
}
// This Simple Cell replicates Keras's SimpleRNNCell, as below
/*
def call(self, inputs, states, training=None):
prev_output = states[0] if nest.is_sequence(states) else states
h = K.dot(inputs, self.kernel)
h = K.bias_add(h, self.bias)
h2 = K.dot(prev_output, self.recurrent_kernel)
output = h + h2
output = self.activation(output)
new_state = [output] if nest.is_sequence(states) else output
return output, new_state
*/
nnom_status_t simple_cell_run(nnom_rnn_cell_t* cell)
{
nnom_simple_cell_t* c = (nnom_simple_cell_t*) cell;
int act_int_bit = 7 - MIN(c->q_dec_hw, c->q_dec_iw);
// in_state x recurrent_weight -> h2 (output buf)
local_dot_q7_opt(cell->in_state, c->recurrent_weights->p_data, cell->units, cell->units, c->oshift_hw, cell->out_data);
// (input x weight) + bias -> h (in_state buf)
local_fully_connected_q7_opt(cell->in_data, c->weights->p_data,
cell->feature_size, cell->units, c->bias_shift, c->oshift_iw, c->bias->p_data, cell->in_state, NULL);
// h + h2 -> (out_state buf)
local_add_q7(cell->in_state, cell->out_data, cell->out_state, 0, cell->units);
// active(out_state buf)
if(c->act_type == ACT_TANH)
local_tanh_q7(cell->out_state, cell->units, act_int_bit);
//local_hard_tanh_q7(cell->out_state, cell->units, act_int_bit);
else
local_sigmoid_q7(cell->out_state, cell->units, act_int_bit);
//local_hard_sigmoid_q7(cell->out_state, cell->units, act_int_bit);
// (out_state buf) --copy--> (output buf)
nnom_memcpy(cell->out_data, cell->out_state, cell->units);
return NN_SUCCESS;
}

View File

@@ -0,0 +1,86 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_softmax.h"
#ifdef NNOM_USING_CMSIS_NN
#include "arm_math.h"
#include "arm_nnfunctions.h"
#endif
nnom_layer_t *softmax_s(const nnom_softmax_config_t * config)
{
nnom_layer_t * layer = Softmax();
if(layer)
layer->config = (void*) config;
return layer;
}
nnom_layer_t *Softmax(void)
{
nnom_layer_t *layer;
nnom_layer_io_t *in, *out;
// apply a block memory for all the sub handles.
size_t mem_size = sizeof(nnom_layer_t) + sizeof(nnom_layer_io_t) * 2;
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->type = NNOM_SOFTMAX;
layer->run = softmax_run;
layer->build = softmax_build;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->in = io_init(layer, in);
layer->out = io_init(layer, out);
return layer;
}
nnom_status_t softmax_build(nnom_layer_t *layer)
{
// get the last layer's output as input shape
layer->in->tensor = layer->in->hook.io->tensor;
// output tensor
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// softmax has fixed output dec bit
layer->out->tensor->q_dec[0] = 7;
return NN_SUCCESS;
}
nnom_status_t softmax_run(nnom_layer_t *layer)
{
// looks like the new version causes quite a large accuracy drop.
// #ifdef NNOM_USING_CMSIS_NN
// // temporary fix for multiple-dimension input.
// arm_softmax_q7(layer->in->tensor->p_data, tensor_size(layer->out->tensor), layer->out->tensor->p_data);
// #else
local_softmax_q7(layer->in->tensor->p_data, tensor_size(layer->out->tensor), layer->out->tensor->p_data);
//#endif
return NN_SUCCESS;
}

View File

@@ -0,0 +1,104 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_sumpool.h"
nnom_layer_t *sumpool_s(const nnom_pool_config_t * config)
{
nnom_sumpool_layer_t *cl;
if(config->num_dim == 1)
{
cl = (nnom_sumpool_layer_t *)SumPool(kernel(1, config->kernel_size[0]),
stride(1, config->stride_size[0]),
config->padding_type);
}
else
{
cl = (nnom_sumpool_layer_t *)SumPool(kernel(config->kernel_size[0], config->kernel_size[1]),
stride(config->stride_size[0], config->stride_size[1]),
config->padding_type);
}
if(cl)
{
cl->super.config = (void*) config;
cl->output_shift = config->output_shift; // no idea if we need it
}
return (nnom_layer_t *)cl;
}
nnom_layer_t *SumPool(nnom_3d_shape_t k, nnom_3d_shape_t s, nnom_padding_t pad_type)
{
nnom_layer_t *layer = MaxPool(k, s, pad_type);
if (layer != NULL)
{
layer->type = NNOM_SUMPOOL;
layer->run = sumpool_run;
layer->build = sumpool_build;
}
return (nnom_layer_t *)layer;
}
nnom_status_t sumpool_build(nnom_layer_t *layer)
{
// sum pooling shares the same output shape, stride and padding settings as max pooling.
maxpool_build(layer);
// however, sum pooling requires a computational buffer.
layer->comp->size = 4 * tensor_size(layer->out->tensor);
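// note: 4 bytes per output element, since the sum-pooling kernel accumulates in 32-bit
// before requantizing (the buffer is passed via layer->comp->mem->blk in sumpool_run below).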
return NN_SUCCESS;
}
// sum pooling dynamically changes the Q format; in the current version it must be used as the last layer before softmax
nnom_status_t sumpool_run(nnom_layer_t *layer)
{
nnom_sumpool_layer_t *cl = (nnom_sumpool_layer_t *)(layer);
uint16_t out_x, out_y;
// if global pooling
if(layer->out->tensor->num_dim == 1)
{
out_x = 1; out_y = 1;
}
else // normal pooling.
{
out_x = layer->out->tensor->dim[1]; //W
out_y = layer->out->tensor->dim[0]; //h
}
#ifdef NNOM_USING_CHW
local_sumpool_q7_CHW(
#else
local_sumpool_q7_HWC(
#endif
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->kernel.w, cl->kernel.h,
cl->pad.w, cl->pad.h,
cl->stride.w, cl->stride.h,
out_x, out_y,
layer->comp->mem->blk,
layer->out->tensor->p_data);
return NN_SUCCESS;
}

View File

@@ -0,0 +1,104 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_upsample.h"
nnom_layer_t *upsample_s(const nnom_upsample_config_t *config)
{
nnom_layer_t *layer = UpSample(kernel(config->kernel[0], config->kernel[1]));
if(layer)
layer->config = (void*) config;
return layer;
}
// up sampling layer
nnom_layer_t *UpSample(nnom_3d_shape_t kernel)
{
nnom_upsample_layer_t *layer;
nnom_layer_io_t *in, *out;
// apply a block memory for all the sub handles.
size_t mem_size = sizeof(nnom_upsample_layer_t) + sizeof(nnom_layer_io_t) * 2;
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_upsample_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_UPSAMPLE;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
// set run and outshape methods
layer->super.run = upsample_run;
layer->super.build = upsample_build;
// set parameters
layer->kernel = kernel;
return (nnom_layer_t*)layer;
}
nnom_status_t upsample_build(nnom_layer_t *layer)
{
nnom_upsample_layer_t* cl = (nnom_upsample_layer_t*)layer;
// get the last layer's output as input shape
layer->in->tensor = layer->in->hook.io->tensor;
// output tensor
// 1. allocate a new tensor for output
// 2. set the same dim, qfmt to the new tensor.
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// enlarge w and h, c stay the same.
layer->out->tensor->dim[0] = layer->in->tensor->dim[0] * cl->kernel.h;
layer->out->tensor->dim[1] = layer->in->tensor->dim[1] * cl->kernel.w;
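// Worked example (illustration): a 14x14x8 input with UpSample(kernel(2, 2)) produces a
// 28x28x8 output; each input pixel is replicated across a 2x2 block (nearest-neighbour).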
return NN_SUCCESS;
}
// up sampling, also known as unpooling
nnom_status_t upsample_run(nnom_layer_t *layer)
{
nnom_upsample_layer_t *cl = (nnom_upsample_layer_t *)(layer);
#ifdef NNOM_USING_CHW
local_up_sampling_q7_CHW(
#else
local_up_sampling_q7_HWC(
#endif
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->kernel.w, cl->kernel.h,
layer->out->tensor->dim[1], layer->out->tensor->dim[0],
NULL,
layer->out->tensor->p_data);
return NN_SUCCESS;
}

View File

@@ -0,0 +1,107 @@
/*
* Copyright (c) 2018-2020
* Jianjia Ma
* majianjia@live.com
*
* SPDX-License-Identifier: Apache-2.0
*
* Change Logs:
* Date Author Notes
* 2019-07-23 Jianjia Ma The first version
*/
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include "nnom.h"
#include "nnom_local.h"
#include "nnom_layers.h"
#include "layers/nnom_zero_padding.h"
nnom_layer_t * zeropadding_s(const nnom_zero_padding_config_t* config)
{
nnom_layer_t *layer = ZeroPadding(config->pad);
if(layer)
layer->config = (void*) config;
return (nnom_layer_t*)layer;
}
// Zero padding layer
nnom_layer_t *ZeroPadding(nnom_border_t pad)
{
nnom_zero_padding_layer_t *layer;
nnom_layer_io_t *in, *out;
// apply a block memory for all the sub handles.
size_t mem_size = sizeof(nnom_zero_padding_layer_t) + sizeof(nnom_layer_io_t) * 2;
layer = nnom_mem(mem_size);
if (layer == NULL)
return NULL;
// distribute the memory to sub handles.
in = (void *)((uint8_t*)layer + sizeof(nnom_zero_padding_layer_t));
out = (void *)((uint8_t*)in + sizeof(nnom_layer_io_t));
// set type in layer parent
layer->super.type = NNOM_ZERO_PADDING;
// set buf state
in->type = NNOM_TENSOR_BUF_TEMP;
out->type = NNOM_TENSOR_BUF_TEMP;
// put in & out on the layer.
layer->super.in = io_init(layer, in);
layer->super.out = io_init(layer, out);
// set run and outshape methods
layer->super.run = zero_padding_run;
layer->super.build = zero_padding_build;
// set parameters
layer->pad = pad;
return (nnom_layer_t*)layer;
}
nnom_status_t zero_padding_build(nnom_layer_t* layer)
{
nnom_zero_padding_layer_t *cl = (nnom_zero_padding_layer_t *)layer;
// get the tensor from last layer's output
layer->in->tensor = layer->in->hook.io->tensor;
// create new tensor for output
layer->out->tensor = new_tensor(NNOM_QTYPE_PER_TENSOR, layer->in->tensor->num_dim, tensor_get_num_channel(layer->in->tensor));
// copy then change later.
tensor_cpy_attr(layer->out->tensor, layer->in->tensor);
// see if the activation will change the q format
if(layer->actail)
layer->out->tensor->q_dec[0] = act_get_dec_bit(layer->actail->type, layer->out->tensor->q_dec[0]);
// output shape
layer->out->tensor->dim[1] = layer->in->tensor->dim[1] + cl->pad.left + cl->pad.right;
layer->out->tensor->dim[0] = layer->in->tensor->dim[0] + cl->pad.top + cl->pad.bottom;
layer->out->tensor->dim[2] = layer->in->tensor->dim[2];
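// Worked example (illustration): padding a 28x28xC input with top = bottom = left = right = 1
// gives a 30x30xC output; only dim[0] (h) and dim[1] (w) grow, the channel count is unchanged.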
return NN_SUCCESS;
}
nnom_status_t zero_padding_run(nnom_layer_t * layer)
{
nnom_zero_padding_layer_t *cl = (nnom_zero_padding_layer_t*)layer;
#ifdef NNOM_USING_CHW
local_zero_padding_CHW_q7(
#else
local_zero_padding_HWC_q7(
#endif
layer->in->tensor->p_data,
layer->in->tensor->dim[1], layer->in->tensor->dim[0], layer->in->tensor->dim[2],
cl->pad.top,
cl->pad.bottom,
cl->pad.left,
cl->pad.right,
layer->out->tensor->p_data,
layer->out->tensor->dim[1], layer->out->tensor->dim[0]);
return NN_SUCCESS;
}

View File

@@ -1,95 +0,0 @@
![](https://raw.githubusercontent.com/onnx/onnx/master/docs/ONNX_logo_main.png)
# ONNX
**A port of the general-purpose neural network inference framework ONNX to TencentOS-tiny**
[ONNX](https://onnx.ai/) (Open Neural Network Exchange) is a common format for machine learning models that can bridge models from different machine learning frameworks.
ONNX is a standard for representing deep learning models which allows models to be moved between frameworks. Models trained with well-known deep learning frameworks such as TensorFlow, Keras, PyTorch, Caffe2 and MXNet can be converted to the onnx format and then run on an RTOS.
## Supported operators
- Conv2D
- Relu
- Maxpool
- Softmax
- Matmul
- Add
- Flatten
- Transpose
## MNIST examples
There are currently two handwritten-digit recognition examples:
mnist_int and mnist_float
They target integer and floating-point inference respectively, depending on the platform: mnist_int has been verified on the i.MX6ULL and mnist_float on the STM32L4. The smallest demo needs only 16 KB of RAM, so it also runs on the STM32F103C8T6. mnist_int essentially applies an int32 quantization: the normalized floating-point (double) parameters are multiplied by 1000 and the softmax operator is adjusted accordingly, which allows faster integer-only inference.
| Example file | Description |
| ------------- | ---------------------------------------- |
| mnist_int.c | Integer inference; model parameters are stored in mnist_int.h |
| mnist_float.c | Floating-point inference; model parameters are stored in mnist_float.h |
#### Model structure
```
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_5 (Conv2D) (None, 28, 28, 2) 20
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 14, 14, 2) 0
_________________________________________________________________
dropout_5 (Dropout) (None, 14, 14, 2) 0
_________________________________________________________________
conv2d_6 (Conv2D) (None, 14, 14, 2) 38
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 7, 7, 2) 0
_________________________________________________________________
dropout_6 (Dropout) (None, 7, 7, 2) 0
_________________________________________________________________
flatten_3 (Flatten) (None, 98) 0
_________________________________________________________________
dense_5 (Dense) (None, 4) 396
_________________________________________________________________
dense_6 (Dense) (None, 10) 50
=================================================================
Total params: 504
Trainable params: 504
Non-trainable params: 0
_________________________________________________________________
```
Inference test
![mnist_test](pic/mnist_test.png)
## Notes
Since ONNX models are in the Google Protobuf v3 format, the protobuf folder also contains the model-parsing code, which can be used together with a file system to load models:
- protobuf-c
- onnx-pb-c
The platform folder contains platform-specific adaptations, such as the implementations of malloc and free:
- tencentos_libc_malloc
## Todo List
- Parse more complex models
- Adapt accelerated operators for different hardware
## References
https://github.com/wuhanstudio/onnx-backend
## Contact
- Maintainer: derek
- Email: dkeji627@gmail.com

View File

@@ -1,191 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include "mnist_int.h"
#include "onnx.h"
static const char codeLib[] = "@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\\|()1{}[]?-_+~<>i!lI;:,\"^`'. ";
int data[5]={1.5 , 2.5 , 3.5 , 4.5 , 5.5};
static const int img[2][784] = {IMG0, IMG1};
static const int img1[784] = {1,2,3,4,5};
int hello()
{
printf("hello pnnx\r\n");
return 0;
}
void print_img(void * buf)
{
int index = 0;
//char ch = '@';
int x = 0;
int y = 0;
printf("test2\r\n");
for(y = 0; y < 28; y++)
{
for (x = 0; x < 28; x++)
{
index = 0;
if(((int*)buf)[y*28+x] > 600)
{
index =69;
}
if(index < 0)
{
index = 0;
}
printf("%c",codeLib[index]);
printf("%c",codeLib[index]);
}
printf("\r\n");
}
}
int mnist()
{
printf("test1\r\n");
int img_index = 1;
print_img(img[img_index]);
printf("img ok\r\n");
// 1. Conv2D
int64_t shapeW3[] = {2, 1, 3, 3};
int64_t dimW3 = 4;
int64_t permW3_t[] = { 0, 2, 3, 1};
int* W3_t = transpose(W3, shapeW3, dimW3, permW3_t);
printf("transpose ok\r\n");
int* conv1 = (int*) malloc(sizeof(int)*28*28*2);
memset(conv1, 0, sizeof(sizeof(int)*28*28*2));
conv2D(img[img_index], 28, 28, 1, W3, 2, 3, 3, 1, 1, 1, 1, B3, conv1, 28, 28);
free(W3_t);
printf("Conv2D ok \r\n");
// 2. Relu
int* relu1 = (int*) malloc(sizeof(int)*28*28*2);
relu(conv1, 28*28*2, relu1);
free(conv1);
printf("Relu ok\r\n");
// 3. Maxpool
int* maxpool1 = (int*) malloc(sizeof(int)*14*14*2);
memset(maxpool1, 0, sizeof(sizeof(int)*14*14*2));
maxpool(relu1, 28, 28, 2, 2, 2, 0, 0, 2, 2, 14, 14, maxpool1);
free(relu1);
printf("Maxpool ok\r\n");
// 4. Conv2D
int64_t shapeW2[] = {2, 2, 3, 3};
int64_t dimW2 = 4;
int64_t perm_t[] = { 0, 2, 3, 1};
int* W2_t = transpose(W2, shapeW2, dimW2, perm_t);
int* conv2 = (int*) malloc(sizeof(int)*14*14*2);
memset(conv2, 0, sizeof(sizeof(int)*14*14*2));
conv2D(maxpool1, 14, 14, 2, W2_t, 2, 3, 3, 1, 1, 1, 1, B2, conv2, 14, 14);
free(W2_t);
free(maxpool1);
printf("Conv2D ok\r\n");
// 5. Relu
int* relu2 = (int*) malloc(sizeof(int)*14*14*2);
relu(conv2, 14*14*2, relu2);
free(conv2);
printf("Relu ok\r\n");
// 6. Maxpool
int* maxpool2 = (int*) malloc(sizeof(int)*7*7*2);
memset(maxpool2, 0, sizeof(sizeof(int)*7*7*2));
maxpool(relu2, 14, 14, 2, 2, 2, 0, 0, 2, 2, 7, 7, maxpool2);
free(relu2);
printf("Maxpool ok\r\n");
// Flatten NOT REQUIRED
// 7. Dense
int64_t shapeW1[] = {98, 4};
int64_t dimW1 = 2;
int64_t permW1_t[] = { 1, 0};
int* W1_t = transpose(W1, shapeW1, dimW1, permW1_t);
int* dense1 = (int*) malloc(sizeof(int)*4);
memset(dense1, 0, sizeof(sizeof(int)*4));
dense(maxpool2, W1_t, 98, 4, B1, dense1);
free(W1_t);
free(maxpool2);
printf("Dense ok\r\n");
// 8. Dense
int64_t shapeW[] = {4, 10};
int64_t dimW = 2;
int64_t permW_t[] = { 1, 0};
int* W_t = transpose(W, shapeW, dimW, permW_t);
int* dense2 = (int*) malloc(sizeof(int)*10);
memset(dense2, 0, sizeof(sizeof(int)*10));
dense(dense1, W_t, 4, 10, B, dense2);
free(W_t);
free(dense1);
printf("Dense ok\r\n");
// 9. Softmax
int* output = (int*) malloc(sizeof(int)*10);
memset(output, 0, sizeof(sizeof(int)*10));
softmax(dense2, 10, output);
printf("Softmax ok\r\n");
int max = 0;
int min = output[0];
int max_index = 0;
int min_index = 0;
printf("\n\rPredictions: \n\r");
for(int i = 0; i < 10; i++)
{
printf("%d ", output[i]);
if(output[i] > max)
{
max = output[i];
max_index = i;
}
if(output[i] < min)
{
min = output[i];
min_index = i;
}
}
printf("\n\r");
printf("\n\rThe number is %d\n\r", min_index);
free(dense2);
free(output);
printf("Result ok\r\n");
return 0;
}

View File

@@ -1,73 +0,0 @@
#ifndef __MNIST_INT_H__
#define __MNIST_INT_H__
#include <stdio.h>
#include <stdint.h>
#define IMG0 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,380,376,301,462,239,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,352,541,921,921,921,921,921,921,984,984,972,996,960,921,745,82,0,0,0,0,0,0,0,0,0,0,0,549,984,996,996,996,996,996,996,996,996,996,996,996,996,996,996,741,90,0,0,0,0,0,0,0,0,0,0,886,996,815,780,780,780,780,545,239,239,239,239,239,501,870,996,996,741,82,0,0,0,0,0,0,0,0,0,149,321,50,0,0,0,0,0,0,0,0,0,0,0,133,835,996,996,450,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,329,996,996,917,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,329,996,996,917,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,415,615,996,996,952,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,98,458,894,894,894,992,996,996,996,996,941,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,266,466,862,996,996,996,996,996,996,996,996,996,556,0,0,0,0,0,0,0,0,0,0,0,0,0,145,733,992,996,996,996,874,807,807,294,266,843,996,996,458,0,0,0,0,0,0,0,0,0,0,0,0,443,858,996,949,890,450,349,121,0,0,0,0,784,996,945,160,0,0,0,0,0,0,0,0,0,0,0,0,662,996,690,243,0,0,0,0,0,0,0,188,905,996,917,0,0,0,0,0,0,0,0,0,0,0,0,0,70,486,0,0,0,0,0,0,0,0,0,329,996,996,650,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,545,996,933,223,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,823,980,996,658,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,949,996,937,223,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,349,984,945,337,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,807,964,615,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,458,270,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
#define IMG0_LABEL 7
#define IMG1 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,121,517,996,992,996,835,321,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,82,556,913,988,992,988,992,988,874,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,482,996,992,996,992,878,796,796,874,1000,835,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,796,992,988,992,831,78,0,0,239,992,988,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,160,952,878,796,717,160,596,117,0,0,1000,992,400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,156,78,0,0,400,992,196,0,321,992,988,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,321,839,121,443,913,996,913,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,243,400,321,160,992,909,992,988,913,196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,596,992,996,992,996,992,996,913,482,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,596,988,992,988,992,988,752,196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,243,717,796,952,996,992,243,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,156,674,988,796,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,82,0,0,0,0,0,0,0,0,0,717,996,439,0,0,0,0,0,0,0,0,0,0,0,0,0,0,243,796,639,0,0,0,0,0,0,0,0,239,992,592,0,0,0,0,0,0,0,0,0,0,0,0,0,82,839,752,0,0,0,0,0,0,0,0,43,835,996,592,0,0,0,0,0,0,0,0,0,0,0,0,0,400,992,592,0,0,0,0,0,0,0,160,835,988,992,435,0,0,0,0,0,0,0,0,0,0,0,0,0,160,1000,835,360,200,0,0,121,360,678,992,996,992,556,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,674,988,992,988,796,796,913,988,992,988,992,509,78,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,82,796,1000,992,996,992,996,992,956,796,321,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,78,592,592,992,670,592,592,156,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
#define IMG1_LABEL 3
#define TOTAL_IMAGE 2
static const signed char label[] = {IMG0_LABEL, IMG1_LABEL};
static const int W3[] = {-323,-426,-651,790,-221,37,398,221,797,254,307,625,-589,203,-64,-1566,-376,-644};
static const int B3[] = {-829,-140};
static const int W2[] = {7,231,36,-146,-155,4,273,-27,234,-635,-556,-770,156,710,239,1820,-18,1574,1723,-596,1399,335,568,379,35,-182,-32,6,-2,-5,293,137,355,2,2,-22};
static const int B2[] = {-116,-3};
static const int W1[] = {157,-226,21,25,8,-775,-415,-125,-396,335,-631,-28,-506,-357,-3780,-826,102,571,-625,66,559,253,-3075,-695,253,317,-866,127,831,266,-2586,-572,297,162,-991,77,891,168,-2524,-563,416,-108,-1022,206,398,-160,-1918,-483,57,-1257,-231,1051,-798,-1626,-260,-76,-464,755,131,247,-1527,163,-75,-58,-338,1305,144,440,-310,154,5,-31,-159,661,83,265,-38,180,-7,54,-14,306,6,223,30,126,-28,111,35,46,-26,264,69,107,-30,95,248,-364,-102,496,40,20,-54,54,-71,-1538,-235,1589,-23,-249,18,80,51,614,157,128,-869,1376,430,134,-149,454,130,231,3,427,233,92,-60,464,103,250,-53,214,116,224,126,234,127,332,14,106,108,305,314,-71,134,454,54,74,97,274,486,-436,-135,572,135,-7,118,244,-375,-468,-564,865,340,-172,40,363,89,-498,476,21,285,617,705,-306,-570,-206,41,230,-179,-23,141,23,-641,-69,-85,164,-534,-101,-131,149,-635,-98,-232,154,-485,-190,-204,106,-529,-173,-362,122,-386,-247,-252,102,-145,-101,43,-171,-31,-301,-94,69,-549,668,145,-737,770,-412,101,52,254,227,-30,-83,-663,512,-121,-334,-75,-98,-16,-31,-435,94,-49,-77,-128,-89,-70,-10,-290,-13,-39,-23,-155,-52,-147,-75,-268,-35,-95,-15,-39,24,-196,-199,-203,-42,-187,-45,-10,148,-117,-418,-206,-24,-157,-55,-90,402,-357,-786,-79,162,-144,-274,268,688,-64,113,599,1294,-1250,608,123,158,-175,34,391,231,-756,200,79,14,-121,8,268,57,-526,124,80,-38,-88,0,286,-10,-393,111,65,-33,-74,-27,300,2,-479,-45,-10,39,-92,-192,154,212,-389,-373,-206,292,-129,-360,-554,457,-352,-947,-1248,887,336,3,-227,1456,549,383,-411,375,176,38,163,705,55,644,-207,146,103,197,174,365,-97,522,-184,-1,88,241,155,172,-105,382,-306,-162,115,307,158,-17,-50,262,-1299,-227,108,744,-398,16,100,-163,-649,-567,17,989,-1395,441,166,-191};
static const int B1[] = {1201,-1177,2169,-1961};
static const int W[] = {558,787,-40,-122,-412,-36,169,-147,-16,-280,18,62,495,339,-475,-140,-882,183,20,-137,-52,679,-280,-312,444,-261,-322,1032,-144,522,57,-965,-305,168,-532,426,-543,14,267,159};
static const int B[] = {41,1461,71,-1277,809,-1693,-297,-117,329,659};
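/* The commented-out block below appears to hold the same images and weights quantized at
   10x the scale of the arrays above; it is kept commented out. */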
/*
#define IMG0 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3803,3764,3019,4627,2392,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3529,5411,9215,9215,9215,9215,9215,9215,9843,9843,9725,9960,9607,9215,7450,823,0,0,0,0,0,0,0,0,0,0,0,5490,9843,9960,9960,9960,9960,9960,9960,9960,9960,9960,9960,9960,9960,9960,9960,7411,901,0,0,0,0,0,0,0,0,0,0,8862,9960,8156,7803,7803,7803,7803,5450,2392,2392,2392,2392,2392,5019,8705,9960,9960,7411,823,0,0,0,0,0,0,0,0,0,1490,3215,509,0,0,0,0,0,0,0,0,0,0,0,1333,8352,9960,9960,4509,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3294,9960,9960,9176,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3294,9960,9960,9176,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4156,6156,9960,9960,9529,2000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,980,4588,8941,8941,8941,9921,9960,9960,9960,9960,9411,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2666,4666,8627,9960,9960,9960,9960,9960,9960,9960,9960,9960,5568,0,0,0,0,0,0,0,0,0,0,0,0,0,1450,7333,9921,9960,9960,9960,8745,8078,8078,2941,2666,8431,9960,9960,4588,0,0,0,0,0,0,0,0,0,0,0,0,4431,8588,9960,9490,8901,4509,3490,1215,0,0,0,0,7843,9960,9450,1607,0,0,0,0,0,0,0,0,0,0,0,0,6627,9960,6901,2431,0,0,0,0,0,0,0,1882,9058,9960,9176,0,0,0,0,0,0,0,0,0,0,0,0,0,705,4862,0,0,0,0,0,0,0,0,0,3294,9960,9960,6509,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5450,9960,9333,2235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8235,9803,9960,6588,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9490,9960,9372,2235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3490,9843,9450,3372,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,196,8078,9647,6156,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,156,4588,2705,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
#define IMG0_LABEL 7
#define IMG1 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1215,5176,9960,9921,9960,8352,3215,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,823,5568,9137,9882,9921,9882,9921,9882,8745,784,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4823,9960,9921,9960,9921,8784,7960,7960,8745,10000,8352,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7960,9921,9882,9921,8313,784,0,0,2392,9921,9882,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1607,9529,8784,7960,7176,1607,5960,1176,0,0,10000,9921,4000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1568,784,0,0,4000,9921,1960,0,3215,9921,9882,784,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3215,8392,1215,4431,9137,9960,9137,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2431,4000,3215,1607,9921,9098,9921,9882,9137,1960,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5960,9921,9960,9921,9960,9921,9960,9137,4823,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5960,9882,9921,9882,9921,9882,7529,1960,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2431,7176,7960,9529,9960,9921,2431,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1568,6745,9882,7960,784,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,823,0,0,0,0,0,0,0,0,0,7176,9960,4392,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2431,7960,6392,0,0,0,0,0,0,0,0,2392,9921,5921,0,0,0,0,0,0,0,0,0,0,0,0,0,823,8392,7529,0,0,0,0,0,0,0,0,431,8352,9960,5921,0,0,0,0,0,0,0,0,0,0,0,0,0,4000,9921,5921,0,0,0,0,0,0,0,1607,8352,9882,9921,4352,0,0,0,0,0,0,0,0,0,0,0,0,0,1607,10000,8352,3607,2000,0,0,1215,3607,6784,9921,9960,9921,5568,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6745,9882,9921,9882,7960,7960,9137,9882,9921,9882,9921,5098,784,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,823,7960,10000,9921,9960,9921,9960,9921,9568,7960,3215,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,784,5921,5921,9921,6705,5921,5921,1568,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
#define IMG1_LABEL 3
#define TOTAL_IMAGE 2
static const signed char label[] = {IMG0_LABEL, IMG1_LABEL};
static const int W3[] = {-3233,-4261,-6519,7906,-2210,371,3984,2212,7975,2549,3076,6250,-5895,2037,-647,-15660,-3767,-6443};
static const int B3[] = {-8293,-1409};
static const int W2[] = {70,2319,368,-1468,-1559,44,2732,-275,2340,-6354,-5564,-7705,1560,7101,2395,18201,-183,15745,17230,-5966,13997,3351,5684,3797,350,-1828,-322,69,-26,-57,2935,1379,3558,22,25,-226};
static const int B2[] = {-1165,-36};
static const int W1[] = {1579,-2264,212,255,87,-7751,-4159,-1258,-3963,3354,-6319,-287,-5066,-3574,-37807,-8261,1022,5711,-6256,669,5596,2537,-30759,-6959,2531,3173,-8664,1275,8313,2666,-25865,-5720,2974,1623,-9915,779,8913,1685,-25247,-5639,4167,-1080,-10229,2062,3988,-1602,-19185,-4837,573,-12573,-2311,10518,-7981,-16263,-2600,-764,-4646,7558,1318,2474,-15276,1636,-754,-585,-3385,13052,1444,4408,-3103,1541,53,-317,-1599,6612,832,2651,-384,1805,-73,541,-142,3065,61,2231,303,1269,-281,1118,353,468,-265,2645,699,1071,-303,952,2480,-3649,-1027,4960,400,209,-547,541,-718,-15381,-2356,15890,-230,-2493,187,804,519,6141,1578,1288,-8691,13761,4305,1347,-1497,4542,1307,2311,36,4274,2339,920,-602,4642,1039,2504,-532,2146,1169,2240,1263,2349,1277,3324,140,1063,1087,3052,3141,-716,1348,4541,546,745,973,2748,4866,-4363,-1358,5724,1359,-74,1185,2448,-3753,-4687,-5648,8657,3407,-1721,406,3630,895,-4989,4768,217,2856,6174,7059,-3063,-5705,-2069,419,2304,-1790,-237,1411,234,-6417,-699,-858,1646,-5346,-1016,-1311,1490,-6350,-989,-2324,1540,-4858,-1904,-2046,1062,-5291,-1735,-3627,1222,-3865,-2478,-2522,1026,-1450,-1011,437,-1715,-313,-3013,-940,698,-5491,6684,1457,-7375,7700,-4125,1011,528,2546,2275,-302,-832,-6638,5122,-1210,-3340,-750,-982,-160,-318,-4358,943,-498,-777,-1282,-896,-701,-107,-2909,-131,-397,-234,-1553,-520,-1477,-755,-2686,-352,-956,-154,-390,242,-1960,-1999,-2030,-426,-1877,-451,-101,1482,-1170,-4180,-2068,-240,-1578,-556,-903,4025,-3574,-7861,-799,1620,-1446,-2749,2683,6881,-641,1136,5998,12947,-12500,6082,1234,1580,-1750,342,3910,2319,-7568,2004,791,142,-1213,85,2689,570,-5261,1248,806,-385,-889,7,2863,-108,-3930,1114,656,-337,-745,-273,3002,29,-4795,-452,-102,393,-923,-1924,1540,2123,-3898,-3738,-2064,2920,-1299,-3604,-5544,4572,-3526,-9479,-12481,8870,3362,35,-2276,14563,5495,3839,-4119,3758,1768,381,1635,7051,550,6445,-2072,1461,1031,1971,1742,3657,-978,5229,-1845,-13,886,2418,1554,1722,-1053,3821,-3065,-1629,1154,3075,1586,-177,-502,2623,-12994,-2270,1085,7447,-3980,168,1006,-1635,-6495,-5674,179,9896,-13958,4412,1664,-1919};
static const int B1[] = {12019,-11770,21698,-19615};
static const int W[] = {5580,7870,-409,-1225,-4126,-360,1691,-1471,-164,-2805,187,629,4956,3393,-4754,-1405,-8827,1835,208,-1378,-522,6792,-2802,-3127,4441,-2610,-3221,10321,-1444,5221,575,-9654,-3051,1685,-5320,4268,-5434,146,2679,1592};
static const int B[] = {414,14614,715,-12774,8092,-16933,-2974,-1177,3292,6596};
*/
int mnist(void);
#endif //__MNIST_INT_H__

View File

@@ -1,35 +0,0 @@
#include "onnx.h"
void add(const int *input, // pointer to vector
const int *bias, // pointer to bias vector
const uint16_t dim_vec, // length of the vector
int *output)
{
for (int i = 0; i < dim_vec; i++)
{
output[i] = input[i] + bias[i];
}
}
int* add_layer(Onnx__GraphProto* graph, const int *input, int64_t* shapeInput, int64_t* shapeOutput, const char* layer_name)
{
//assert(graph != NULL && input != NULL && layer_name != "" );
Onnx__NodeProto* node = onnx_graph_get_node_by_name(graph, layer_name);
if(node == NULL)
{
// layer not found
return NULL;
}
const char* bias = node->input[1];
int* B = onnx_graph_get_weights_by_name(graph, bias);
int64_t* shapeB = onnx_graph_get_dims_by_name(graph, bias);
if(shapeB == NULL)
{
return NULL;
}
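// the bias length gives the layer's output width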
int* output = (int*) malloc(sizeof(int)*shapeB[0]);
memset(output, 0, sizeof(int)*shapeB[0]);
add(input, B, shapeB[0], output);
memcpy(shapeOutput, shapeInput, sizeof(int64_t)*3); // element-wise add keeps the input shape
return output;
}

View File

@@ -1,113 +0,0 @@
#include "onnx.h"
void conv2D(const int *input, // input image
const uint16_t dim_im_in_x, // input image dimension x
const uint16_t dim_im_in_y, // input image dimension y
const uint16_t ch_im_in, // number of input image channels
const int *weight, // kernel weights
const uint16_t ch_im_out, // number of filters, i.e., output image channels
const uint16_t dim_kernel_x, // filter kernel size x
const uint16_t dim_kernel_y, // filter kernel size y
const uint16_t padding_x, // padding sizes x
const uint16_t padding_y, // padding sizes y
const uint16_t stride_x, // stride x
const uint16_t stride_y, // stride y
const int *bias, // bias
int *output, // output image
const uint16_t dim_im_out_x, // output image dimension x
const uint16_t dim_im_out_y // output image dimension y
)
{
int i, j, k, l, m, n;
int conv_out = 0;
int in_row, in_col;
// For each filter
for (i = 0; i < ch_im_out; i++)
{
// For each image dimension
for (j = 0; j < dim_im_out_y; j++)
{
for (k = 0; k < dim_im_out_x; k++)
{
conv_out = bias[i];
// For each kernel dimension
for (m = 0; m < dim_kernel_y; m++)
{
for (n = 0; n < dim_kernel_x; n++)
{
// "if-for" implementation: boundaries are handled by a range check inside the loops instead of zero-padding the input
in_row = stride_y * j + m - padding_y;
in_col = stride_x * k + n - padding_x;
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
{
// For each input channel
for (l = 0; l < ch_im_in; l++)
{
conv_out += input[(in_row * dim_im_in_x + in_col) * ch_im_in + l] *
weight[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in +
l];
}
}
}
}
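// store in NHWC order: the channel index i varies fastest in the output buffer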
output[i + (j * dim_im_out_x + k) * ch_im_out] = conv_out;
}
}
}
}
int* conv2D_layer(Onnx__GraphProto* graph, const int *input, int64_t* shapeInput, int64_t* shapeOutput, const char* layer_name)
{
//assert(graph != NULL && input != NULL && layer_name != "" );
Onnx__NodeProto* node = onnx_graph_get_node_by_name(graph, layer_name);
if(node == NULL)
{
// layer not found
return NULL;
}
const char* weight = node->input[1];
const char* bias = node->input[2];
// Get weight shape
int64_t* shapeW = onnx_graph_get_dims_by_name(graph, weight);
if(shapeW == NULL)
{
return NULL;
}
int64_t dimW = onnx_graph_get_dim_by_name(graph, weight);
if(dimW < 0)
{
return NULL;
}
// Get weights
// ONNX stores conv weights as NCHW; transpose to NHWC to match the channel-last indexing used in conv2D()
int64_t permW_t[] = { 0, 2, 3, 1};
int* W = onnx_graph_get_weights_by_name(graph, weight);
if(W == NULL)
{
return NULL;
}
int* W_t = transpose(W, shapeW, dimW, permW_t);
// Get bias
int* B = onnx_graph_get_weights_by_name(graph, bias);
if(B == NULL)
{
return NULL;
}
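// padding and stride are hard-coded to 1 below, so the spatial size is preserved; note this only matches 3x3 kernels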
int* output = (int*) malloc(sizeof(int)*shapeW[0]*shapeInput[W_INDEX]*shapeInput[H_INDEX]);
memset(output, 0, sizeof(int)*shapeW[0]*shapeInput[W_INDEX]*shapeInput[H_INDEX]);
conv2D(input, shapeInput[W_INDEX], shapeInput[H_INDEX], shapeW[1], W_t, shapeW[0], shapeW[2], shapeW[3], 1, 1, 1, 1, B, output, shapeInput[W_INDEX], shapeInput[H_INDEX]);
shapeOutput[W_INDEX] = shapeInput[W_INDEX];
shapeOutput[H_INDEX] = shapeInput[H_INDEX];
shapeOutput[C_INDEX] = shapeW[0];
free(W_t);
return output;
}

Some files were not shown because too many files have changed in this diff.