/*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 18 |
|
|---|
/* ----------------------------------------------------------------------
 * Project: CMSIS NN Library
 * Title: arm_relu_q7.c
 * Description: Q7 version of ReLU
 *
 * $Date: 17. January 2018
 * $Revision: V.1.0.0
 *
 * Target Processor: Cortex-M cores
 *
 * -------------------------------------------------------------------- */
| 30 |
|
|---|
| 31 | #include "arm_math.h"
|
|---|
| 32 | #include "arm_nnfunctions.h"
|
|---|
| 33 |
|
|---|
/**
 * @ingroup groupNN
 */
| 37 |
|
|---|
/**
 * @addtogroup Acti
 * @{
 */
| 42 |
|
|---|
| 43 | /**
|
|---|
| 44 | * @brief Q7 RELU function
|
|---|
| 45 | * @param[in,out] data pointer to input
|
|---|
| 46 | * @param[in] size number of elements
|
|---|
| 47 | * @return none.
|
|---|
| 48 | *
|
|---|
| 49 | * @details
|
|---|
| 50 | *
|
|---|
| 51 | * Optimized relu with QSUB instructions.
|
|---|
| 52 | *
|
|---|
| 53 | */
|
|---|
| 54 |
|
|---|
| 55 | void arm_relu_q7(q7_t * data, uint16_t size)
|
|---|
| 56 | {
|
|---|
| 57 |
|
|---|
| 58 | #if defined (ARM_MATH_DSP)
|
|---|
| 59 | /* Run the following code for Cortex-M4 and Cortex-M7 */
|
|---|
| 60 |
|
|---|
| 61 | uint16_t i = size >> 2;
|
|---|
| 62 | q7_t *pIn = data;
|
|---|
| 63 | q7_t *pOut = data;
|
|---|
| 64 | q31_t in;
|
|---|
| 65 | q31_t buf;
|
|---|
| 66 | q31_t mask;
|
|---|
| 67 |
|
|---|
| 68 | while (i)
|
|---|
| 69 | {
|
|---|
| 70 | in = *__SIMD32(pIn)++;
|
|---|
| 71 |
|
|---|
| 72 | /* extract the first bit */
|
|---|
| 73 | buf = __ROR(in & 0x80808080, 7);
|
|---|
| 74 |
|
|---|
| 75 | /* if MSB=1, mask will be 0xFF, 0x0 otherwise */
|
|---|
| 76 | mask = __QSUB8(0x00000000, buf);
|
|---|
| 77 |
|
|---|
| 78 | *__SIMD32(pOut)++ = in & (~mask);
|
|---|
| 79 | i--;
|
|---|
| 80 | }
|
|---|
| 81 |
|
|---|
| 82 | i = size & 0x3;
|
|---|
| 83 | while (i)
|
|---|
| 84 | {
|
|---|
| 85 | if (*pIn < 0)
|
|---|
| 86 | {
|
|---|
| 87 | *pIn = 0;
|
|---|
| 88 | }
|
|---|
| 89 | pIn++;
|
|---|
| 90 | i--;
|
|---|
| 91 | }
|
|---|
| 92 |
|
|---|
| 93 | #else
|
|---|
| 94 | /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|---|
| 95 |
|
|---|
| 96 | uint16_t i;
|
|---|
| 97 |
|
|---|
| 98 | for (i = 0; i < size; i++)
|
|---|
| 99 | {
|
|---|
| 100 | if (data[i] < 0)
|
|---|
| 101 | data[i] = 0;
|
|---|
| 102 | }
|
|---|
| 103 |
|
|---|
| 104 | #endif /* ARM_MATH_DSP */
|
|---|
| 105 |
|
|---|
| 106 | }
|
|---|
| 107 |
|
|---|
/**
 * @} end of Acti group
 */