mirror of
https://github.com/esp8266/Arduino.git
synced 2025-06-22 08:22:04 +03:00
[sam] Adding CMSIS 2.10
This commit is contained in:
@ -0,0 +1,127 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_max_f32.c
|
||||
*
|
||||
* Description: Maximum value of a floating-point vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup Max Maximum
|
||||
*
|
||||
* Computes the maximum value of an array of data.
|
||||
* The function returns both the maximum value and its position within the array.
|
||||
* There are separate functions for floating-point, Q31, Q15, and Q7 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Max
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Maximum value of a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult maximum value returned here
|
||||
* @param[out] *pIndex index of maximum value returned here
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_max_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
float32_t * pResult,
|
||||
uint32_t * pIndex)
|
||||
{
|
||||
float32_t maxVal, out; /* Temporary variables to store the output value. */
|
||||
uint32_t blkCnt, outIndex; /* loop counter */
|
||||
|
||||
/* Initialise the index value to zero. */
|
||||
outIndex = 0u;
|
||||
/* Load first input value that act as reference value for comparision */
|
||||
out = *pSrc++;
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
do
|
||||
{
|
||||
/* Initialize maxVal to the next consecutive values one by one */
|
||||
maxVal = *pSrc++;
|
||||
|
||||
/* compare for the maximum value */
|
||||
if(out < maxVal)
|
||||
{
|
||||
/* Update the maximum value and it's index */
|
||||
out = maxVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
} while(blkCnt > 0u);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Initialize maxVal to the next consecutive values one by one */
|
||||
maxVal = *pSrc++;
|
||||
|
||||
/* compare for the maximum value */
|
||||
if(out < maxVal)
|
||||
{
|
||||
/* Update the maximum value and it's index */
|
||||
out = maxVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
/* Store the maximum value and it's index into destination pointers */
|
||||
*pResult = out;
|
||||
*pIndex = outIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Max group
|
||||
*/
|
@ -0,0 +1,119 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_max_q15.c
|
||||
*
|
||||
* Description: Maximum value of a Q15 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Max
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Maximum value of a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult maximum value returned here
|
||||
* @param[out] *pIndex index of maximum value returned here
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_max_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q15_t * pResult,
|
||||
uint32_t * pIndex)
|
||||
{
|
||||
q15_t maxVal, out; /* Temporary variables to store the output value. */
|
||||
uint32_t blkCnt, outIndex; /* loop counter */
|
||||
|
||||
/* Initialise the index value to zero. */
|
||||
outIndex = 0u;
|
||||
/* Load first input value that act as reference value for comparision */
|
||||
out = *pSrc++;
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
do
|
||||
{
|
||||
/* Initialize maxVal to the next consecutive values one by one */
|
||||
maxVal = *pSrc++;
|
||||
|
||||
/* compare for the maximum value */
|
||||
if(out < maxVal)
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
out = maxVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
blkCnt--;
|
||||
|
||||
} while(blkCnt > 0u);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Initialize maxVal to the next consecutive values one by one */
|
||||
maxVal = *pSrc++;
|
||||
|
||||
/* compare for the maximum value */
|
||||
if(out < maxVal)
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
out = maxVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the maximum value and its index into destination pointers */
|
||||
*pResult = out;
|
||||
*pIndex = outIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Max group
|
||||
*/
|
@ -0,0 +1,121 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_max_q31.c
|
||||
*
|
||||
* Description: Maximum value of a Q31 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Max
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Maximum value of a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult maximum value returned here
|
||||
* @param[out] *pIndex index of maximum value returned here
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_max_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q31_t * pResult,
|
||||
uint32_t * pIndex)
|
||||
{
|
||||
q31_t maxVal, out; /* Temporary variables to store the output value. */
|
||||
uint32_t blkCnt, outIndex; /* loop counter */
|
||||
|
||||
/* Initialise the index value to zero. */
|
||||
outIndex = 0u;
|
||||
/* Load first input value that act as reference value for comparision */
|
||||
out = *pSrc++;
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
do
|
||||
{
|
||||
/* Initialize maxVal to the next consecutive values one by one */
|
||||
maxVal = *pSrc++;
|
||||
|
||||
/* compare for the maximum value */
|
||||
if(out < maxVal)
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
out = maxVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
} while(blkCnt > 0u);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Initialize maxVal to the next consecutive values one by one */
|
||||
maxVal = *pSrc++;
|
||||
|
||||
/* Compare for the maximum value */
|
||||
if(out < maxVal)
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
out = maxVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the maximum value and its index into destination pointers */
|
||||
*pResult = out;
|
||||
*pIndex = outIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Max group
|
||||
*/
|
@ -0,0 +1,206 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_max_q7.c
|
||||
*
|
||||
* Description: Maximum value of a Q7 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Max
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Maximum value of a Q7 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult maximum value returned here
|
||||
* @param[out] *pIndex index of maximum value returned here
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_max_q7(
|
||||
q7_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q7_t * pResult,
|
||||
uint32_t * pIndex)
|
||||
{
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q7_t res, maxVal, x0, x1, maxVal2, maxVal1; /* Temporary variables to store the output value. */
|
||||
uint32_t blkCnt, index1, index2, index3, indx, indxMod; /* loop counter */
|
||||
|
||||
/* Initialise the index value to zero. */
|
||||
indx = 0u;
|
||||
|
||||
/* Load first input value that act as reference value for comparision */
|
||||
res = *pSrc++;
|
||||
|
||||
/* Loop unrolling */
|
||||
blkCnt = (blockSize - 1u) >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
indxMod = blockSize - (blkCnt * 4u);
|
||||
|
||||
/* Load two input values for comparision */
|
||||
x0 = *pSrc++;
|
||||
x1 = *pSrc++;
|
||||
|
||||
if(x0 < x1)
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
maxVal1 = x1;
|
||||
index1 = indxMod + 1u;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
maxVal1 = x0;
|
||||
index1 = indxMod;
|
||||
}
|
||||
|
||||
/* Load two input values for comparision */
|
||||
x0 = *pSrc++;
|
||||
x1 = *pSrc++;
|
||||
|
||||
if(x0 < x1)
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
maxVal2 = x1;
|
||||
index2 = indxMod + 3u;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
maxVal2 = x0;
|
||||
index2 = indxMod + 2u;
|
||||
}
|
||||
|
||||
if(maxVal1 < maxVal2)
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
maxVal = maxVal2;
|
||||
index3 = index2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
maxVal = maxVal1;
|
||||
index3 = index1;
|
||||
}
|
||||
|
||||
if(res < maxVal)
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
res = maxVal;
|
||||
indx = index3;
|
||||
}
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
}
|
||||
|
||||
/* If the blockSize - 1 is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = (blockSize - 1u) % 0x04u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Initialize maxVal to the next consecutive values one by one */
|
||||
maxVal = *pSrc++;
|
||||
|
||||
/* compare for the maximum value */
|
||||
if(res < maxVal)
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
res = maxVal;
|
||||
indx = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store the maximum value and its index into destination pointers */
|
||||
*pResult = res;
|
||||
*pIndex = indx;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q7_t maxVal, out; /* Temporary variables to store the output value. */
|
||||
uint32_t blkCnt, outIndex; /* loop counter */
|
||||
|
||||
/* Initialise the index value to zero. */
|
||||
outIndex = 0u;
|
||||
/* Load first input value that act as reference value for comparision */
|
||||
out = *pSrc++;
|
||||
|
||||
/* Loop over blockSize - 1 number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Initialize maxVal to the next consecutive values one by one */
|
||||
maxVal = *pSrc++;
|
||||
|
||||
/* compare for the maximum value */
|
||||
if(out < maxVal)
|
||||
{
|
||||
/* Update the maximum value and its index */
|
||||
out = maxVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
}
|
||||
|
||||
/* Store the maximum value and its index into destination pointers */
|
||||
*pResult = out;
|
||||
*pIndex = outIndex;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Max group
|
||||
*/
|
@ -0,0 +1,122 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mean_f32.c
|
||||
*
|
||||
* Description: Mean value of a floating-point vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup mean Mean
|
||||
*
|
||||
* Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
|
||||
* The following algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating-point, Q31, Q15, and Q7 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup mean
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Mean value of a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult mean value returned here
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
|
||||
void arm_mean_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
float32_t * pResult)
|
||||
{
|
||||
float32_t sum = 0.0f; /* Temporary result storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
|
||||
/* Store the result to the destination */
|
||||
*pResult = sum / (float32_t) blockSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of mean group
|
||||
*/
|
@ -0,0 +1,119 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mean_q15.c
|
||||
*
|
||||
* Description: Mean value of a Q15 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup mean
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Mean value of a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult mean value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function is implemented using a 32-bit internal accumulator.
|
||||
* The input is represented in 1.15 format and is accumulated in a 32-bit
|
||||
* accumulator in 17.15 format.
|
||||
* There is no risk of internal overflow with this approach, and the
|
||||
* full precision of intermediate result is preserved.
|
||||
* Finally, the accumulator is saturated and truncated to yield a result of 1.15 format.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_mean_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q15_t * pResult)
|
||||
{
|
||||
q31_t sum = 0; /* Temporary result storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
|
||||
/* Store the result to the destination */
|
||||
*pResult = (q15_t) (sum / blockSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of mean group
|
||||
*/
|
@ -0,0 +1,119 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mean_q31.c
|
||||
*
|
||||
* Description: Mean value of a Q31 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup mean
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Mean value of a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult mean value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*\par
|
||||
* The function is implemented using a 64-bit internal accumulator.
|
||||
* The input is represented in 1.31 format and is accumulated in a 64-bit
|
||||
* accumulator in 33.31 format.
|
||||
* There is no risk of internal overflow with this approach, and the
|
||||
* full precision of intermediate result is preserved.
|
||||
* Finally, the accumulator is truncated to yield a result of 1.31 format.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_mean_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q31_t * pResult)
|
||||
{
|
||||
q63_t sum = 0; /* Temporary result storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
|
||||
/* Store the result to the destination */
|
||||
*pResult = (q31_t) (sum / (int32_t) blockSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of mean group
|
||||
*/
|
@ -0,0 +1,119 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mean_q7.c
|
||||
*
|
||||
* Description: Mean value of a Q7 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup mean
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Mean value of a Q7 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult mean value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function is implemented using a 32-bit internal accumulator.
|
||||
* The input is represented in 1.7 format and is accumulated in a 32-bit
|
||||
* accumulator in 25.7 format.
|
||||
* There is no risk of internal overflow with this approach, and the
|
||||
* full precision of intermediate result is preserved.
|
||||
* Finally, the accumulator is truncated to yield a result of 1.7 format.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_mean_q7(
|
||||
q7_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q7_t * pResult)
|
||||
{
|
||||
q31_t sum = 0; /* Temporary result storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
|
||||
/* Store the result to the destination */
|
||||
*pResult = (q7_t) (sum / (int32_t) blockSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of mean group
|
||||
*/
|
@ -0,0 +1,133 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_min_f32.c
|
||||
*
|
||||
* Description: Minimum value of a floating-point vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup Min Minimum
|
||||
*
|
||||
* Computes the minimum value of an array of data.
|
||||
* The function returns both the minimum value and its position within the array.
|
||||
* There are separate functions for floating-point, Q31, Q15, and Q7 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Min
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Minimum value of a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult minimum value returned here
|
||||
* @param[out] *pIndex index of minimum value returned here
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_min_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
float32_t * pResult,
|
||||
uint32_t * pIndex)
|
||||
{
|
||||
float32_t minVal, out; /* Temporary variables to store the output value. */
|
||||
uint32_t blkCnt, outIndex; /* loop counter */
|
||||
|
||||
/* Initialise the index value to zero. */
|
||||
outIndex = 0u;
|
||||
/* Load first input value that act as reference value for comparision */
|
||||
out = *pSrc++;
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
do
|
||||
{
|
||||
/* Initialize minVal to the next consecutive values one by one */
|
||||
minVal = *pSrc++;
|
||||
|
||||
/* compare for the minimum value */
|
||||
if(out > minVal)
|
||||
{
|
||||
/* Update the minimum value and it's index */
|
||||
out = minVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
blkCnt--;
|
||||
|
||||
} while(blkCnt > 0u);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize - 1 number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Initialize minVal to the next consecutive values one by one */
|
||||
minVal = *pSrc++;
|
||||
|
||||
/* compare for the minimum value */
|
||||
if(out > minVal)
|
||||
{
|
||||
/* Update the minimum value and it's index */
|
||||
out = minVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
/* Store the minimum value and it's index into destination pointers */
|
||||
*pResult = out;
|
||||
*pIndex = outIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Min group
|
||||
*/
|
@ -0,0 +1,127 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_min_q15.c
|
||||
*
|
||||
* Description: Minimum value of a Q15 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @addtogroup Min
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Minimum value of a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult minimum value returned here
|
||||
* @param[out] *pIndex index of minimum value returned here
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_min_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q15_t * pResult,
|
||||
uint32_t * pIndex)
|
||||
{
|
||||
q15_t minVal, out; /* Temporary variables to store the output value. */
|
||||
uint32_t blkCnt, outIndex; /* loop counter */
|
||||
|
||||
/* Initialise the index value to zero. */
|
||||
outIndex = 0u;
|
||||
/* Load first input value that act as reference value for comparision */
|
||||
out = *pSrc++;
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
do
|
||||
{
|
||||
/* Initialize minVal to the next consecutive values one by one */
|
||||
minVal = *pSrc++;
|
||||
|
||||
/* compare for the minimum value */
|
||||
if(out > minVal)
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
out = minVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
blkCnt--;
|
||||
|
||||
} while(blkCnt > 0u);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize - 1 number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Initialize minVal to the next consecutive values one by one */
|
||||
minVal = *pSrc++;
|
||||
|
||||
/* compare for the minimum value */
|
||||
if(out > minVal)
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
out = minVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
/* Store the minimum value and its index into destination pointers */
|
||||
*pResult = out;
|
||||
*pIndex = outIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Min group
|
||||
*/
|
@ -0,0 +1,125 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_min_q31.c
|
||||
*
|
||||
* Description: Minimum value of a Q31 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @addtogroup Min
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Minimum value of a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult minimum value returned here
|
||||
* @param[out] *pIndex index of minimum value returned here
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_min_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q31_t * pResult,
|
||||
uint32_t * pIndex)
|
||||
{
|
||||
q31_t minVal, out; /* Temporary variables to store the output value. */
|
||||
uint32_t blkCnt, outIndex; /* loop counter */
|
||||
|
||||
/* Initialise the index value to zero. */
|
||||
outIndex = 0u;
|
||||
/* Load first input value that act as reference value for comparision */
|
||||
out = *pSrc++;
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
do
|
||||
{
|
||||
/* Initialize minVal to the next consecutive values one by one */
|
||||
minVal = *pSrc++;
|
||||
|
||||
/* compare for the minimum value */
|
||||
if(out > minVal)
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
out = minVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
blkCnt--;
|
||||
|
||||
} while(blkCnt > 0u);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize -1 number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Initialize minVal to the next consecutive values one by one */
|
||||
minVal = *pSrc++;
|
||||
|
||||
/* compare for the minimum value */
|
||||
if(out > minVal)
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
out = minVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the minimum value and its index into destination pointers */
|
||||
*pResult = out;
|
||||
*pIndex = outIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Min group
|
||||
*/
|
@ -0,0 +1,204 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_min_q7.c
|
||||
*
|
||||
* Description: Minimum value of a Q7 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Min
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Minimum value of a Q7 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult minimum value returned here
|
||||
* @param[out] *pIndex index of minimum value returned here
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_min_q7(
|
||||
q7_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q7_t * pResult,
|
||||
uint32_t * pIndex)
|
||||
{
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q7_t minVal, minVal1, minVal2, res, x0, x1; /* Temporary variables to store the output value. */
|
||||
uint32_t blkCnt, indx, index1, index2, index3, indxMod; /* loop counter */
|
||||
|
||||
/* Initialise the index value to zero. */
|
||||
indx = 0u;
|
||||
|
||||
/* Load first input value that act as reference value for comparision */
|
||||
res = *pSrc++;
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = (blockSize - 1u) >> 2u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
indxMod = blockSize - (blkCnt * 4u);
|
||||
|
||||
/* Load two input values for comparision */
|
||||
x0 = *pSrc++;
|
||||
x1 = *pSrc++;
|
||||
|
||||
if(x0 > x1)
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
minVal1 = x1;
|
||||
index1 = indxMod + 1u;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
minVal1 = x0;
|
||||
index1 = indxMod;
|
||||
}
|
||||
|
||||
/* Load two input values for comparision */
|
||||
x0 = *pSrc++;
|
||||
x1 = *pSrc++;
|
||||
|
||||
if(x0 > x1)
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
minVal2 = x1;
|
||||
index2 = indxMod + 3u;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
minVal2 = x0;
|
||||
index2 = indxMod + 2u;
|
||||
}
|
||||
|
||||
if(minVal1 > minVal2)
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
minVal = minVal2;
|
||||
index3 = index2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
minVal = minVal1;
|
||||
index3 = index1;
|
||||
}
|
||||
|
||||
if(res > minVal)
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
res = minVal;
|
||||
indx = index3;
|
||||
}
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
||||
}
|
||||
|
||||
blkCnt = (blockSize - 1u) % 0x04u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Initialize minVal to the next consecutive values one by one */
|
||||
minVal = *pSrc++;
|
||||
|
||||
/* compare for the minimum value */
|
||||
if(res > minVal)
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
res = minVal;
|
||||
indx = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store the minimum value and its index into destination pointers */
|
||||
*pResult = res;
|
||||
*pIndex = indx;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q7_t minVal, out; /* Temporary variables to store the output value. */
|
||||
uint32_t blkCnt, outIndex; /* loop counter */
|
||||
|
||||
/* Initialise the index value to zero. */
|
||||
outIndex = 0u;
|
||||
|
||||
/* Load first input value that act as reference value for comparision */
|
||||
out = *pSrc++;
|
||||
|
||||
/* Loop over blockSize - 1 number of values */
|
||||
blkCnt = (blockSize - 1u);
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Initialize minVal to the next consecutive values one by one */
|
||||
minVal = *pSrc++;
|
||||
|
||||
/* compare for the minimum value */
|
||||
if(out > minVal)
|
||||
{
|
||||
/* Update the minimum value and its index */
|
||||
out = minVal;
|
||||
outIndex = blockSize - blkCnt;
|
||||
}
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store the minimum value and its index into destination pointers */
|
||||
*pResult = out;
|
||||
*pIndex = outIndex;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Min group
|
||||
*/
|
@ -0,0 +1,135 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_power_f32.c
|
||||
*
|
||||
* Description: Sum of the squares of the elements of a floating-point vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 0.0.7 2010/06/10
|
||||
* Misra-C changes done
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup power Power
|
||||
*
|
||||
* Calculates the sum of the squares of the elements in the input vector.
|
||||
* The underlying algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating point, Q31, Q15, and Q7 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup power
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Sum of the squares of the elements of a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult sum of the squares value returned here
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_power_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
float32_t * pResult)
|
||||
{
|
||||
float32_t sum = 0.0f; /* accumulator */
|
||||
float32_t in; /* Temporary variable to store input value */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute Power and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* compute power and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store the result to the destination */
|
||||
*pResult = sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of power group
|
||||
*/
|
@ -0,0 +1,141 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_power_q15.c
|
||||
*
|
||||
* Description: Sum of the squares of the elements of a Q15 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup power
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Sum of the squares of the elements of a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult sum of the squares value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*
|
||||
* \par
|
||||
* The function is implemented using a 64-bit internal accumulator.
|
||||
* The input is represented in 1.15 format.
|
||||
* Intermediate multiplication yields a 2.30 format, and this
|
||||
* result is added without saturation to a 64-bit accumulator in 34.30 format.
|
||||
* With 33 guard bits in the accumulator, there is no risk of overflow, and the
|
||||
* full precision of the intermediate multiplication is preserved.
|
||||
* Finally, the return result is in 34.30 format.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_power_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q63_t * pResult)
|
||||
{
|
||||
q63_t sum = 0; /* Temporary result storage */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t in32; /* Temporary variable to store input value */
|
||||
q15_t in16; /* Temporary variable to store input value */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute Power and then store the result in a temporary variable, sum. */
|
||||
in32 = *__SIMD32(pSrc)++;
|
||||
sum = __SMLALD(in32, in32, sum);
|
||||
in32 = *__SIMD32(pSrc)++;
|
||||
sum = __SMLALD(in32, in32, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute Power and then store the result in a temporary variable, sum. */
|
||||
in16 = *pSrc++;
|
||||
sum = __SMLALD(in16, in16, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q15_t in; /* Temporary variable to store input value */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute Power and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += ((q31_t) in * in);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Store the results in 34.30 format */
|
||||
*pResult = sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of power group
|
||||
*/
|
@ -0,0 +1,132 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_power_q31.c
|
||||
*
|
||||
* Description: Sum of the squares of the elements of a Q31 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup power
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Sum of the squares of the elements of a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult sum of the squares value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*
|
||||
* \par
|
||||
* The function is implemented using a 64-bit internal accumulator.
|
||||
* The input is represented in 1.31 format.
|
||||
* Intermediate multiplication yields a 2.62 format, and this
|
||||
* result is truncated to 2.48 format by discarding the lower 14 bits.
|
||||
* The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
|
||||
* With 15 guard bits in the accumulator, there is no risk of overflow, and the
|
||||
* full precision of the intermediate multiplication is preserved.
|
||||
* Finally, the return result is in 16.48 format.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_power_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q63_t * pResult)
|
||||
{
|
||||
q63_t sum = 0; /* Temporary result storage */
|
||||
q31_t in;
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and then store the result in a temporary variable sum, providing 15 guard bits. */
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) in * in) >> 14u;
|
||||
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) in * in) >> 14u;
|
||||
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) in * in) >> 14u;
|
||||
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) in * in) >> 14u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute Power and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) in * in) >> 14u;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store the results in 16.48 format */
|
||||
*pResult = sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of power group
|
||||
*/
|
@ -0,0 +1,137 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_power_q7.c
|
||||
*
|
||||
* Description: Sum of the squares of the elements of a Q7 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup power
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Sum of the squares of the elements of a Q7 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult sum of the squares value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*
|
||||
* \par
|
||||
* The function is implemented using a 32-bit internal accumulator.
|
||||
* The input is represented in 1.7 format.
|
||||
* Intermediate multiplication yields a 2.14 format, and this
|
||||
* result is added without saturation to an accumulator in 18.14 format.
|
||||
* With 17 guard bits in the accumulator, there is no risk of overflow, and the
|
||||
* full precision of the intermediate multiplication is preserved.
|
||||
* Finally, the return result is in 18.14 format.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_power_q7(
|
||||
q7_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q31_t * pResult)
|
||||
{
|
||||
q31_t sum = 0; /* Temporary result storage */
|
||||
q7_t in; /* Temporary variable to store input */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t input1; /* Temporary variable to store packed input */
|
||||
q15_t in1, in2; /* Temporary variables to store input */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* Reading two inputs of pSrc vector and packing */
|
||||
in1 = (q15_t) * pSrc++;
|
||||
in2 = (q15_t) * pSrc++;
|
||||
input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16);
|
||||
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute Power and then store the result in a temporary variable, sum. */
|
||||
sum = __SMLAD(input1, input1, sum);
|
||||
|
||||
/* Reading two inputs of pSrc vector and packing */
|
||||
in1 = (q15_t) * pSrc++;
|
||||
in2 = (q15_t) * pSrc++;
|
||||
input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16);
|
||||
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute Power and then store the result in a temporary variable, sum. */
|
||||
sum = __SMLAD(input1, input1, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute Power and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += ((q15_t) in * in);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store the result in 18.14 format */
|
||||
*pResult = sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of power group
|
||||
*/
|
@ -0,0 +1,130 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_rms_f32.c
|
||||
*
|
||||
* Description: Root mean square value of an array of F32 type
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup RMS Root mean square (RMS)
|
||||
*
|
||||
*
|
||||
* Calculates the Root Mean Square of the elements in the input vector.
|
||||
* The following algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating point, Q31, and Q15 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup RMS
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Root Mean Square of the elements of a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult rms value returned here
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_rms_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
float32_t * pResult)
|
||||
{
|
||||
float32_t sum = 0.0f; /* Accumulator */
|
||||
float32_t in; /* Tempoprary variable to store input value */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute sum of the squares and then store the result in a temporary variable, sum */
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute sum of the squares and then store the results in a temporary variable, sum */
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute Rms and store the result in the destination */
|
||||
arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of RMS group
|
||||
*/
|
@ -0,0 +1,150 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_rms_q15.c
|
||||
*
|
||||
* Description: Root Mean Square of the elements of a Q15 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @addtogroup RMS
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Root Mean Square of the elements of a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult rms value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*
|
||||
* \par
|
||||
* The function is implemented using a 64-bit internal accumulator.
|
||||
* The input is represented in 1.15 format.
|
||||
* Intermediate multiplication yields a 2.30 format, and this
|
||||
* result is added without saturation to a 64-bit accumulator in 34.30 format.
|
||||
* With 33 guard bits in the accumulator, there is no risk of overflow, and the
|
||||
* full precision of the intermediate multiplication is preserved.
|
||||
* Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
|
||||
* 15 bits, and then saturated to yield a result in 1.15 format.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_rms_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q15_t * pResult)
|
||||
{
|
||||
q63_t sum = 0; /* accumulator */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t in; /* temporary variable to store the input value */
|
||||
q15_t in1; /* temporary variable to store the input value */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/* loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute sum of the squares and then store the results in a temporary variable, sum */
|
||||
in = *__SIMD32(pSrc)++;
|
||||
sum = __SMLALD(in, in, sum);
|
||||
in = *__SIMD32(pSrc)++;
|
||||
sum = __SMLALD(in, in, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute sum of the squares and then store the results in a temporary variable, sum */
|
||||
in1 = *pSrc++;
|
||||
sum = __SMLALD(in1, in1, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Truncating and saturating the accumulator to 1.15 format */
|
||||
sum = __SSAT((q31_t) (sum >> 15), 16);
|
||||
|
||||
in1 = (q15_t) (sum / blockSize);
|
||||
|
||||
/* Store the result in the destination */
|
||||
arm_sqrt_q15(in1, pResult);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q15_t in; /* temporary variable to store the input value */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute sum of the squares and then store the results in a temporary variable, sum */
|
||||
in = *pSrc++;
|
||||
sum += ((q31_t) in * in);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Truncating and saturating the accumulator to 1.15 format */
|
||||
sum = __SSAT((q31_t) (sum >> 15), 16);
|
||||
|
||||
in = (q15_t) (sum / blockSize);
|
||||
|
||||
/* Store the result in the destination */
|
||||
arm_sqrt_q15(in, pResult);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of RMS group
|
||||
*/
|
@ -0,0 +1,143 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_rms_q31.c
|
||||
*
|
||||
* Description: Root Mean Square of the elements of a Q31 vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @addtogroup RMS
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Root Mean Square of the elements of a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult rms value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*
|
||||
*\par
|
||||
* The function is implemented using an internal 64-bit accumulator.
|
||||
* The input is represented in 1.31 format, and intermediate multiplication
|
||||
* yields a 2.62 format.
|
||||
* The accumulator maintains full precision of the intermediate multiplication results,
|
||||
* but provides only a single guard bit.
|
||||
* There is no saturation on intermediate additions.
|
||||
* If the accumulator overflows, it wraps around and distorts the result.
|
||||
* In order to avoid overflows completely, the input signal must be scaled down by
|
||||
* log2(blockSize) bits, as a total of blockSize additions are performed internally.
|
||||
* Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_rms_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q31_t * pResult)
|
||||
{
|
||||
q63_t sum = 0; /* accumulator */
|
||||
q31_t in; /* Temporary variable to store the input */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t *pIn1 = pSrc; /* SrcA pointer */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute sum of the squares and then store the result in a temporary variable, sum */
|
||||
in = *pIn1++;
|
||||
sum += (q63_t) in *in;
|
||||
in = *pIn1++;
|
||||
sum += (q63_t) in *in;
|
||||
in = *pIn1++;
|
||||
sum += (q63_t) in *in;
|
||||
in = *pIn1++;
|
||||
sum += (q63_t) in *in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute sum of the squares and then store the results in a temporary variable, sum */
|
||||
in = *pIn1++;
|
||||
sum += (q63_t) in *in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
|
||||
/* Compute sum of the squares and then store the results in a temporary variable, sum */
|
||||
in = *pSrc++;
|
||||
sum += (q63_t) in *in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Convert data in 2.62 to 1.31 by 31 right shifts */
|
||||
sum = sum >> 31;
|
||||
|
||||
/* Compute Rms and store the result in the destination vector */
|
||||
arm_sqrt_q31((q31_t) (sum / (int32_t) blockSize), pResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of RMS group
|
||||
*/
|
@ -0,0 +1,222 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_std_f32.c
|
||||
*
|
||||
* Description: Standard deviation of the elements of a floating-point vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup STD Standard deviation
|
||||
*
|
||||
* Calculates the standard deviation of the elements in the input vector.
|
||||
* The following algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))
|
||||
*
|
||||
* where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
|
||||
*
|
||||
* sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating point, Q31, and Q15 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup STD
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Standard deviation of the elements of a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult standard deviation value returned here
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_std_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
float32_t * pResult)
|
||||
{
|
||||
float32_t sum = 0.0f; /* Temporary result storage */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
float32_t meanOfSquares, mean, in, squareOfMean;
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
float32_t *pIn; /* Temporary pointer */
|
||||
|
||||
pIn = pSrc;
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute Mean of squares of the input samples
|
||||
* and then store the result in a temporary variable, meanOfSquares. */
|
||||
meanOfSquares = sum / ((float32_t) blockSize - 1.0f);
|
||||
|
||||
/* Reset the accumulator */
|
||||
sum = 0.0f;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Reset the input working pointer */
|
||||
pSrc = pIn;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
/* Compute mean of all input values */
|
||||
mean = sum / (float32_t) blockSize;
|
||||
|
||||
/* Compute square of mean */
|
||||
squareOfMean = (mean * mean) * (((float32_t) blockSize) /
|
||||
((float32_t) blockSize - 1.0f));
|
||||
|
||||
/* Compute standard deviation and then store the result to the destination */
|
||||
arm_sqrt_f32((meanOfSquares - squareOfMean), pResult);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
float32_t sumOfSquares = 0.0f; /* Sum of squares */
|
||||
float32_t squareOfSum; /* Square of Sum */
|
||||
float32_t in; /* input value */
|
||||
float32_t var; /* Temporary varaince storage */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sumOfSquares. */
|
||||
in = *pSrc++;
|
||||
sumOfSquares += in * in;
|
||||
|
||||
/* C = (A[0] + A[1] + ... + A[blockSize-1]) */
|
||||
/* Compute Sum of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
sum += in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute the square of sum */
|
||||
squareOfSum = ((sum * sum) / (float32_t) blockSize);
|
||||
|
||||
/* Compute the variance */
|
||||
var = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
|
||||
|
||||
/* Compute standard deviation and then store the result to the destination */
|
||||
arm_sqrt_f32(var, pResult);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of STD group
|
||||
*/
|
@ -0,0 +1,229 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_std_q15.c
|
||||
*
|
||||
* Description: Standard deviation of an array of Q15 type.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup STD
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Standard deviation of the elements of a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult standard deviation value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*
|
||||
* \par
|
||||
* The function is implemented using a 64-bit internal accumulator.
|
||||
* The input is represented in 1.15 format.
|
||||
* Intermediate multiplication yields a 2.30 format, and this
|
||||
* result is added without saturation to a 64-bit accumulator in 34.30 format.
|
||||
* With 33 guard bits in the accumulator, there is no risk of overflow, and the
|
||||
* full precision of the intermediate multiplication is preserved.
|
||||
* Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
|
||||
* 15 bits, and then saturated to yield a result in 1.15 format.
|
||||
*/
|
||||
|
||||
void arm_std_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q15_t * pResult)
|
||||
{
|
||||
q63_t sum = 0; /* Accumulator */
|
||||
q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
|
||||
q15_t mean; /* mean */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q15_t t; /* Temporary variable */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q15_t *pIn; /* Temporary pointer */
|
||||
q31_t in; /* input value */
|
||||
q15_t in1; /* input value */
|
||||
|
||||
pIn = pSrc;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in = *__SIMD32(pSrc)++;
|
||||
sum = __SMLALD(in, in, sum);
|
||||
in = *__SIMD32(pSrc)++;
|
||||
sum = __SMLALD(in, in, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in1 = *pSrc++;
|
||||
sum = __SMLALD(in1, in1, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute Mean of squares of the input samples
|
||||
* and then store the result in a temporary variable, meanOfSquares. */
|
||||
t = (q15_t) ((1.0 / (blockSize - 1)) * 16384LL);
|
||||
sum = __SSAT((sum >> 15u), 16u);
|
||||
|
||||
meanOfSquares = (q31_t) ((sum * t) >> 14u);
|
||||
|
||||
/* Reset the accumulator */
|
||||
sum = 0;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Reset the input working pointer */
|
||||
pSrc = pIn;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
/* Compute mean of all input values */
|
||||
t = (q15_t) ((1.0 / (blockSize * (blockSize - 1))) * 32768LL);
|
||||
mean = (q15_t) __SSAT(sum, 16u);
|
||||
|
||||
/* Compute square of mean */
|
||||
squareOfMean = ((q31_t) mean * mean) >> 15;
|
||||
squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15);
|
||||
|
||||
/* mean of the squares minus the square of the mean. */
|
||||
in1 = (q15_t) (meanOfSquares - squareOfMean);
|
||||
|
||||
/* Compute standard deviation and store the result to the destination */
|
||||
arm_sqrt_q15(in1, pResult);
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q63_t sumOfSquares = 0; /* Accumulator */
|
||||
q15_t in; /* input value */
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sumOfSquares. */
|
||||
in = *pSrc++;
|
||||
sumOfSquares += (in * in);
|
||||
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute Mean of squares of the input samples
|
||||
* and then store the result in a temporary variable, meanOfSquares. */
|
||||
t = (q15_t) ((1.0 / (blockSize - 1)) * 16384LL);
|
||||
sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
|
||||
meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
|
||||
|
||||
/* Compute mean of all input values */
|
||||
mean = (q15_t) __SSAT(sum, 16u);
|
||||
|
||||
/* Compute square of mean of the input samples
|
||||
* and then store the result in a temporary variable, squareOfMean.*/
|
||||
t = (q15_t) ((1.0 / (blockSize * (blockSize - 1))) * 32768LL);
|
||||
squareOfMean = ((q31_t) mean * mean) >> 15;
|
||||
squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15);
|
||||
|
||||
/* mean of the squares minus the square of the mean. */
|
||||
in = (q15_t) (meanOfSquares - squareOfMean);
|
||||
|
||||
/* Compute standard deviation and store the result to the destination */
|
||||
arm_sqrt_q15(in, pResult);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of STD group
|
||||
*/
|
@ -0,0 +1,219 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_std_q31.c
|
||||
*
|
||||
* Description: Standard deviation of an array of Q31 type.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup STD
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Standard deviation of the elements of a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult standard deviation value returned here
|
||||
* @return none.
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*
|
||||
*\par
|
||||
* The function is implemented using an internal 64-bit accumulator.
|
||||
* The input is represented in 1.31 format, and intermediate multiplication
|
||||
* yields a 2.62 format.
|
||||
* The accumulator maintains full precision of the intermediate multiplication results,
|
||||
* but provides only a single guard bit.
|
||||
* There is no saturation on intermediate additions.
|
||||
* If the accumulator overflows it wraps around and distorts the result.
|
||||
* In order to avoid overflows completely the input signal must be scaled down by
|
||||
* log2(blockSize) bits, as a total of blockSize additions are performed internally.
|
||||
* Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_std_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q31_t * pResult)
|
||||
{
|
||||
q63_t sum = 0; /* Accumulator */
|
||||
q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
|
||||
q31_t mean; /* mean */
|
||||
q31_t in; /* input value */
|
||||
q31_t t; /* Temporary variable */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t *pIn; /* Temporary pointer */
|
||||
|
||||
pIn = pSrc;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) (in) * (in));
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) (in) * (in));
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) (in) * (in));
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) (in) * (in));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) (in) * (in));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f);
|
||||
|
||||
/* Compute Mean of squares of the input samples
|
||||
* and then store the result in a temporary variable, meanOfSquares. */
|
||||
sum = (sum >> 31);
|
||||
meanOfSquares = (q31_t) ((sum * t) >> 30);
|
||||
|
||||
/* Reset the accumulator */
|
||||
sum = 0;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Reset the input working pointer */
|
||||
pSrc = pIn;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q63_t sumOfSquares = 0; /* Accumulator */
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sumOfSquares. */
|
||||
in = *pSrc++;
|
||||
sumOfSquares += ((q63_t) (in) * (in));
|
||||
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute Mean of squares of the input samples
|
||||
* and then store the result in a temporary variable, meanOfSquares. */
|
||||
t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f);
|
||||
sumOfSquares = (sumOfSquares >> 31);
|
||||
meanOfSquares = (q31_t) ((sumOfSquares * t) >> 30);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Compute mean of all input values */
|
||||
t = (q31_t) ((1.0f / (blockSize * (blockSize - 1u))) * 2147483648.0f);
|
||||
mean = (q31_t) (sum);
|
||||
|
||||
/* Compute square of mean */
|
||||
squareOfMean = (q31_t) (((q63_t) mean * mean) >> 31);
|
||||
squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 31);
|
||||
|
||||
|
||||
/* Compute standard deviation and then store the result to the destination */
|
||||
arm_sqrt_q31(meanOfSquares - squareOfMean, pResult);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of STD group
|
||||
*/
|
@ -0,0 +1,219 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_var_f32.c
|
||||
*
|
||||
* Description: Variance of the elements of a floating-point vector.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* ---------------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup variance Variance
|
||||
*
|
||||
* Calculates the variance of the elements in the input vector.
|
||||
* The following algorithm is used:
|
||||
*
|
||||
* <pre>
|
||||
* Result = (sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1)
|
||||
*
|
||||
* where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
|
||||
*
|
||||
* sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating point, Q31, and Q15 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup variance
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Variance of the elements of a floating-point vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult variance value returned here
|
||||
* @return none.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_var_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
float32_t * pResult)
|
||||
{
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
float32_t sum = (float32_t) 0.0; /* Accumulator */
|
||||
float32_t meanOfSquares, mean, in, squareOfMean; /* Temporary variables */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
float32_t *pIn; /* Temporary pointer */
|
||||
|
||||
/* updating temporary pointer */
|
||||
pIn = pSrc;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += in * in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute Mean of squares of the input samples
|
||||
* and then store the result in a temporary variable, meanOfSquares. */
|
||||
meanOfSquares = sum / ((float32_t) blockSize - 1.0f);
|
||||
|
||||
/* Reset the accumulator */
|
||||
sum = 0.0f;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Reset the input working pointer */
|
||||
pSrc = pIn;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
/* Compute mean of all input values */
|
||||
mean = sum / (float32_t) blockSize;
|
||||
|
||||
/* Compute square of mean */
|
||||
squareOfMean = (mean * mean) * (((float32_t) blockSize) /
|
||||
((float32_t) blockSize - 1.0f));
|
||||
|
||||
/* Compute variance and then store the result to the destination */
|
||||
*pResult = meanOfSquares - squareOfMean;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
float32_t sum = 0.0f; /* Temporary result storage */
|
||||
float32_t sumOfSquares = 0.0f; /* Sum of squares */
|
||||
float32_t squareOfSum; /* Square of Sum */
|
||||
float32_t in; /* input value */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sumOfSquares. */
|
||||
in = *pSrc++;
|
||||
sumOfSquares += in * in;
|
||||
|
||||
/* C = (A[0] + A[1] + ... + A[blockSize-1]) */
|
||||
/* Compute Sum of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
sum += in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute the square of sum */
|
||||
squareOfSum = ((sum * sum) / (float32_t) blockSize);
|
||||
|
||||
/* Compute the variance */
|
||||
*pResult = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of variance group
|
||||
*/
|
@ -0,0 +1,214 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_var_q15.c
|
||||
*
|
||||
* Description: Variance of an array of Q15 type.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup variance
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Variance of the elements of a Q15 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult variance value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*
|
||||
* \par
|
||||
* The function is implemented using a 64-bit internal accumulator.
|
||||
* The input is represented in 1.15 format.
|
||||
* Intermediate multiplication yields a 2.30 format, and this
|
||||
* result is added without saturation to a 64-bit accumulator in 34.30 format.
|
||||
* With 33 guard bits in the accumulator, there is no risk of overflow, and the
|
||||
* full precision of the intermediate multiplication is preserved.
|
||||
* Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
|
||||
* 15 bits, and then saturated to yield a result in 1.15 format.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_var_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q31_t * pResult)
|
||||
{
|
||||
q63_t sum = 0; /* Accumulator */
|
||||
q31_t meanOfSquares, squareOfMean; /* Mean of square and square of mean */
|
||||
q15_t mean; /* mean */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
q15_t t; /* Temporary variable */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t in; /* Input variable */
|
||||
q15_t in1; /* Temporary variable */
|
||||
q15_t *pIn; /* Temporary pointer */
|
||||
|
||||
pIn = pSrc;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in = *__SIMD32(pSrc)++;
|
||||
sum = __SMLALD(in, in, sum);
|
||||
in = *__SIMD32(pSrc)++;
|
||||
sum = __SMLALD(in, in, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in1 = *pSrc++;
|
||||
sum = __SMLALD(in1, in1, sum);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute Mean of squares of the input samples
|
||||
* and then store the result in a temporary variable, meanOfSquares. */
|
||||
t = (q15_t) ((1.0f / (float32_t) (blockSize - 1u)) * 16384);
|
||||
sum = __SSAT((sum >> 15u), 16u);
|
||||
|
||||
meanOfSquares = (q31_t) ((sum * t) >> 14u);
|
||||
|
||||
/* Reset the accumulator */
|
||||
sum = 0;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Reset the input working pointer */
|
||||
pSrc = pIn;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q63_t sumOfSquares = 0; /* Accumulator */
|
||||
q15_t in; /* Temporary variable */
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sumOfSquares. */
|
||||
in = *pSrc++;
|
||||
sumOfSquares += (in * in);
|
||||
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute Mean of squares of the input samples
|
||||
* and then store the result in a temporary variable, meanOfSquares. */
|
||||
t = (q15_t) ((1.0f / (float32_t) (blockSize - 1u)) * 16384);
|
||||
sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
|
||||
meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
|
||||
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Compute mean of all input values */
|
||||
t = (q15_t) ((1.0f / (float32_t) (blockSize * (blockSize - 1u))) * 32768);
|
||||
mean = __SSAT(sum, 16u);
|
||||
|
||||
/* Compute square of mean */
|
||||
squareOfMean = ((q31_t) mean * mean) >> 15;
|
||||
squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15);
|
||||
|
||||
/* Compute variance and then store the result to the destination */
|
||||
*pResult = (meanOfSquares - squareOfMean);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of variance group
|
||||
*/
|
@ -0,0 +1,216 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010 ARM Limited. All rights reserved.
|
||||
*
|
||||
* $Date: 15. July 2011
|
||||
* $Revision: V1.0.10
|
||||
*
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_var_q31.c
|
||||
*
|
||||
* Description: Variance of an array of Q31 type.
|
||||
*
|
||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
||||
*
|
||||
* Version 1.0.10 2011/7/15
|
||||
* Big Endian support added and Merged M0 and M3/M4 Source code.
|
||||
*
|
||||
* Version 1.0.3 2010/11/29
|
||||
* Re-organized the CMSIS folders and updated documentation.
|
||||
*
|
||||
* Version 1.0.2 2010/11/11
|
||||
* Documentation updated.
|
||||
*
|
||||
* Version 1.0.1 2010/10/05
|
||||
* Production release and review comments incorporated.
|
||||
*
|
||||
* Version 1.0.0 2010/09/20
|
||||
* Production release and review comments incorporated.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup variance
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Variance of the elements of a Q31 vector.
|
||||
* @param[in] *pSrc points to the input vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @param[out] *pResult variance value returned here
|
||||
* @return none.
|
||||
*
|
||||
* @details
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
*
|
||||
*\par
|
||||
* The function is implemented using an internal 64-bit accumulator.
|
||||
* The input is represented in 1.31 format, and intermediate multiplication
|
||||
* yields a 2.62 format.
|
||||
* The accumulator maintains full precision of the intermediate multiplication results,
|
||||
* but provides only a single guard bit.
|
||||
* There is no saturation on intermediate additions.
|
||||
* If the accumulator overflows it wraps around and distorts the result.
|
||||
* In order to avoid overflows completely the input signal must be scaled down by
|
||||
* log2(blockSize) bits, as a total of blockSize additions are performed internally.
|
||||
* Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void arm_var_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t blockSize,
|
||||
q63_t * pResult)
|
||||
{
|
||||
q63_t sum = 0; /* Accumulator */
|
||||
q31_t meanOfSquares, squareOfMean; /* Mean of square and square of mean */
|
||||
q31_t mean; /* Mean */
|
||||
q31_t in; /* Input variable */
|
||||
q31_t t; /* Temporary variable */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifndef ARM_MATH_CM0
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
q31_t *pIn; /* Temporary pointer */
|
||||
|
||||
pIn = pSrc;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) (in) * (in));
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) (in) * (in));
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) (in) * (in));
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) (in) * (in));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sum. */
|
||||
in = *pSrc++;
|
||||
sum += ((q63_t) (in) * (in));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute Mean of squares of the input samples
|
||||
* and then store the result in a temporary variable, meanOfSquares. */
|
||||
t = (q31_t) ((1.0 / (blockSize - 1)) * 1073741824LL);
|
||||
sum = (sum >> 31);
|
||||
meanOfSquares = (q31_t) ((sum * t) >> 30);
|
||||
|
||||
/* Reset the accumulator */
|
||||
sum = 0;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2u;
|
||||
|
||||
/* Reset the input working pointer */
|
||||
pSrc = pIn;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4u;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
q63_t sumOfSquares = 0; /* Accumulator */
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while(blkCnt > 0u)
|
||||
{
|
||||
/* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
|
||||
/* Compute Sum of squares of the input samples
|
||||
* and then store the result in a temporary variable, sumOfSquares. */
|
||||
in = *pSrc++;
|
||||
sumOfSquares += ((q63_t) (in) * (in));
|
||||
|
||||
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
|
||||
/* Compute sum of all input values and then store the result in a temporary variable, sum. */
|
||||
sum += in;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Compute Mean of squares of the input samples
|
||||
* and then store the result in a temporary variable, meanOfSquares. */
|
||||
t = (q31_t) ((1.0 / (blockSize - 1)) * 1073741824LL);
|
||||
sumOfSquares = (sumOfSquares >> 31);
|
||||
meanOfSquares = (q31_t) ((sumOfSquares * t) >> 30);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_CM0 */
|
||||
|
||||
/* Compute mean of all input values */
|
||||
t = (q31_t) ((1.0 / (blockSize * (blockSize - 1u))) * 2147483648LL);
|
||||
mean = (q31_t) (sum);
|
||||
|
||||
/* Compute square of mean */
|
||||
squareOfMean = (q31_t) (((q63_t) mean * mean) >> 31);
|
||||
squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 31);
|
||||
|
||||
/* Compute variance and then store the result to the destination */
|
||||
*pResult = (q63_t) meanOfSquares - squareOfMean;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of variance group
|
||||
*/
|
Reference in New Issue
Block a user