1
0
mirror of https://github.com/esp8266/Arduino.git synced 2025-06-22 08:22:04 +03:00

[sam] Adding CMSIS 2.10

This commit is contained in:
Thibaut VIARD
2011-08-08 16:58:06 +02:00
parent 32f3cf5734
commit 2260c6875d
1336 changed files with 366105 additions and 0 deletions

View File

@ -0,0 +1,127 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_max_f32.c
*
* Description: Maximum value of a floating-point vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @defgroup Max Maximum
*
* Computes the maximum value of an array of data.
* The function returns both the maximum value and its position within the array.
* There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
* @addtogroup Max
* @{
*/
/**
 * @brief Maximum value of a floating-point vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector; must be at least 1
 * @param[out] *pResult   maximum value returned here
 * @param[out] *pIndex    index of maximum value returned here
 * @return none.
 *
 * @details
 * A later sample replaces the running maximum only when it is strictly
 * greater, so when the maximum occurs several times the index of its first
 * occurrence is reported.
 * pSrc[0] is read unconditionally, therefore an empty vector is not a valid
 * input.
 */
void arm_max_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult,
  uint32_t * pIndex)
{
  float32_t maxVal, out;        /* Candidate sample and running maximum */
  uint32_t i, outIndex;         /* Sample index and index of the running maximum */

  /* The first sample is the initial reference for the comparison */
  out = pSrc[0];
  outIndex = 0u;

  /* Scan the remaining blockSize - 1 samples.
   * The loop is pre-tested on every core: with a post-tested (do/while) loop
   * a single-element vector would still execute the body once, reading past
   * the end of the input and wrapping the down-counter. */
  for (i = 1u; i < blockSize; i++)
  {
    maxVal = pSrc[i];

    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = i;
    }
  }

  /* Store the maximum value and its index into the destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
/**
* @} end of Max group
*/

View File

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_max_q15.c
*
* Description: Maximum value of a Q15 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup Max
* @{
*/
/**
 * @brief Maximum value of a Q15 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector; must be at least 1
 * @param[out] *pResult   maximum value returned here
 * @param[out] *pIndex    index of maximum value returned here
 * @return none.
 *
 * @details
 * A later sample replaces the running maximum only when it is strictly
 * greater, so when the maximum occurs several times the index of its first
 * occurrence is reported.
 * pSrc[0] is read unconditionally, therefore an empty vector is not a valid
 * input.
 */
void arm_max_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult,
  uint32_t * pIndex)
{
  q15_t maxVal, out;            /* Candidate sample and running maximum */
  uint32_t i, outIndex;         /* Sample index and index of the running maximum */

  /* The first sample is the initial reference for the comparison */
  out = pSrc[0];
  outIndex = 0u;

  /* Scan the remaining blockSize - 1 samples.
   * The loop is pre-tested on every core: with a post-tested (do/while) loop
   * a single-element vector would still execute the body once, reading past
   * the end of the input and wrapping the down-counter. */
  for (i = 1u; i < blockSize; i++)
  {
    maxVal = pSrc[i];

    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = i;
    }
  }

  /* Store the maximum value and its index into the destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
/**
* @} end of Max group
*/

View File

@ -0,0 +1,121 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_max_q31.c
*
* Description: Maximum value of a Q31 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup Max
* @{
*/
/**
 * @brief Maximum value of a Q31 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector; must be at least 1
 * @param[out] *pResult   maximum value returned here
 * @param[out] *pIndex    index of maximum value returned here
 * @return none.
 *
 * @details
 * A later sample replaces the running maximum only when it is strictly
 * greater, so when the maximum occurs several times the index of its first
 * occurrence is reported.
 * pSrc[0] is read unconditionally, therefore an empty vector is not a valid
 * input.
 */
void arm_max_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult,
  uint32_t * pIndex)
{
  q31_t maxVal, out;            /* Candidate sample and running maximum */
  uint32_t i, outIndex;         /* Sample index and index of the running maximum */

  /* The first sample is the initial reference for the comparison */
  out = pSrc[0];
  outIndex = 0u;

  /* Scan the remaining blockSize - 1 samples.
   * The loop is pre-tested on every core: with a post-tested (do/while) loop
   * a single-element vector would still execute the body once, reading past
   * the end of the input and wrapping the down-counter. */
  for (i = 1u; i < blockSize; i++)
  {
    maxVal = pSrc[i];

    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = i;
    }
  }

  /* Store the maximum value and its index into the destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
/**
* @} end of Max group
*/

View File

@ -0,0 +1,206 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_max_q7.c
*
* Description: Maximum value of a Q7 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup Max
* @{
*/
/**
 * @brief Maximum value of a Q7 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector; must be at least 1
 * @param[out] *pResult   maximum value returned here
 * @param[out] *pIndex    index of maximum value returned here
 * @return none.
 *
 * @details
 * Every comparison replaces the current maximum only when the later sample
 * is strictly greater, so when the maximum occurs several times the index of
 * its first occurrence is reported.
 * pSrc[0] is read unconditionally and the loop counts are derived from
 * blockSize - 1, therefore an empty vector is not a valid input.
 */
void arm_max_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q7_t * pResult,
  uint32_t * pIndex)
{
  q7_t res, maxVal;             /* Running maximum and candidate sample */
  uint32_t blkCnt;              /* Loop counter */
  uint32_t indx;                /* Index of the running maximum */
  uint32_t pos;                 /* Index of the next unread sample */

#ifndef ARM_MATH_CM0
  q7_t x0, x1, maxVal1, maxVal2;        /* Samples and pairwise maxima of one group of four */
  uint32_t index1, index2;              /* Indices belonging to maxVal1 / maxVal2 */
#endif

  /* The first sample is the initial reference for the comparison */
  indx = 0u;
  res = *pSrc++;
  pos = 1u;

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: handle the remaining blockSize - 1 samples four at a
   * time; the common tail loop below handles the last 0 to 3 samples. */
  blkCnt = (blockSize - 1u) >> 2u;

  while (blkCnt > 0u)
  {
    /* The group covers samples pos .. pos + 3.  The position is tracked
     * explicitly: deriving it from blockSize and the group counter gives a
     * wrong index whenever blockSize - 1 is not a multiple of 4. */

    /* Maximum of the first pair */
    x0 = *pSrc++;
    x1 = *pSrc++;

    if (x0 < x1)
    {
      maxVal1 = x1;
      index1 = pos + 1u;
    }
    else
    {
      maxVal1 = x0;
      index1 = pos;
    }

    /* Maximum of the second pair */
    x0 = *pSrc++;
    x1 = *pSrc++;

    if (x0 < x1)
    {
      maxVal2 = x1;
      index2 = pos + 3u;
    }
    else
    {
      maxVal2 = x0;
      index2 = pos + 2u;
    }

    /* Maximum of the whole group */
    if (maxVal1 < maxVal2)
    {
      maxVal1 = maxVal2;
      index1 = index2;
    }

    /* Compare the group maximum against the running maximum */
    if (res < maxVal1)
    {
      res = maxVal1;
      indx = index1;
    }

    pos += 4u;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Samples left over after the unrolled loop */
  blkCnt = (blockSize - 1u) % 0x04u;

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over the remaining blockSize - 1 samples one by one */
  blkCnt = (blockSize - 1u);

#endif /* #ifndef ARM_MATH_CM0 */

  while (blkCnt > 0u)
  {
    maxVal = *pSrc++;

    if (res < maxVal)
    {
      /* Update the maximum value and its index */
      res = maxVal;
      indx = pos;
    }

    pos++;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Store the maximum value and its index into the destination pointers */
  *pResult = res;
  *pIndex = indx;
}
/**
* @} end of Max group
*/

View File

@ -0,0 +1,122 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_mean_f32.c
*
* Description: Mean value of a floating-point vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @defgroup mean Mean
*
* Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
* The underlying algorithm is used:
*
* <pre>
* Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
* </pre>
*
* There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
* @addtogroup mean
* @{
*/
/**
 * @brief Mean value of a floating-point vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector
 * @param[out] *pResult   mean value returned here
 * @return none.
 *
 * @details
 * The samples are added one after another, in input order, to a single
 * float32_t accumulator; the sum is then divided by blockSize converted to
 * float32_t.
 */
void arm_mean_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult)
{
  float32_t acc = 0.0f;         /* Running sum of the samples */
  uint32_t left;                /* Samples still to be added one at a time */

#ifndef ARM_MATH_CM0

  /* Cortex-M4 / Cortex-M3: unrolled summation, four samples per pass */
  uint32_t quads;               /* Number of complete groups of four */

  for (quads = blockSize >> 2u; quads != 0u; quads--)
  {
    acc += pSrc[0];
    acc += pSrc[1];
    acc += pSrc[2];
    acc += pSrc[3];
    pSrc += 4;
  }

  /* 0 to 3 samples do not fill a complete group */
  left = blockSize % 0x4u;

#else

  /* Cortex-M0: every sample goes through the simple loop */
  left = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  for (; left != 0u; left--)
  {
    acc += *pSrc++;
  }

  /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
  *pResult = acc / (float32_t) blockSize;
}
/**
* @} end of mean group
*/

View File

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_mean_q15.c
*
* Description: Mean value of a Q15 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup mean
* @{
*/
/**
 * @brief Mean value of a Q15 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector; must not be zero
 * @param[out] *pResult   mean value returned here
 * @return none.
 *
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The samples are summed in a q31_t accumulator (1.15 inputs accumulated in
 * 17.15 format).  The sum is then divided by blockSize using signed
 * arithmetic and the quotient is cast to q15_t; no saturation is applied.
 * \par
 * NOTE(review): assuming q15_t is 16 bits wide and q31_t is 32 bits wide, as
 * their names suggest, the accumulator cannot overflow for vectors of up to
 * 65536 samples - confirm against arm_math.h.
 * \par
 * blockSize is used as the divisor, so a zero-length vector is not a valid
 * input.
 */
void arm_mean_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult)
{
  q31_t sum = 0;                /* Temporary result storage */
  uint32_t blkCnt;              /* Loop counter */

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: add four samples per iteration; the common loop below
   * adds the remaining 0 to 3 samples. */
  blkCnt = blockSize >> 2u;

  while (blkCnt > 0u)
  {
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    sum += *pSrc++;
    sum += *pSrc++;
    sum += *pSrc++;
    sum += *pSrc++;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Samples left over when blockSize is not a multiple of 4 */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  while (blkCnt > 0u)
  {
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    sum += *pSrc++;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
  /* The divisor is converted to the signed accumulator type first: dividing
   * the signed sum directly by the unsigned blockSize would convert a
   * negative sum to a large unsigned value and yield a wrong mean. */
  *pResult = (q15_t) (sum / (q31_t) blockSize);
}
/**
* @} end of mean group
*/

View File

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_mean_q31.c
*
* Description: Mean value of a Q31 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup mean
* @{
*/
/**
 * @brief Mean value of a Q31 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector
 * @param[out] *pResult   mean value returned here
 * @return none.
 *
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The samples are summed in a q63_t accumulator (1.31 inputs accumulated in
 * 33.31 format).  The sum is divided by blockSize, converted to int32_t, and
 * the quotient is cast to q31_t to give the 1.31 result.
 */
void arm_mean_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult)
{
  q63_t acc = 0;                /* Running sum of the samples */
  uint32_t left;                /* Samples still to be added one at a time */

#ifndef ARM_MATH_CM0

  /* Cortex-M4 / Cortex-M3: unrolled summation, four samples per pass */
  uint32_t quads;               /* Number of complete groups of four */

  for (quads = blockSize >> 2u; quads != 0u; quads--)
  {
    acc += pSrc[0];
    acc += pSrc[1];
    acc += pSrc[2];
    acc += pSrc[3];
    pSrc += 4;
  }

  /* 0 to 3 samples do not fill a complete group */
  left = blockSize % 0x4u;

#else

  /* Cortex-M0: every sample goes through the simple loop */
  left = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  for (; left != 0u; left--)
  {
    acc += *pSrc++;
  }

  /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
  *pResult = (q31_t) (acc / (int32_t) blockSize);
}
/**
* @} end of mean group
*/

View File

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_mean_q7.c
*
* Description: Mean value of a Q7 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup mean
* @{
*/
/**
 * @brief Mean value of a Q7 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector
 * @param[out] *pResult   mean value returned here
 * @return none.
 *
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The samples are summed in a q31_t accumulator (1.7 inputs accumulated in
 * 25.7 format).  The sum is divided by blockSize, converted to int32_t, and
 * the quotient is cast to q7_t to give the 1.7 result.
 */
void arm_mean_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q7_t * pResult)
{
  q31_t acc = 0;                /* Running sum of the samples */
  uint32_t left;                /* Samples still to be added one at a time */

#ifndef ARM_MATH_CM0

  /* Cortex-M4 / Cortex-M3: unrolled summation, four samples per pass */
  uint32_t quads;               /* Number of complete groups of four */

  for (quads = blockSize >> 2u; quads != 0u; quads--)
  {
    acc += pSrc[0];
    acc += pSrc[1];
    acc += pSrc[2];
    acc += pSrc[3];
    pSrc += 4;
  }

  /* 0 to 3 samples do not fill a complete group */
  left = blockSize % 0x4u;

#else

  /* Cortex-M0: every sample goes through the simple loop */
  left = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  for (; left != 0u; left--)
  {
    acc += *pSrc++;
  }

  /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
  *pResult = (q7_t) (acc / (int32_t) blockSize);
}
/**
* @} end of mean group
*/

View File

@ -0,0 +1,133 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_min_f32.c
*
* Description: Minimum value of a floating-point vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @defgroup Min Minimum
*
* Computes the minimum value of an array of data.
* The function returns both the minimum value and its position within the array.
* There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
* @addtogroup Min
* @{
*/
/**
 * @brief Minimum value of a floating-point vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector; must be at least 1
 * @param[out] *pResult   minimum value returned here
 * @param[out] *pIndex    index of minimum value returned here
 * @return none.
 *
 * @details
 * A later sample replaces the running minimum only when it is strictly
 * smaller, so when the minimum occurs several times the index of its first
 * occurrence is reported.
 * pSrc[0] is read unconditionally, therefore an empty vector is not a valid
 * input.
 */
void arm_min_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult,
  uint32_t * pIndex)
{
  float32_t minVal, out;        /* Candidate sample and running minimum */
  uint32_t i, outIndex;         /* Sample index and index of the running minimum */

  /* The first sample is the initial reference for the comparison */
  out = pSrc[0];
  outIndex = 0u;

  /* Scan the remaining blockSize - 1 samples.
   * The loop is pre-tested on every core: with a post-tested (do/while) loop
   * a single-element vector would still execute the body once, reading past
   * the end of the input and wrapping the down-counter. */
  for (i = 1u; i < blockSize; i++)
  {
    minVal = pSrc[i];

    if (out > minVal)
    {
      /* Update the minimum value and its index */
      out = minVal;
      outIndex = i;
    }
  }

  /* Store the minimum value and its index into the destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
/**
* @} end of Min group
*/

View File

@ -0,0 +1,127 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_min_q15.c
*
* Description: Minimum value of a Q15 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup Min
* @{
*/
/**
 * @brief Minimum value of a Q15 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector; must be at least 1
 * @param[out] *pResult   minimum value returned here
 * @param[out] *pIndex    index of minimum value returned here
 * @return none.
 *
 * @details
 * A later sample replaces the running minimum only when it is strictly
 * smaller, so when the minimum occurs several times the index of its first
 * occurrence is reported.
 * pSrc[0] is read unconditionally, therefore an empty vector is not a valid
 * input.
 */
void arm_min_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult,
  uint32_t * pIndex)
{
  q15_t minVal, out;            /* Candidate sample and running minimum */
  uint32_t i, outIndex;         /* Sample index and index of the running minimum */

  /* The first sample is the initial reference for the comparison */
  out = pSrc[0];
  outIndex = 0u;

  /* Scan the remaining blockSize - 1 samples.
   * The loop is pre-tested on every core: with a post-tested (do/while) loop
   * a single-element vector would still execute the body once, reading past
   * the end of the input and wrapping the down-counter. */
  for (i = 1u; i < blockSize; i++)
  {
    minVal = pSrc[i];

    if (out > minVal)
    {
      /* Update the minimum value and its index */
      out = minVal;
      outIndex = i;
    }
  }

  /* Store the minimum value and its index into the destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
/**
* @} end of Min group
*/

View File

@ -0,0 +1,125 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_min_q31.c
*
* Description: Minimum value of a Q31 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup Min
* @{
*/
/**
 * @brief Minimum value of a Q31 vector.
 * @param[in]  *pSrc      points to the input vector
 * @param[in]  blockSize  length of the input vector; must be at least 1
 * @param[out] *pResult   minimum value returned here
 * @param[out] *pIndex    index of minimum value returned here
 * @return none.
 *
 * @details
 * A later sample replaces the running minimum only when it is strictly
 * smaller, so when the minimum occurs several times the index of its first
 * occurrence is reported.
 * pSrc[0] is read unconditionally, therefore an empty vector is not a valid
 * input.
 */
void arm_min_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult,
  uint32_t * pIndex)
{
  q31_t minVal, out;            /* Candidate sample and running minimum */
  uint32_t i, outIndex;         /* Sample index and index of the running minimum */

  /* The first sample is the initial reference for the comparison */
  out = pSrc[0];
  outIndex = 0u;

  /* Scan the remaining blockSize - 1 samples.
   * The loop is pre-tested on every core: with a post-tested (do/while) loop
   * a single-element vector would still execute the body once, reading past
   * the end of the input and wrapping the down-counter. */
  for (i = 1u; i < blockSize; i++)
  {
    minVal = pSrc[i];

    if (out > minVal)
    {
      /* Update the minimum value and its index */
      out = minVal;
      outIndex = i;
    }
  }

  /* Store the minimum value and its index into the destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
/**
* @} end of Min group
*/

View File

@ -0,0 +1,204 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_min_q7.c
*
* Description: Minimum value of a Q7 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup Min
* @{
*/
/**
* @brief Minimum value of a Q7 vector.
* @param[in] *pSrc points to the input vector
* @param[in] blockSize length of the input vector
* @param[out] *pResult minimum value returned here
* @param[out] *pIndex index of minimum value returned here
* @return none.
*
*/
void arm_min_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q7_t * pResult,
  uint32_t * pIndex)
{
  /*
   * Scans pSrc[0 .. blockSize-1] for its smallest element and writes that
   * value to *pResult and its position to *pIndex.  All comparisons are
   * strict, so when the minimum occurs more than once the lowest index wins.
   *
   * blockSize must be at least 1: the first sample is read unconditionally
   * and (blockSize - 1) is used as an unsigned loop count.
   */
#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */
  q7_t minVal, minVal1, minVal2, res, x0, x1;   /* Temporary variables to store the output value. */
  uint32_t blkCnt, indx, index1, index2, index3, indxMod;       /* loop counter and index bookkeeping */

  /* The first sample is the initial reference value, found at index 0 */
  indx = 0u;
  res = *pSrc++;

  /* Index of the first sample consumed by the next unrolled iteration.
   * It is tracked explicitly: deriving it from blockSize and blkCnt only
   * gives the right answer when (blockSize - 1) is a multiple of 4. */
  indxMod = 1u;

  /* Process the remaining (blockSize - 1) samples four at a time */
  blkCnt = (blockSize - 1u) >> 2u;

  while(blkCnt > 0u)
  {
    /* Minimum of samples indxMod and indxMod + 1 */
    x0 = *pSrc++;
    x1 = *pSrc++;

    if(x0 > x1)
    {
      minVal1 = x1;
      index1 = indxMod + 1u;
    }
    else
    {
      minVal1 = x0;
      index1 = indxMod;
    }

    /* Minimum of samples indxMod + 2 and indxMod + 3 */
    x0 = *pSrc++;
    x1 = *pSrc++;

    if(x0 > x1)
    {
      minVal2 = x1;
      index2 = indxMod + 3u;
    }
    else
    {
      minVal2 = x0;
      index2 = indxMod + 2u;
    }

    /* Minimum of the two pairs */
    if(minVal1 > minVal2)
    {
      minVal = minVal2;
      index3 = index2;
    }
    else
    {
      minVal = minVal1;
      index3 = index1;
    }

    /* Fold the group minimum into the running result */
    if(res > minVal)
    {
      res = minVal;
      indx = index3;
    }

    /* Advance to the next group of four samples */
    indxMod += 4u;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Handle the 0..3 samples left over after the unrolled loop */
  blkCnt = (blockSize - 1u) % 0x04u;

  while(blkCnt > 0u)
  {
    /* Next sample; with blkCnt samples left its index is blockSize - blkCnt */
    minVal = *pSrc++;

    if(res > minVal)
    {
      res = minVal;
      indx = blockSize - blkCnt;
    }

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Store the minimum value and its index into destination pointers */
  *pResult = res;
  *pIndex = indx;

#else

  /* Run the below code for Cortex-M0 */
  q7_t minVal, out;                              /* Temporary variables to store the output value. */
  uint32_t blkCnt, outIndex;                     /* loop counter and index of the current minimum */

  /* The first sample is the initial reference value, found at index 0 */
  outIndex = 0u;
  out = *pSrc++;

  /* Loop over the remaining blockSize - 1 values */
  blkCnt = (blockSize - 1u);

  while(blkCnt > 0u)
  {
    /* Next sample; with blkCnt samples left its index is blockSize - blkCnt */
    minVal = *pSrc++;

    if(out > minVal)
    {
      out = minVal;
      outIndex = blockSize - blkCnt;
    }

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Store the minimum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;

#endif /* #ifndef ARM_MATH_CM0 */
}
/**
* @} end of Min group
*/

View File

@ -0,0 +1,135 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_power_f32.c
*
* Description: Sum of the squares of the elements of a floating-point vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
*
* Version 0.0.7 2010/06/10
* Misra-C changes done
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @defgroup power Power
*
* Calculates the sum of the squares of the elements in the input vector.
* The underlying algorithm is used:
*
* <pre>
* Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];
* </pre>
*
* There are separate functions for floating point, Q31, Q15, and Q7 data types.
*/
/**
* @addtogroup power
* @{
*/
/**
* @brief Sum of the squares of the elements of a floating-point vector.
* @param[in] *pSrc points to the input vector
* @param[in] blockSize length of the input vector
* @param[out] *pResult sum of the squares value returned here
* @return none.
*
*/
void arm_power_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult)
{
  /* Accumulates pSrc[i] * pSrc[i] over the whole block, in index order,
   * and writes the total to *pResult. */
  float32_t acc = 0.0f;                          /* running sum of squares */
  float32_t sample;                              /* current input value */
  uint32_t remaining;                            /* iterations still to run */

#ifndef ARM_MATH_CM0

  /* Cortex-M4 / Cortex-M3: unrolled, four samples per iteration */
  for(remaining = blockSize >> 2u; remaining > 0u; remaining--)
  {
    sample = pSrc[0];
    acc += sample * sample;

    sample = pSrc[1];
    acc += sample * sample;

    sample = pSrc[2];
    acc += sample * sample;

    sample = pSrc[3];
    acc += sample * sample;

    pSrc += 4;
  }

  /* Between 0 and 3 samples are left for the scalar loop below */
  remaining = blockSize & 0x3u;

#else

  /* Cortex-M0: every sample goes through the scalar loop below */
  remaining = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  for(; remaining > 0u; remaining--)
  {
    sample = *pSrc++;
    acc += sample * sample;
  }

  /* Hand the total back to the caller */
  *pResult = acc;
}
/**
* @} end of power group
*/

View File

@ -0,0 +1,141 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_power_q15.c
*
* Description: Sum of the squares of the elements of a Q15 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup power
* @{
*/
/**
* @brief Sum of the squares of the elements of a Q15 vector.
* @param[in] *pSrc points to the input vector
* @param[in] blockSize length of the input vector
* @param[out] *pResult sum of the squares value returned here
* @return none.
*
* @details
* <b>Scaling and Overflow Behavior:</b>
*
* \par
* The function is implemented using a 64-bit internal accumulator.
* The input is represented in 1.15 format.
* Intermediate multiplication yields a 2.30 format, and this
* result is added without saturation to a 64-bit accumulator in 34.30 format.
* With 33 guard bits in the accumulator, there is no risk of overflow, and the
* full precision of the intermediate multiplication is preserved.
* Finally, the return result is in 34.30 format.
*
*/
void arm_power_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q63_t * pResult)
{
  /*
   * Accumulates pSrc[i] * pSrc[i] over the whole block in a 64-bit
   * accumulator and writes the total (34.30 format) to *pResult.
   */
  q63_t sum = 0;                                 /* Temporary result storage */

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */
  q31_t in32;                                    /* Two packed Q15 inputs */
  q15_t in16;                                    /* Single Q15 input for the tail loop */
  uint32_t blkCnt;                               /* loop counter */

  /* loop Unrolling */
  blkCnt = blockSize >> 2u;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
  while(blkCnt > 0u)
  {
    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
    /* Each 32-bit load holds two samples; __SMLALD squares both halfwords
     * and adds the two products to the 64-bit accumulator. */
    in32 = *__SIMD32(pSrc)++;
    sum = __SMLALD(in32, in32, sum);
    in32 = *__SIMD32(pSrc)++;
    sum = __SMLALD(in32, in32, sum);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
  blkCnt = blockSize % 0x4u;

  while(blkCnt > 0u)
  {
    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
    /* A single sample must not go through __SMLALD: a negative value
     * sign-extends to 0xFFFF in the upper halfword, and the dual multiply
     * would then add an extra (-1) * (-1) = 1 to the sum.  Use a plain
     * multiply, exactly as the Cortex-M0 path does. */
    in16 = *pSrc++;
    sum += ((q31_t) in16 * in16);

    /* Decrement the loop counter */
    blkCnt--;
  }

#else

  /* Run the below code for Cortex-M0 */
  q15_t in;                                      /* Temporary variable to store input value */
  uint32_t blkCnt;                               /* loop counter */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

  while(blkCnt > 0u)
  {
    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
    /* Compute Power and then store the result in a temporary variable, sum. */
    in = *pSrc++;
    sum += ((q31_t) in * in);

    /* Decrement the loop counter */
    blkCnt--;
  }

#endif /* #ifndef ARM_MATH_CM0 */

  /* Store the results in 34.30 format */
  *pResult = sum;
}
/**
* @} end of power group
*/

View File

@ -0,0 +1,132 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_power_q31.c
*
* Description: Sum of the squares of the elements of a Q31 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup power
* @{
*/
/**
* @brief Sum of the squares of the elements of a Q31 vector.
* @param[in] *pSrc points to the input vector
* @param[in] blockSize length of the input vector
* @param[out] *pResult sum of the squares value returned here
* @return none.
*
* @details
* <b>Scaling and Overflow Behavior:</b>
*
* \par
* The function is implemented using a 64-bit internal accumulator.
* The input is represented in 1.31 format.
* Intermediate multiplication yields a 2.62 format, and this
* result is truncated to 2.48 format by discarding the lower 14 bits.
* The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
* The 15 guard bits allow on the order of 2^15 worst-case products to be accumulated
* before the accumulator can overflow; the 14 bits discarded from each product are lost.
* Finally, the return result is in 16.48 format.
*
*/
void arm_power_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q63_t * pResult)
{
  /* Squares every sample in 64-bit arithmetic, drops the low 14 bits of
   * each product and accumulates the results; the total is written to
   * *pResult. */
  q63_t acc = 0;                                 /* running sum of shifted squares */
  q31_t sample;                                  /* current input value */
  uint32_t remaining;                            /* iterations still to run */

#ifndef ARM_MATH_CM0

  /* Cortex-M4 / Cortex-M3: unrolled, four samples per iteration */
  for(remaining = blockSize >> 2u; remaining > 0u; remaining--)
  {
    sample = pSrc[0];
    acc += ((q63_t) sample * sample) >> 14u;

    sample = pSrc[1];
    acc += ((q63_t) sample * sample) >> 14u;

    sample = pSrc[2];
    acc += ((q63_t) sample * sample) >> 14u;

    sample = pSrc[3];
    acc += ((q63_t) sample * sample) >> 14u;

    pSrc += 4;
  }

  /* Between 0 and 3 samples are left for the scalar loop below */
  remaining = blockSize & 0x3u;

#else

  /* Cortex-M0: every sample goes through the scalar loop below */
  remaining = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  for(; remaining > 0u; remaining--)
  {
    sample = *pSrc++;
    acc += ((q63_t) sample * sample) >> 14u;
  }

  /* Hand the 16.48 total back to the caller */
  *pResult = acc;
}
/**
* @} end of power group
*/

View File

@ -0,0 +1,137 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_power_q7.c
*
* Description: Sum of the squares of the elements of a Q7 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup power
* @{
*/
/**
* @brief Sum of the squares of the elements of a Q7 vector.
* @param[in] *pSrc points to the input vector
* @param[in] blockSize length of the input vector
* @param[out] *pResult sum of the squares value returned here
* @return none.
*
* @details
* <b>Scaling and Overflow Behavior:</b>
*
* \par
* The function is implemented using a 32-bit internal accumulator.
* The input is represented in 1.7 format.
* Intermediate multiplication yields a 2.14 format, and this
* result is added without saturation to an accumulator in 18.14 format.
* With 17 guard bits in the accumulator, there is no risk of overflow, and the
* full precision of the intermediate multiplication is preserved.
* Finally, the return result is in 18.14 format.
*
*/
void arm_power_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult)
{
  /*
   * Accumulates pSrc[i] * pSrc[i] over the whole block in a 32-bit
   * accumulator and writes the total (18.14 format) to *pResult.
   */
  q31_t sum = 0;                                 /* Temporary result storage */
  q7_t in;                                       /* Temporary variable to store input */
  uint32_t blkCnt;                               /* loop counter */

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */
  q31_t input1;                                  /* Temporary variable to store packed input */
  q15_t in1, in2;                                /* Temporary variables to store input */

  /*loop Unrolling */
  blkCnt = blockSize >> 2u;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
  while(blkCnt > 0u)
  {
    /* Read two inputs and pack them into the low and high halfwords.
     * The packing is done on unsigned values: left-shifting a negative
     * signed value is undefined behaviour in C, whereas the unsigned form
     * yields the same bit pattern with defined semantics. */
    in1 = (q15_t) * pSrc++;
    in2 = (q15_t) * pSrc++;
    input1 = (q31_t) (((uint32_t) in1 & 0x0000FFFFu) | ((uint32_t) in2 << 16));

    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
    /* __SMLAD squares both halfwords and adds the two products to sum. */
    sum = __SMLAD(input1, input1, sum);

    /* Same again for the next two inputs */
    in1 = (q15_t) * pSrc++;
    in2 = (q15_t) * pSrc++;
    input1 = (q31_t) (((uint32_t) in1 & 0x0000FFFFu) | ((uint32_t) in2 << 16));

    sum = __SMLAD(input1, input1, sum);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  while(blkCnt > 0u)
  {
    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
    /* Compute Power and then store the result in a temporary variable, sum. */
    in = *pSrc++;
    sum += ((q15_t) in * in);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Store the result in 18.14 format */
  *pResult = sum;
}
/**
* @} end of power group
*/

View File

@ -0,0 +1,130 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_rms_f32.c
*
* Description: Root mean square value of an array of F32 type
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @defgroup RMS Root mean square (RMS)
*
*
* Calculates the Root Mean Square of the elements in the input vector.
* The underlying algorithm is used:
*
* <pre>
* Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
* </pre>
*
* There are separate functions for floating point, Q31, and Q15 data types.
*/
/**
* @addtogroup RMS
* @{
*/
/**
* @brief Root Mean Square of the elements of a floating-point vector.
* @param[in] *pSrc points to the input vector
* @param[in] blockSize length of the input vector
* @param[out] *pResult rms value returned here
* @return none.
*
*/
void arm_rms_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult)
{
  /* Sums the squares of all samples in index order, divides the total by
   * blockSize and passes that mean square to arm_sqrt_f32, which stores
   * its output through pResult. */
  float32_t acc = 0.0f;                          /* running sum of squares */
  float32_t sample;                              /* current input value */
  uint32_t remaining;                            /* iterations still to run */

#ifndef ARM_MATH_CM0

  /* Cortex-M4 / Cortex-M3: unrolled, four samples per iteration */
  remaining = blockSize >> 2u;

  while(remaining-- > 0u)
  {
    sample = pSrc[0];
    acc += sample * sample;

    sample = pSrc[1];
    acc += sample * sample;

    sample = pSrc[2];
    acc += sample * sample;

    sample = pSrc[3];
    acc += sample * sample;

    pSrc += 4;
  }

  /* Between 0 and 3 samples are left for the scalar loop below */
  remaining = blockSize & 0x3u;

#else

  /* Cortex-M0: every sample goes through the scalar loop below */
  remaining = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  while(remaining-- > 0u)
  {
    sample = *pSrc++;
    acc += sample * sample;
  }

  /* Square root of the mean square goes to the destination */
  arm_sqrt_f32(acc / (float32_t) blockSize, pResult);
}
/**
* @} end of RMS group
*/

View File

@ -0,0 +1,150 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_rms_q15.c
*
* Description: Root Mean Square of the elements of a Q15 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @addtogroup RMS
* @{
*/
/**
 * @brief Root Mean Square of the elements of a Q15 vector.
 * @param[in]       *pSrc points to the input vector
 * @param[in]       blockSize length of the input vector
 * @param[out]      *pResult rms value returned here
 * @return none.
 *
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 *
 * \par
 * The function is implemented using a 64-bit internal accumulator.
 * The input is represented in 1.15 format.
 * Intermediate multiplication yields a 2.30 format, and this
 * result is added without saturation to a 64-bit accumulator in 34.30 format.
 * With 33 guard bits in the accumulator, there is no risk of overflow, and the
 * full precision of the intermediate multiplication is preserved.
 * The accumulated sum is then divided by blockSize to obtain the mean square,
 * which is truncated to 15 fractional bits by discarding the lower 15 bits and
 * saturated to 1.15 format before the square root is taken.
 *
 */
void arm_rms_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult)
{
  q63_t sum = 0;                                 /* accumulator */
  q15_t meanSq;                                  /* mean of the squares in 1.15 format */
  uint32_t blkCnt;                               /* loop counter */

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */
  q31_t in;                                      /* two packed Q15 inputs */
  q15_t in1;                                     /* single Q15 input for the tail loop */

  /* loop Unrolling */
  blkCnt = blockSize >> 2u;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    /* Each 32-bit load holds two samples; __SMLALD squares both halfwords
     * and adds the two products to the 64-bit accumulator. */
    in = *__SIMD32(pSrc)++;
    sum = __SMLALD(in, in, sum);
    in = *__SIMD32(pSrc)++;
    sum = __SMLALD(in, in, sum);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
  blkCnt = blockSize % 0x4u;

  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    /* A single sample must not go through __SMLALD: a negative value
     * sign-extends to 0xFFFF in the upper halfword, and the dual multiply
     * would then add an extra (-1) * (-1) = 1 to the sum. */
    in1 = *pSrc++;
    sum += ((q31_t) in1 * in1);

    /* Decrement the loop counter */
    blkCnt--;
  }

#else

  /* Run the below code for Cortex-M0 */
  q15_t in;                                      /* temporary variable to store the input value */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    /* Compute sum of the squares and then store the results in a temporary variable, sum */
    in = *pSrc++;
    sum += ((q31_t) in * in);

    /* Decrement the loop counter */
    blkCnt--;
  }

#endif /* #ifndef ARM_MATH_CM0 */

  /* Form the mean BEFORE narrowing.  Saturating the raw sum of squares to
   * 16 bits first would clamp it at 0x7FFF as soon as the block's total
   * energy exceeds 1.0, and the subsequent division by blockSize would then
   * yield a far too small mean.  The mean of 2.30 squares never exceeds
   * 2^30, so after dropping 15 bits it fits a q31_t and only the single
   * value 0x8000 needs saturating. */
  meanSq = (q15_t) __SSAT((q31_t) ((sum / (q63_t) blockSize) >> 15), 16);

  /* Store the result in the destination */
  arm_sqrt_q15(meanSq, pResult);
}
/**
* @} end of RMS group
*/

View File

@ -0,0 +1,143 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_rms_q31.c
*
* Description: Root Mean Square of the elements of a Q31 vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @addtogroup RMS
* @{
*/
/**
* @brief Root Mean Square of the elements of a Q31 vector.
* @param[in] *pSrc points to the input vector
* @param[in] blockSize length of the input vector
* @param[out] *pResult rms value returned here
* @return none.
*
* @details
* <b>Scaling and Overflow Behavior:</b>
*
*\par
* The function is implemented using an internal 64-bit accumulator.
* The input is represented in 1.31 format, and intermediate multiplication
* yields a 2.62 format.
* The accumulator maintains full precision of the intermediate multiplication results,
* but provides only a single guard bit.
* There is no saturation on intermediate additions.
* If the accumulator overflows, it wraps around and distorts the result.
* In order to avoid overflows completely, the input signal must be scaled down by
* log2(blockSize) bits, as a total of blockSize additions are performed internally.
* Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
*
*/
void arm_rms_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult)
{
  /*
   * Accumulates the 64-bit squares of all samples, shifts the total right
   * by 31 bits, divides by blockSize and passes the resulting mean square
   * to arm_sqrt_q31, which stores its output through pResult.
   */
  q63_t sum = 0;                                 /* accumulator */
  q63_t meanSq;                                  /* mean of the squares before narrowing */
  q31_t in;                                      /* Temporary variable to store the input */
  uint32_t blkCnt;                               /* loop counter */

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */
  q31_t *pIn1 = pSrc;                            /* SrcA pointer */

  /*loop Unrolling */
  blkCnt = blockSize >> 2u;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
  while(blkCnt > 0u)
  {
    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
    /* Compute sum of the squares and then store the result in a temporary variable, sum */
    in = *pIn1++;
    sum += (q63_t) in *in;
    in = *pIn1++;
    sum += (q63_t) in *in;
    in = *pIn1++;
    sum += (q63_t) in *in;
    in = *pIn1++;
    sum += (q63_t) in *in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
  blkCnt = blockSize % 0x4u;

  while(blkCnt > 0u)
  {
    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
    /* Compute sum of the squares and then store the results in a temporary variable, sum */
    in = *pIn1++;
    sum += (q63_t) in *in;

    /* Decrement the loop counter */
    blkCnt--;
  }

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

  while(blkCnt > 0u)
  {
    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
    /* Compute sum of the squares and then store the results in a temporary variable, sum */
    in = *pSrc++;
    sum += (q63_t) in *in;

    /* Decrement the loop counter */
    blkCnt--;
  }

#endif /* #ifndef ARM_MATH_CM0 */

  /* Convert data in 2.62 to 1.31 by 31 right shifts */
  sum = sum >> 31;

  /* Mean of the squares */
  meanSq = sum / (int32_t) blockSize;

  /* A sample of 0x80000000 squares to 2^62, i.e. 2^31 after the shift,
   * which is one more than a q31_t can hold; casting it directly would
   * wrap to a negative value.  Clamp to the largest positive q31 value. */
  if(meanSq > (q63_t) 0x7FFFFFFF)
  {
    meanSq = (q63_t) 0x7FFFFFFF;
  }

  /* Compute Rms and store the result in the destination vector */
  arm_sqrt_q31((q31_t) meanSq, pResult);
}
/**
* @} end of RMS group
*/

View File

@ -0,0 +1,222 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_std_f32.c
*
* Description: Standard deviation of the elements of a floating-point vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @defgroup STD Standard deviation
*
* Calculates the standard deviation of the elements in the input vector.
* The underlying algorithm is used:
*
* <pre>
* Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))
*
* where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
*
* sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
* </pre>
*
* There are separate functions for floating point, Q31, and Q15 data types.
*/
/**
* @addtogroup STD
* @{
*/
/**
* @brief Standard deviation of the elements of a floating-point vector.
* @param[in] *pSrc points to the input vector
* @param[in] blockSize length of the input vector
* @param[out] *pResult standard deviation value returned here
* @return none.
*
*/
void arm_std_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult)
{
  /*
   * Computes sqrt((sumOfSquares - sum * sum / blockSize) / (blockSize - 1))
   * over the block and stores it through pResult (via arm_sqrt_f32).
   * A block of exactly one sample yields 0.
   */
  float32_t sum = 0.0f;                          /* Temporary result storage */
  float32_t in;                                  /* input value */
  uint32_t blkCnt;                               /* loop counter */

#ifndef ARM_MATH_CM0
  float32_t meanOfSquares, mean, squareOfMean;
  float32_t *pIn = pSrc;                         /* Start of the input, kept for the second pass */
#else
  float32_t sumOfSquares = 0.0f;                 /* Sum of squares */
  float32_t squareOfSum;                         /* Square of Sum */
  float32_t var;                                 /* Temporary variance storage */
#endif /* #ifndef ARM_MATH_CM0 */

  /* With a single sample the (blockSize - 1) divisor below is zero and the
   * computation degenerates to NaN; report a deviation of zero instead. */
  if(blockSize == 1u)
  {
    *pResult = 0.0f;
    return;
  }

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /*loop Unrolling */
  blkCnt = blockSize >> 2u;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    /* Compute Sum of squares of the input samples
     * and then store the result in a temporary variable, sum. */
    in = *pSrc++;
    sum += in * in;
    in = *pSrc++;
    sum += in * in;
    in = *pSrc++;
    sum += in * in;
    in = *pSrc++;
    sum += in * in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
  blkCnt = blockSize % 0x4u;

  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    /* Compute Sum of squares of the input samples
     * and then store the result in a temporary variable, sum. */
    in = *pSrc++;
    sum += in * in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Sum of squares scaled by 1 / (blockSize - 1) */
  meanOfSquares = sum / ((float32_t) blockSize - 1.0f);

  /* Reset the accumulator */
  sum = 0.0f;

  /*loop Unrolling */
  blkCnt = blockSize >> 2u;

  /* Reset the input working pointer for the second pass */
  pSrc = pIn;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
  while(blkCnt > 0u)
  {
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    /* Compute sum of all input values and then store the result in a temporary variable, sum. */
    sum += *pSrc++;
    sum += *pSrc++;
    sum += *pSrc++;
    sum += *pSrc++;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
  blkCnt = blockSize % 0x4u;

  while(blkCnt > 0u)
  {
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    /* Compute sum of all input values and then store the result in a temporary variable, sum. */
    sum += *pSrc++;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Compute mean of all input values */
  mean = sum / (float32_t) blockSize;

  /* Square of the mean, rescaled by blockSize / (blockSize - 1) so that it
   * uses the same divisor as meanOfSquares */
  squareOfMean = (mean * mean) * (((float32_t) blockSize) /
                                  ((float32_t) blockSize - 1.0f));

  /* Compute standard deviation and then store the result to the destination */
  arm_sqrt_f32((meanOfSquares - squareOfMean), pResult);

#else

  /* Run the below code for Cortex-M0 */

  /* Loop over blockSize number of values */
  blkCnt = blockSize;

  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    /* Compute Sum of squares of the input samples
     * and then store the result in a temporary variable, sumOfSquares. */
    in = *pSrc++;
    sumOfSquares += in * in;

    /* C = (A[0] + A[1] + ... + A[blockSize-1]) */
    /* Compute Sum of the input samples
     * and then store the result in a temporary variable, sum. */
    sum += in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Square of the sum divided by the number of samples */
  squareOfSum = ((sum * sum) / (float32_t) blockSize);

  /* Compute the variance */
  var = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));

  /* Compute standard deviation and then store the result to the destination */
  arm_sqrt_f32(var, pResult);

#endif /* #ifndef ARM_MATH_CM0 */
}
/**
* @} end of STD group
*/

View File

@ -0,0 +1,229 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_std_q15.c
*
* Description: Standard deviation of an array of Q15 type.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup STD
* @{
*/
/**
 * @brief Standard deviation of the elements of a Q15 vector.
 * @param[in]       *pSrc points to the input vector
 * @param[in]       blockSize length of the input vector
 * @param[out]      *pResult standard deviation value returned here
 * @return none.
 *
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 *
 * \par
 * The function is implemented using a 64-bit internal accumulator.
 * The input is represented in 1.15 format.
 * Intermediate multiplication yields a 2.30 format, and this
 * result is added without saturation to a 64-bit accumulator in 34.30 format.
 * With 33 guard bits in the accumulator, there is no risk of overflow, and the
 * full precision of the intermediate multiplication is preserved.
 * The unbiased variance is then formed in 2.30 format using 64-bit integer
 * divisions, truncated to 15 fractional bits by discarding the lower 15 bits,
 * saturated to 1.15 format and passed to arm_sqrt_q15().
 *
 * \par
 * The unbiased estimator divides by (blockSize - 1), so it is undefined for
 * fewer than two samples. In that case 0 is written to <code>*pResult</code>.
 */
void arm_std_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult)
{
  q63_t sumOfSquares = 0;                /* Sum of squares, 34.30 format */
  q63_t sum = 0;                         /* Sum of the samples, in Q15 units */
  q63_t count = (q63_t) blockSize;       /* Sample count widened for 64-bit arithmetic */
  q63_t meanOfSquares;                   /* sumOfSquares / (N - 1), 2.30 format */
  q63_t mean;                            /* sum / N, 1.30 format */
  q63_t squareOfMean;                    /* mean^2 * N / (N - 1), 2.30 format */
  uint32_t blkCnt;                       /* loop counter */

#ifndef ARM_MATH_CM0
  q15_t *pIn = pSrc;                     /* Start of the input, kept for the second pass */
  q31_t in;                              /* Two packed Q15 samples */
  q15_t in1;                             /* Single Q15 sample */
#else
  q15_t in;                              /* Single Q15 sample */
#endif /* #ifndef ARM_MATH_CM0 */

  /* Dividing by (blockSize - 1) below requires at least two samples */
  if(blockSize <= 1u)
  {
    *pResult = 0;
    return;
  }

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* First pass: sum of squares, four samples per iteration */
  blkCnt = blockSize >> 2u;

  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    in = *__SIMD32(pSrc)++;
    sumOfSquares = __SMLALD(in, in, sumOfSquares);
    in = *__SIMD32(pSrc)++;
    sumOfSquares = __SMLALD(in, in, sumOfSquares);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Remaining 0 to 3 samples */
  blkCnt = blockSize % 0x4u;

  while(blkCnt > 0u)
  {
    /* A lone sample is squared with a plain multiply. Handing a sign-extended
     * Q15 value to __SMLALD would also square its upper halfword (0xFFFF for
     * negative samples) and add a spurious 1 to the accumulator. */
    in1 = *pSrc++;
    sumOfSquares += (q63_t) ((q31_t) in1 * in1);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Second pass: plain sum of the samples */
  pSrc = pIn;
  blkCnt = blockSize >> 2u;

  while(blkCnt > 0u)
  {
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    sum += *pSrc++;
    sum += *pSrc++;
    sum += *pSrc++;
    sum += *pSrc++;

    /* Decrement the loop counter */
    blkCnt--;
  }

  blkCnt = blockSize % 0x4u;

  while(blkCnt > 0u)
  {
    sum += *pSrc++;

    /* Decrement the loop counter */
    blkCnt--;
  }

#else

  /* Run the below code for Cortex-M0 */

  /* Single pass accumulating both the sum and the sum of squares */
  blkCnt = blockSize;

  while(blkCnt > 0u)
  {
    in = *pSrc++;
    sumOfSquares += (q63_t) ((q31_t) in * in);
    sum += in;

    /* Decrement the loop counter */
    blkCnt--;
  }

#endif /* #ifndef ARM_MATH_CM0 */

  /* Mean of squares with the (N - 1) divisor, 2.30 format. The full 64-bit
   * accumulator is divided; saturating it first would clip every vector whose
   * sum of squares exceeds 1.0. */
  meanOfSquares = sumOfSquares / (count - 1);

  /* Mean in 1.30 format. |sum| <= N * 2^15 < 2^47, so the scaled value stays
   * below 2^62 and cannot overflow for any 32-bit blockSize. */
  mean = (sum * 32768) / count;

  /* mean^2 is Q60 (<= 2^60); bring it back to Q30 and apply N / (N - 1) */
  squareOfMean = (((mean * mean) >> 30) * count) / (count - 1);

  /* Variance (never negative with these truncations) -> Q15, saturate, sqrt */
  arm_sqrt_q15((q15_t) __SSAT((q31_t) ((meanOfSquares - squareOfMean) >> 15), 16u),
               pResult);
}
/**
* @} end of STD group
*/

View File

@ -0,0 +1,219 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_std_q31.c
*
* Description: Standard deviation of an array of Q31 type.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup STD
* @{
*/
/**
 * @brief Standard deviation of the elements of a Q31 vector.
 * @param[in]       *pSrc points to the input vector
 * @param[in]       blockSize length of the input vector
 * @param[out]      *pResult standard deviation value returned here
 * @return none.
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 *
 *\par
 * The function is implemented using an internal 64-bit accumulator.
 * The input is represented in 1.31 format, and intermediate multiplication
 * yields a 2.62 format.
 * The accumulator maintains full precision of the intermediate multiplication results,
 * but provides only a single guard bit.
 * There is no saturation on intermediate additions.
 * If the accumulator overflows it wraps around and distorts the result.
 * In order to avoid overflows completely the input signal must be scaled down by
 * log2(blockSize) bits, as a total of blockSize additions are performed internally.
 * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
 * The variance is saturated to the 1.31 range before the square root is taken.
 *
 *\par
 * The unbiased estimator divides by (blockSize - 1), so it is undefined for
 * fewer than two samples. In that case 0 is written to <code>*pResult</code>.
 */
void arm_std_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult)
{
  q63_t sumOfSquares = 0;                /* Sum of squares, 2.62 format */
  q63_t sum = 0;                         /* Sum of the samples, in Q31 units */
  q63_t count = (q63_t) blockSize;       /* Sample count widened for 64-bit arithmetic */
  q63_t meanOfSquares;                   /* Mean of squares with (N - 1) divisor, Q31 */
  q63_t mean;                            /* sum / N, Q31 */
  q63_t squareOfMean;                    /* mean^2 * N / (N - 1), Q31 */
  q63_t variance;                        /* meanOfSquares - squareOfMean, Q31 */
  q31_t in;                              /* input value */
  uint32_t blkCnt;                       /* loop counter */

  /* Dividing by (blockSize - 1) below requires at least two samples */
  if(blockSize <= 1u)
  {
    *pResult = 0;
    return;
  }

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: four samples per iteration, both sums in a single pass */
  blkCnt = blockSize >> 2u;

  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    in = *pSrc++;
    sumOfSquares += ((q63_t) (in) * (in));
    sum += in;
    in = *pSrc++;
    sumOfSquares += ((q63_t) (in) * (in));
    sum += in;
    in = *pSrc++;
    sumOfSquares += ((q63_t) (in) * (in));
    sum += in;
    in = *pSrc++;
    sumOfSquares += ((q63_t) (in) * (in));
    sum += in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Remaining 0 to 3 samples are handled by the loop below */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* No unrolling: every sample goes through the loop below */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  while(blkCnt > 0u)
  {
    in = *pSrc++;
    sumOfSquares += ((q63_t) (in) * (in));
    sum += in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* 2.62 -> Q31, then divide by (N - 1) using exact integer division */
  meanOfSquares = (sumOfSquares >> 31) / (count - 1);

  /* Divide the full 64-bit sum by N. Casting the sum straight to q31_t would
   * drop its upper bits as soon as the samples add up to more than 1.0. */
  mean = sum / count;

  /* |mean| <= 2^31, so mean^2 <= 2^62 fits in 64 bits; bring it back to Q31 */
  squareOfMean = (mean * mean) >> 31;

  /* Apply N / (N - 1); the product stays below 2^63 for any 32-bit blockSize */
  squareOfMean = (squareOfMean * count) / (count - 1);

  /* Never negative with these truncations, but it can exceed 1.0 for small
   * blocks of full-scale data, so saturate to the 1.31 range. */
  variance = meanOfSquares - squareOfMean;

  if(variance > (q63_t) 0x7FFFFFFF)
  {
    variance = (q63_t) 0x7FFFFFFF;
  }

  /* Compute standard deviation and then store the result to the destination */
  arm_sqrt_q31((q31_t) variance, pResult);
}
/**
* @} end of STD group
*/

View File

@ -0,0 +1,219 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_var_f32.c
*
* Description: Variance of the elements of a floating-point vector.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @defgroup variance Variance
*
* Calculates the variance of the elements in the input vector.
* The following algorithm is used:
*
* <pre>
* Result = (sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1)
*
* where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
*
* sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
* </pre>
*
* There are separate functions for floating point, Q31, and Q15 data types.
*/
/**
* @addtogroup variance
* @{
*/
/**
 * @brief Variance of the elements of a floating-point vector.
 * @param[in]       *pSrc points to the input vector
 * @param[in]       blockSize length of the input vector
 * @param[out]      *pResult variance value returned here
 * @return none.
 *
 * @details
 * The result is
 * <code>(sumOfSquares - sum * sum / blockSize) / (blockSize - 1)</code>.
 * This estimator is undefined for fewer than two samples; in that case
 * 0 is written to <code>*pResult</code> instead of dividing by zero.
 */
void arm_var_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult)
{
  float32_t sum = 0.0f;                  /* Sum of the samples */
  float32_t sumOfSquares = 0.0f;         /* Sum of the squared samples */
  float32_t in;                          /* input value */
  uint32_t blkCnt;                       /* loop counter */

  /* Dividing by (blockSize - 1) below requires at least two samples */
  if(blockSize <= 1u)
  {
    *pResult = 0.0f;
    return;
  }

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: four samples per iteration, both sums in a single pass.
   * Each accumulator still sees the samples in their original order. */
  blkCnt = blockSize >> 2u;

  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    in = *pSrc++;
    sumOfSquares += in * in;
    sum += in;
    in = *pSrc++;
    sumOfSquares += in * in;
    sum += in;
    in = *pSrc++;
    sumOfSquares += in * in;
    sum += in;
    in = *pSrc++;
    sumOfSquares += in * in;
    sum += in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Remaining 0 to 3 samples are handled by the loop below */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* No unrolling: every sample goes through the loop below */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  while(blkCnt > 0u)
  {
    in = *pSrc++;
    sumOfSquares += in * in;
    sum += in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Compute the variance and store the result to the destination */
  *pResult = (sumOfSquares - ((sum * sum) / (float32_t) blockSize)) /
             ((float32_t) blockSize - 1.0f);
}
/**
* @} end of variance group
*/

View File

@ -0,0 +1,214 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_var_q15.c
*
* Description: Variance of an array of Q15 type.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup variance
* @{
*/
/**
 * @brief Variance of the elements of a Q15 vector.
 * @param[in]       *pSrc points to the input vector
 * @param[in]       blockSize length of the input vector
 * @param[out]      *pResult variance value returned here
 * @return none.
 *
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 *
 * \par
 * The function is implemented using a 64-bit internal accumulator.
 * The input is represented in 1.15 format.
 * Intermediate multiplication yields a 2.30 format, and this
 * result is added without saturation to a 64-bit accumulator in 34.30 format.
 * With 33 guard bits in the accumulator, there is no risk of overflow, and the
 * full precision of the intermediate multiplication is preserved.
 * The unbiased variance is then formed in 2.30 format using 64-bit integer
 * divisions, truncated to 15 fractional bits by discarding the lower 15 bits,
 * and saturated to yield a result in 1.15 format.
 *
 * \par
 * The unbiased estimator divides by (blockSize - 1), so it is undefined for
 * fewer than two samples. In that case 0 is written to <code>*pResult</code>.
 */
void arm_var_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult)
{
  q63_t sumOfSquares = 0;                /* Sum of squares, 34.30 format */
  q63_t sum = 0;                         /* Sum of the samples, in Q15 units */
  q63_t count = (q63_t) blockSize;       /* Sample count widened for 64-bit arithmetic */
  q63_t meanOfSquares;                   /* sumOfSquares / (N - 1), 2.30 format */
  q63_t mean;                            /* sum / N, 1.30 format */
  q63_t squareOfMean;                    /* mean^2 * N / (N - 1), 2.30 format */
  uint32_t blkCnt;                       /* loop counter */

#ifndef ARM_MATH_CM0
  q15_t *pIn = pSrc;                     /* Start of the input, kept for the second pass */
  q31_t in;                              /* Two packed Q15 samples */
  q15_t in1;                             /* Single Q15 sample */
#else
  q15_t in;                              /* Single Q15 sample */
#endif /* #ifndef ARM_MATH_CM0 */

  /* Dividing by (blockSize - 1) below requires at least two samples */
  if(blockSize <= 1u)
  {
    *pResult = 0;
    return;
  }

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* First pass: sum of squares, four samples per iteration */
  blkCnt = blockSize >> 2u;

  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    in = *__SIMD32(pSrc)++;
    sumOfSquares = __SMLALD(in, in, sumOfSquares);
    in = *__SIMD32(pSrc)++;
    sumOfSquares = __SMLALD(in, in, sumOfSquares);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Remaining 0 to 3 samples */
  blkCnt = blockSize % 0x4u;

  while(blkCnt > 0u)
  {
    /* A lone sample is squared with a plain multiply. Handing a sign-extended
     * Q15 value to __SMLALD would also square its upper halfword (0xFFFF for
     * negative samples) and add a spurious 1 to the accumulator. */
    in1 = *pSrc++;
    sumOfSquares += (q63_t) ((q31_t) in1 * in1);

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Second pass: plain sum of the samples */
  pSrc = pIn;
  blkCnt = blockSize >> 2u;

  while(blkCnt > 0u)
  {
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    sum += *pSrc++;
    sum += *pSrc++;
    sum += *pSrc++;
    sum += *pSrc++;

    /* Decrement the loop counter */
    blkCnt--;
  }

  blkCnt = blockSize % 0x4u;

  while(blkCnt > 0u)
  {
    sum += *pSrc++;

    /* Decrement the loop counter */
    blkCnt--;
  }

#else

  /* Run the below code for Cortex-M0 */

  /* Single pass accumulating both the sum and the sum of squares */
  blkCnt = blockSize;

  while(blkCnt > 0u)
  {
    in = *pSrc++;
    sumOfSquares += (q63_t) ((q31_t) in * in);
    sum += in;

    /* Decrement the loop counter */
    blkCnt--;
  }

#endif /* #ifndef ARM_MATH_CM0 */

  /* Mean of squares with the (N - 1) divisor, 2.30 format. The full 64-bit
   * accumulator is divided; saturating it first would clip every vector whose
   * sum of squares exceeds 1.0. */
  meanOfSquares = sumOfSquares / (count - 1);

  /* Mean in 1.30 format. |sum| <= N * 2^15 < 2^47, so the scaled value stays
   * below 2^62 and cannot overflow for any 32-bit blockSize. */
  mean = (sum * 32768) / count;

  /* mean^2 is Q60 (<= 2^60); bring it back to Q30 and apply N / (N - 1) */
  squareOfMean = (((mean * mean) >> 30) * count) / (count - 1);

  /* Variance (never negative with these truncations) -> Q15, saturated to 1.15 */
  *pResult = __SSAT((q31_t) ((meanOfSquares - squareOfMean) >> 15), 16u);
}
/**
* @} end of variance group
*/

View File

@ -0,0 +1,216 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010 ARM Limited. All rights reserved.
*
* $Date: 15. July 2011
* $Revision: V1.0.10
*
* Project: CMSIS DSP Library
* Title: arm_var_q31.c
*
* Description: Variance of an array of Q31 type.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Version 1.0.10 2011/7/15
* Big Endian support added and Merged M0 and M3/M4 Source code.
*
* Version 1.0.3 2010/11/29
* Re-organized the CMSIS folders and updated documentation.
*
* Version 1.0.2 2010/11/11
* Documentation updated.
*
* Version 1.0.1 2010/10/05
* Production release and review comments incorporated.
*
* Version 1.0.0 2010/09/20
* Production release and review comments incorporated.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupStats
*/
/**
* @addtogroup variance
* @{
*/
/**
 * @brief Variance of the elements of a Q31 vector.
 * @param[in]       *pSrc points to the input vector
 * @param[in]       blockSize length of the input vector
 * @param[out]      *pResult variance value returned here
 * @return none.
 *
 * @details
 * <b>Scaling and Overflow Behavior:</b>
 *
 *\par
 * The function is implemented using an internal 64-bit accumulator.
 * The input is represented in 1.31 format, and intermediate multiplication
 * yields a 2.62 format.
 * The accumulator maintains full precision of the intermediate multiplication results,
 * but provides only a single guard bit.
 * There is no saturation on intermediate additions.
 * If the accumulator overflows it wraps around and distorts the result.
 * In order to avoid overflows completely the input signal must be scaled down by
 * log2(blockSize) bits, as a total of blockSize additions are performed internally.
 * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
 * The variance written to <code>*pResult</code> is saturated to the 1.31 range.
 *
 *\par
 * The unbiased estimator divides by (blockSize - 1), so it is undefined for
 * fewer than two samples. In that case 0 is written to <code>*pResult</code>.
 */
void arm_var_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q63_t * pResult)
{
  q63_t sumOfSquares = 0;                /* Sum of squares, 2.62 format */
  q63_t sum = 0;                         /* Sum of the samples, in Q31 units */
  q63_t count = (q63_t) blockSize;       /* Sample count widened for 64-bit arithmetic */
  q63_t meanOfSquares;                   /* Mean of squares with (N - 1) divisor, Q31 */
  q63_t mean;                            /* sum / N, Q31 */
  q63_t squareOfMean;                    /* mean^2 * N / (N - 1), Q31 */
  q63_t variance;                        /* meanOfSquares - squareOfMean, Q31 */
  q31_t in;                              /* Input variable */
  uint32_t blkCnt;                       /* loop counter */

  /* Dividing by (blockSize - 1) below requires at least two samples */
  if(blockSize <= 1u)
  {
    *pResult = 0;
    return;
  }

#ifndef ARM_MATH_CM0

  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* Loop unrolling: four samples per iteration, both sums in a single pass */
  blkCnt = blockSize >> 2u;

  while(blkCnt > 0u)
  {
    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
    in = *pSrc++;
    sumOfSquares += ((q63_t) (in) * (in));
    sum += in;
    in = *pSrc++;
    sumOfSquares += ((q63_t) (in) * (in));
    sum += in;
    in = *pSrc++;
    sumOfSquares += ((q63_t) (in) * (in));
    sum += in;
    in = *pSrc++;
    sumOfSquares += ((q63_t) (in) * (in));
    sum += in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Remaining 0 to 3 samples are handled by the loop below */
  blkCnt = blockSize % 0x4u;

#else

  /* Run the below code for Cortex-M0 */

  /* No unrolling: every sample goes through the loop below */
  blkCnt = blockSize;

#endif /* #ifndef ARM_MATH_CM0 */

  while(blkCnt > 0u)
  {
    in = *pSrc++;
    sumOfSquares += ((q63_t) (in) * (in));
    sum += in;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* 2.62 -> Q31, then divide by (N - 1) using exact integer division */
  meanOfSquares = (sumOfSquares >> 31) / (count - 1);

  /* Divide the full 64-bit sum by N. Casting the sum straight to q31_t would
   * drop its upper bits as soon as the samples add up to more than 1.0. */
  mean = sum / count;

  /* |mean| <= 2^31, so mean^2 <= 2^62 fits in 64 bits; bring it back to Q31 */
  squareOfMean = (mean * mean) >> 31;

  /* Apply N / (N - 1); the product stays below 2^63 for any 32-bit blockSize */
  squareOfMean = (squareOfMean * count) / (count - 1);

  /* Never negative with these truncations, but it can exceed 1.0 for small
   * blocks of full-scale data, so saturate to the documented 1.31 range. */
  variance = meanOfSquares - squareOfMean;

  if(variance > (q63_t) 0x7FFFFFFF)
  {
    variance = (q63_t) 0x7FFFFFFF;
  }

  /* Compute variance and then store the result to the destination */
  *pResult = variance;
}
/**
* @} end of variance group
*/