[sam] Adding CMSIS 2.10

2025-10-18 09:50:40 +03:00 · 2011-08-08 16:58:06 +02:00
parent 32f3cf5734
commit 2260c6875d
1336 changed files with 366105 additions and 0 deletions
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c
@@ -0,0 +1,127 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_max_f32.c   
+*   
+* Description:	Maximum value of a floating-point vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @defgroup Max Maximum   
+ *   
+ * Computes the maximum value of an array of data.    
+ * The function returns both the maximum value and its position within the array.    
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.   
+ */
+
+/**   
+ * @addtogroup Max   
+ * @{   
+ */
+
+
+/**
+ * @brief Maximum value of a floating-point vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult maximum value returned here
+ * @param[out]      *pIndex index of maximum value returned here
+ * @return none.
+ *
+ * @details
+ * When several elements share the maximum value, the index of the first one
+ * is reported: a later element only replaces the running maximum when it is
+ * strictly greater.
+ *
+ * \par
+ * If blockSize is zero there is no element to read, so the function returns
+ * immediately and leaves *pResult and *pIndex unmodified.
+ *
+ * \par
+ * The same scan loop is used for every target core.  It tests its counter
+ * before each read, so a one-element vector is handled without reading
+ * beyond pSrc[0].
+ */
+
+void arm_max_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult,
+  uint32_t * pIndex)
+{
+  float32_t maxVal, out;                         /* Candidate value and running maximum */
+  uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */
+
+  /* An empty vector has no maximum, and reading pSrc[0] would be out of bounds. */
+  if(blockSize == 0u)
+  {
+    return;
+  }
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+
+  /* Load the first input value, which acts as the reference value for comparison */
+  out = *pSrc++;
+
+  /* The remaining blockSize - 1 values are compared against the reference */
+  blkCnt = (blockSize - 1u);
+
+  /* The counter is tested before every read.  A do/while loop must not be used
+   * here: for a one-element vector it would read past the end of the input and
+   * then wrap the counter around when decrementing it from zero. */
+  while(blkCnt > 0u)
+  {
+    /* Load the next candidate value */
+    maxVal = *pSrc++;
+
+    /* Compare for the maximum value; equal values keep the earlier index */
+    if(out < maxVal)
+    {
+      /* Update the maximum value and its index.  blkCnt counts down from
+       * blockSize - 1, so the value just read sits at blockSize - blkCnt. */
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+
+  }
+
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**   
+ * @} end of Max group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c
@@ -0,0 +1,119 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_max_q15.c   
+*   
+* Description:	Maximum value of a Q15 vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup Max   
+ * @{   
+ */
+
+
+/**
+ * @brief Maximum value of a Q15 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult maximum value returned here
+ * @param[out]      *pIndex index of maximum value returned here
+ * @return none.
+ *
+ * @details
+ * When several elements share the maximum value, the index of the first one
+ * is reported: a later element only replaces the running maximum when it is
+ * strictly greater.
+ *
+ * \par
+ * If blockSize is zero there is no element to read, so the function returns
+ * immediately and leaves *pResult and *pIndex unmodified.
+ *
+ * \par
+ * The same scan loop is used for every target core.  It tests its counter
+ * before each read, so a one-element vector is handled without reading
+ * beyond pSrc[0].
+ */
+
+void arm_max_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult,
+  uint32_t * pIndex)
+{
+  q15_t maxVal, out;                             /* Candidate value and running maximum */
+  uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */
+
+  /* An empty vector has no maximum, and reading pSrc[0] would be out of bounds. */
+  if(blockSize == 0u)
+  {
+    return;
+  }
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+
+  /* Load the first input value, which acts as the reference value for comparison */
+  out = *pSrc++;
+
+  /* The remaining blockSize - 1 values are compared against the reference */
+  blkCnt = (blockSize - 1u);
+
+  /* The counter is tested before every read.  A do/while loop must not be used
+   * here: for a one-element vector it would read past the end of the input and
+   * then wrap the counter around when decrementing it from zero. */
+  while(blkCnt > 0u)
+  {
+    /* Load the next candidate value */
+    maxVal = *pSrc++;
+
+    /* Compare for the maximum value; equal values keep the earlier index */
+    if(out < maxVal)
+    {
+      /* Update the maximum value and its index.  blkCnt counts down from
+       * blockSize - 1, so the value just read sits at blockSize - blkCnt. */
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+
+  }
+
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**   
+ * @} end of Max group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c
@@ -0,0 +1,121 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_max_q31.c   
+*   
+* Description:	Maximum value of a Q31 vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup Max   
+ * @{   
+ */
+
+
+/**
+ * @brief Maximum value of a Q31 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult maximum value returned here
+ * @param[out]      *pIndex index of maximum value returned here
+ * @return none.
+ *
+ * @details
+ * When several elements share the maximum value, the index of the first one
+ * is reported: a later element only replaces the running maximum when it is
+ * strictly greater.
+ * The reported index is zero-based: on return the element pSrc[*pIndex]
+ * holds the value stored in *pResult.
+ *
+ * \par
+ * If blockSize is zero there is no element to read, so the function returns
+ * immediately and leaves *pResult and *pIndex unmodified.
+ *
+ * \par
+ * The same scan loop is used for every target core.  It tests its counter
+ * before each read, so a one-element vector is handled without reading
+ * beyond pSrc[0].
+ */
+
+void arm_max_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult,
+  uint32_t * pIndex)
+{
+  q31_t maxVal, out;                             /* Candidate value and running maximum */
+  uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */
+
+  /* An empty vector has no maximum, and reading pSrc[0] would be out of bounds. */
+  if(blockSize == 0u)
+  {
+    return;
+  }
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+
+  /* Load the first input value, which acts as the reference value for comparison */
+  out = *pSrc++;
+
+  /* The remaining blockSize - 1 values are compared against the reference */
+  blkCnt = (blockSize - 1u);
+
+  /* The counter is tested before every read.  A do/while loop must not be used
+   * here: for a one-element vector it would read past the end of the input and
+   * then wrap the counter around when decrementing it from zero. */
+  while(blkCnt > 0u)
+  {
+    /* Load the next candidate value */
+    maxVal = *pSrc++;
+
+    /* Compare for the maximum value; equal values keep the earlier index */
+    if(out < maxVal)
+    {
+      /* Update the maximum value and its index.  blkCnt counts down from
+       * blockSize - 1, so the value just read sits at blockSize - blkCnt. */
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+
+  }
+
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**   
+ * @} end of Max group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c
@@ -0,0 +1,206 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_max_q7.c   
+*   
+* Description:	Maximum value of a Q7 vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup Max   
+ * @{   
+ */
+
+
+/**
+ * @brief Maximum value of a Q7 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult maximum value returned here
+ * @param[out]      *pIndex index of maximum value returned here
+ * @return none.
+ *
+ * @details
+ * When several elements share the maximum value, the index of the first one
+ * is reported: a later element only replaces the running maximum when it is
+ * strictly greater.
+ *
+ * \par
+ * If blockSize is zero there is no element to read, so the function returns
+ * immediately and leaves *pResult and *pIndex unmodified.
+ *
+ * \par
+ * pSrc[0] seeds the running maximum.  On Cortex-M4 and Cortex-M3 the values
+ * that follow are examined in groups of four, and the one to three values
+ * left over are examined one at a time.  On Cortex-M0 every value after
+ * pSrc[0] is examined one at a time.
+ */
+
+void arm_max_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q7_t * pResult,
+  uint32_t * pIndex)
+{
+  q7_t maxVal, res;                              /* Candidate value and running maximum */
+  uint32_t blkCnt, indx;                         /* Loop counter and index of the running maximum */
+#ifndef ARM_MATH_CM0
+  uint32_t indxMod;                              /* Index of the first value of the current group of four */
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* An empty vector has no maximum, and reading pSrc[0] would be out of bounds. */
+  if(blockSize == 0u)
+  {
+    return;
+  }
+
+  /* Initialise the index value to zero. */
+  indx = 0u;
+
+  /* Load the first input value, which acts as the reference value for comparison */
+  res = *pSrc++;
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /* pSrc[0] has been consumed, so the first group starts at index 1.  The group
+   * index is counted upwards from there; deriving it from blockSize and the
+   * remaining group count is only right when blockSize - 1 is a multiple of 4. */
+  indxMod = 1u;
+
+  /* Loop unrolling */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  /* First part of the processing with loop unrolling.  Examine 4 values at a time.
+   ** The shared loop further down examines the remaining 1 to 3 values. */
+  while(blkCnt > 0u)
+  {
+    q7_t x0, x1, maxVal1, maxVal2;               /* Group values and the maximum of each pair */
+    uint32_t index1, index2, index3;             /* Indices of the pair maxima and of the group maximum */
+
+    /* Load the first pair of the group */
+    x0 = *pSrc++;
+    x1 = *pSrc++;
+
+    if(x0 < x1)
+    {
+      /* The second value of the pair is larger */
+      maxVal1 = x1;
+      index1 = indxMod + 1u;
+    }
+    else
+    {
+      /* The first value is larger, or both are equal */
+      maxVal1 = x0;
+      index1 = indxMod;
+    }
+
+    /* Load the second pair of the group */
+    x0 = *pSrc++;
+    x1 = *pSrc++;
+
+    if(x0 < x1)
+    {
+      /* The second value of the pair is larger */
+      maxVal2 = x1;
+      index2 = indxMod + 3u;
+    }
+    else
+    {
+      /* The first value is larger, or both are equal */
+      maxVal2 = x0;
+      index2 = indxMod + 2u;
+    }
+
+    if(maxVal1 < maxVal2)
+    {
+      /* The group maximum comes from the second pair */
+      maxVal = maxVal2;
+      index3 = index2;
+    }
+    else
+    {
+      /* The group maximum comes from the first pair, or both pairs tie */
+      maxVal = maxVal1;
+      index3 = index1;
+    }
+
+    if(res < maxVal)
+    {
+      /* Update the maximum value and its index */
+      res = maxVal;
+      indx = index3;
+    }
+
+    /* Advance to the next group of four and decrement the loop counter */
+    indxMod += 4u;
+    blkCnt--;
+  }
+
+  /* If blockSize - 1 is not a multiple of 4, examine the remaining values below.
+   ** No loop unrolling is used. */
+  blkCnt = (blockSize - 1u) % 0x04u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize - 1 number of values */
+  blkCnt = (blockSize - 1u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* Load the next candidate value */
+    maxVal = *pSrc++;
+
+    /* Compare for the maximum value; equal values keep the earlier index */
+    if(res < maxVal)
+    {
+      /* Update the maximum value and its index.  blkCnt values were unread
+       * before this load, so the value just read sits at blockSize - blkCnt. */
+      res = maxVal;
+      indx = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = res;
+  *pIndex = indx;
+}
+
+/**   
+ * @} end of Max group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c
@@ -0,0 +1,122 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_mean_f32.c   
+*   
+* Description:	Mean value of a floating-point vector.  
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @defgroup mean Mean   
+ *   
+ * Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.   
+ * The following algorithm is used:
+ *   
+ * <pre>   
+ * 	Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;   
+ * </pre>   
+ *   
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.   
+ */
+
+/**   
+ * @addtogroup mean   
+ * @{   
+ */
+
+
+/**
+ * @brief Mean value of a floating-point vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * The elements are added to a single-precision accumulator in index order and
+ * the total is divided by blockSize converted to floating point.
+ *
+ * \par
+ * On Cortex-M4 and Cortex-M3 the summation loop is unrolled by four; when
+ * ARM_MATH_CM0 is defined a plain element-by-element loop is used instead.
+ */
+
+void arm_mean_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t acc = 0.0f;                          /* Running total of the elements */
+  uint32_t remaining;                            /* Elements still to be added one at a time */
+
+#ifndef ARM_MATH_CM0
+
+  uint32_t groups;                               /* Whole groups of four elements still to be added */
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  /* Unrolled part: four elements are added per iteration, in index order. */
+  for(groups = blockSize >> 2u; groups > 0u; groups--)
+  {
+    /* acc = acc + A[i] + A[i+1] + A[i+2] + A[i+3], added one by one */
+    acc += pSrc[0];
+    acc += pSrc[1];
+    acc += pSrc[2];
+    acc += pSrc[3];
+
+    /* Step past the group that has just been added */
+    pSrc += 4;
+  }
+
+  /* If blockSize is not a multiple of 4, the elements beyond the last whole
+   * group are added by the loop below. */
+  remaining = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  /* Every element is added by the loop below */
+  remaining = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  for(; remaining > 0u; remaining--)
+  {
+    /* acc = acc + A[i] */
+    acc += *pSrc++;
+  }
+
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
+  /* Store the result to the destination */
+  *pResult = acc / (float32_t) blockSize;
+}
+
+/**   
+ * @} end of mean group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c
@@ -0,0 +1,119 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_mean_q15.c   
+*   
+* Description:	Mean value of a Q15 vector.  
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup mean   
+ * @{   
+ */
+
+/**
+ * @brief Mean value of a Q15 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using a 32-bit internal accumulator.
+ * The input is represented in 1.15 format and is accumulated in a 32-bit
+ * accumulator in 17.15 format.
+ * The accumulator cannot overflow as long as blockSize does not exceed 2^16,
+ * and the full precision of the intermediate result is preserved.
+ * Finally, the accumulator is divided by blockSize and the quotient is
+ * truncated to yield a result of 1.15 format.
+ *
+ * \par
+ * The division is carried out in signed arithmetic.  Dividing the signed
+ * accumulator by the unsigned blockSize directly would convert a negative
+ * sum to a large unsigned value and produce a wrong mean.
+ *
+ * \par
+ * blockSize is converted to int32_t and used as the divisor without being
+ * checked against zero.
+ */
+
+void arm_mean_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult)
+{
+  q31_t sum = 0;                                 /* Temporary result storage */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  /* Loop unrolling: whole groups of four elements */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Add 4 elements at a time. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, add the remaining elements below. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0: loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    blkCnt--;
+  }
+
+  /* C = (A[0] + ... + A[blockSize-1]) / blockSize, using a signed divisor */
+  *pResult = (q15_t) (sum / (int32_t) blockSize);
+}
+
+/**   
+ * @} end of mean group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c
@@ -0,0 +1,119 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_mean_q31.c   
+*   
+* Description:	Mean value of a Q31 vector.  
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup mean   
+ * @{   
+ */
+
+/**
+ * @brief Mean value of a Q31 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.31 format and is accumulated in a 64-bit
+ * accumulator in 33.31 format.
+ * There is no risk of internal overflow with this approach, and the
+ * full precision of the intermediate result is preserved.
+ * Finally, the accumulator is divided by blockSize and the quotient is
+ * truncated to yield a result of 1.31 format.
+ *
+ * \par
+ * On Cortex-M4 and Cortex-M3 the summation loop is unrolled by four; when
+ * ARM_MATH_CM0 is defined a plain element-by-element loop is used instead.
+ *
+ * \par
+ * blockSize is converted to int32_t and used as the divisor without being
+ * checked against zero.
+ */
+
+void arm_mean_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q63_t acc = 0;                                 /* Running total of the elements */
+  uint32_t remaining;                            /* Elements still to be added one at a time */
+
+#ifndef ARM_MATH_CM0
+
+  uint32_t groups;                               /* Whole groups of four elements still to be added */
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  /* Unrolled part: four elements are added per iteration. */
+  for(groups = blockSize >> 2u; groups > 0u; groups--)
+  {
+    /* acc = acc + A[i] + A[i+1] + A[i+2] + A[i+3] */
+    acc += pSrc[0];
+    acc += pSrc[1];
+    acc += pSrc[2];
+    acc += pSrc[3];
+
+    /* Step past the group that has just been added */
+    pSrc += 4;
+  }
+
+  /* If blockSize is not a multiple of 4, the elements beyond the last whole
+   * group are added by the loop below. */
+  remaining = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  /* Every element is added by the loop below */
+  remaining = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  for(; remaining > 0u; remaining--)
+  {
+    /* acc = acc + A[i] */
+    acc += *pSrc++;
+  }
+
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
+  /* Store the result to the destination */
+  *pResult = (q31_t) (acc / (int32_t) blockSize);
+}
+
+/**   
+ * @} end of mean group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c
@@ -0,0 +1,119 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_mean_q7.c   
+*   
+* Description:	Mean value of a Q7 vector.  
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup mean   
+ * @{   
+ */
+
+/**
+ * @brief Mean value of a Q7 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using a 32-bit internal accumulator.
+ * The input is represented in 1.7 format and is accumulated in a 32-bit
+ * accumulator in 25.7 format.
+ * The accumulator cannot overflow as long as blockSize does not exceed 2^24,
+ * and the full precision of the intermediate result is preserved.
+ * Finally, the accumulator is divided by blockSize and the quotient is
+ * truncated to yield a result of 1.7 format.
+ *
+ * \par
+ * On Cortex-M4 and Cortex-M3 the summation loop is unrolled by four; when
+ * ARM_MATH_CM0 is defined a plain element-by-element loop is used instead.
+ *
+ * \par
+ * blockSize is converted to int32_t and used as the divisor without being
+ * checked against zero.
+ */
+
+void arm_mean_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q7_t * pResult)
+{
+  q31_t acc = 0;                                 /* Running total of the elements */
+  uint32_t remaining;                            /* Elements still to be added one at a time */
+
+#ifndef ARM_MATH_CM0
+
+  uint32_t groups;                               /* Whole groups of four elements still to be added */
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  /* Unrolled part: four elements are added per iteration. */
+  for(groups = blockSize >> 2u; groups > 0u; groups--)
+  {
+    /* acc = acc + A[i] + A[i+1] + A[i+2] + A[i+3] */
+    acc += pSrc[0];
+    acc += pSrc[1];
+    acc += pSrc[2];
+    acc += pSrc[3];
+
+    /* Step past the group that has just been added */
+    pSrc += 4;
+  }
+
+  /* If blockSize is not a multiple of 4, the elements beyond the last whole
+   * group are added by the loop below. */
+  remaining = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  /* Every element is added by the loop below */
+  remaining = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  for(; remaining > 0u; remaining--)
+  {
+    /* acc = acc + A[i] */
+    acc += *pSrc++;
+  }
+
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
+  /* Store the result to the destination */
+  *pResult = (q7_t) (acc / (int32_t) blockSize);
+}
+
+/**   
+ * @} end of mean group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c
@@ -0,0 +1,133 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_min_f32.c   
+*   
+* Description:	Minimum value of a floating-point vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @defgroup Min Minimum   
+ *   
+ * Computes the minimum value of an array of data.    
+ * The function returns both the minimum value and its position within the array.    
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.   
+ */
+
+/**   
+ * @addtogroup Min   
+ * @{   
+ */
+
+
+/**
+ * @brief Minimum value of a floating-point vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult minimum value returned here
+ * @param[out]      *pIndex index of minimum value returned here
+ * @return none.
+ *
+ * @details
+ * When several elements share the minimum value, the index of the first one
+ * is reported: a later element only replaces the running minimum when it is
+ * strictly smaller.
+ *
+ * \par
+ * If blockSize is zero there is no element to read, so the function returns
+ * immediately and leaves *pResult and *pIndex unmodified.
+ *
+ * \par
+ * The same scan loop is used for every target core.  It tests its counter
+ * before each read, so a one-element vector is handled without reading
+ * beyond pSrc[0].
+ *
+ * \par
+ * The reported index is zero-based; index 0 refers to pSrc[0].
+ *
+ * \par
+ * The input vector is only read.  Both results are written once, after the
+ * scan has finished.
+ */
+
+void arm_min_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult,
+  uint32_t * pIndex)
+{
+  float32_t minVal, out;                         /* Candidate value and running minimum */
+  uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running minimum */
+
+  /* An empty vector has no minimum, and reading pSrc[0] would be out of bounds. */
+  if(blockSize == 0u)
+  {
+    return;
+  }
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+
+  /* Load the first input value, which acts as the reference value for comparison */
+  out = *pSrc++;
+
+  /* The remaining blockSize - 1 values are compared against the reference */
+  blkCnt = (blockSize - 1u);
+
+  /* The counter is tested before every read.  A do/while loop must not be used
+   * here: for a one-element vector it would read past the end of the input and
+   * then wrap the counter around when decrementing it from zero. */
+  while(blkCnt > 0u)
+  {
+    /* Load the next candidate value */
+    minVal = *pSrc++;
+
+    /* Compare for the minimum value; equal values keep the earlier index */
+    if(out > minVal)
+    {
+      /* Update the minimum value and its index.  blkCnt counts down from
+       * blockSize - 1, so the value just read sits at blockSize - blkCnt. */
+      out = minVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**   
+ * @} end of Min group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c
@@ -0,0 +1,127 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_min_q15.c   
+*   
+* Description:	Minimum value of a Q15 vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+
+/**   
+ * @addtogroup Min   
+ * @{   
+ */
+
+
+/**   
+ * @brief Minimum value of a Q15 vector.   
+ * @param[in]       *pSrc points to the input vector   
+ * @param[in]       blockSize length of the input vector   
+ * @param[out]      *pResult minimum value returned here   
+ * @param[out]      *pIndex index of minimum value returned here   
+ * @return none.   
+ *   
+ */
+
+void arm_min_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult,
+  uint32_t * pIndex)
+{
+  q15_t minVal, out;                             /* Sample under test and running minimum */
+  uint32_t blkCnt, outIndex;                     /* Samples left to scan and index of the running minimum */
+
+  /* An empty vector has no minimum: return without reading pSrc and without
+   * writing *pResult or *pIndex.  Without this guard the first load below
+   * would read past the buffer and (blockSize - 1u) would wrap to 0xFFFFFFFF. */
+  if(blockSize == 0u)
+  {
+    return;
+  }
+
+  /* Seed the search with the first sample, which sits at index 0 */
+  outIndex = 0u;
+  out = *pSrc++;
+
+  /* The remaining blockSize - 1 samples are scanned by a single loop that is
+   * shared by Cortex-M4, Cortex-M3 and Cortex-M0.  The loop has to be
+   * pre-tested: a do/while would, for blockSize == 1, read pSrc[1] and then
+   * wrap the counter to 0xFFFFFFFF, scanning far beyond the input vector. */
+  blkCnt = (blockSize - 1u);
+
+  while(blkCnt > 0u)
+  {
+    /* Load the next sample */
+    minVal = *pSrc++;
+
+    /* Strict comparison: on equal values the earlier index is kept */
+    if(out > minVal)
+    {
+      /* Record the new minimum; the sample just read sits at index blockSize - blkCnt */
+      out = minVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into the destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**   
+ * @} end of Min group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c
@@ -0,0 +1,125 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_min_q31.c   
+*   
+* Description:	Minimum value of a Q31 vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+
+/**   
+ * @addtogroup Min   
+ * @{   
+ */
+
+
+/**   
+ * @brief Minimum value of a Q31 vector.   
+ * @param[in]       *pSrc points to the input vector   
+ * @param[in]       blockSize length of the input vector   
+ * @param[out]      *pResult minimum value returned here   
+ * @param[out]      *pIndex index of minimum value returned here   
+ * @return none.   
+ *   
+ */
+
+void arm_min_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult,
+  uint32_t * pIndex)
+{
+  q31_t minVal, out;                             /* Sample under test and running minimum */
+  uint32_t blkCnt, outIndex;                     /* Samples left to scan and index of the running minimum */
+
+  /* An empty vector has no minimum: return without reading pSrc and without
+   * writing *pResult or *pIndex.  Without this guard the first load below
+   * would read past the buffer and (blockSize - 1u) would wrap to 0xFFFFFFFF. */
+  if(blockSize == 0u)
+  {
+    return;
+  }
+
+  /* Seed the search with the first sample, which sits at index 0 */
+  outIndex = 0u;
+  out = *pSrc++;
+
+  /* The remaining blockSize - 1 samples are scanned by a single loop that is
+   * shared by Cortex-M4, Cortex-M3 and Cortex-M0.  The loop has to be
+   * pre-tested: a do/while would, for blockSize == 1, read pSrc[1] and then
+   * wrap the counter to 0xFFFFFFFF, scanning far beyond the input vector. */
+  blkCnt = (blockSize - 1u);
+
+  while(blkCnt > 0u)
+  {
+    /* Load the next sample */
+    minVal = *pSrc++;
+
+    /* Strict comparison: on equal values the earlier index is kept */
+    if(out > minVal)
+    {
+      /* Record the new minimum; the sample just read sits at index blockSize - blkCnt */
+      out = minVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into the destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**   
+ * @} end of Min group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c
@@ -0,0 +1,204 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_min_q7.c   
+*   
+* Description:	Minimum value of a Q7 vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup Min   
+ * @{   
+ */
+
+
+/**   
+ * @brief Minimum value of a Q7 vector.   
+ * @param[in]       *pSrc points to the input vector   
+ * @param[in]       blockSize length of the input vector   
+ * @param[out]      *pResult minimum value returned here   
+ * @param[out]      *pIndex index of minimum value returned here   
+ * @return none.   
+ *   
+ */
+
+void arm_min_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q7_t * pResult,
+  uint32_t * pIndex)
+{
+  q7_t minVal, out;                              /* Candidate value and running minimum */
+  uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running minimum */
+
+#ifndef ARM_MATH_CM0
+
+  q7_t minVal1, minVal2, x0, x1;                 /* Minima of the two pairs and the two samples being compared */
+  uint32_t index1, index2, index3;               /* Indices that belong to minVal1, minVal2 and minVal */
+  uint32_t indxMod;                              /* Index of the first sample of the current group of four */
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* An empty vector has no minimum: return without reading pSrc and without
+   * writing *pResult or *pIndex.  Without this guard the first load below
+   * would read past the buffer and (blockSize - 1u) would wrap to 0xFFFFFFFF. */
+  if(blockSize == 0u)
+  {
+    return;
+  }
+
+  /* Seed the search with the first sample, which sits at index 0 */
+  outIndex = 0u;
+  out = *pSrc++;
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /* Loop unrolling: after the seed sample, process whole groups of four samples */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  /* The first group starts right after the seed, at index 1.  The index is
+   * tracked with a running counter: deriving it as blockSize - (blkCnt * 4u)
+   * is only correct when (blockSize - 1) is a multiple of 4 and otherwise
+   * reports an index that is too large by (blockSize - 1) % 4. */
+  indxMod = 1u;
+
+  while(blkCnt > 0u)
+  {
+    /* Minimum of the first pair; on equal values the lower index is kept */
+    x0 = *pSrc++;
+    x1 = *pSrc++;
+
+    if(x0 > x1)
+    {
+      minVal1 = x1;
+      index1 = indxMod + 1u;
+    }
+    else
+    {
+      minVal1 = x0;
+      index1 = indxMod;
+    }
+
+    /* Minimum of the second pair; on equal values the lower index is kept */
+    x0 = *pSrc++;
+    x1 = *pSrc++;
+
+    if(x0 > x1)
+    {
+      minVal2 = x1;
+      index2 = indxMod + 3u;
+    }
+    else
+    {
+      minVal2 = x0;
+      index2 = indxMod + 2u;
+    }
+
+    /* Minimum of the whole group; the first pair wins on equal values */
+    if(minVal1 > minVal2)
+    {
+      minVal = minVal2;
+      index3 = index2;
+    }
+    else
+    {
+      minVal = minVal1;
+      index3 = index1;
+    }
+
+    /* Strict comparison: an earlier minimum is kept on equal values */
+    if(out > minVal)
+    {
+      out = minVal;
+      outIndex = index3;
+    }
+
+    /* Advance to the next group and decrement the loop counter */
+    indxMod += 4u;
+    blkCnt--;
+  }
+
+  /* Samples left over after the groups of four (0 to 3 of them) */
+  blkCnt = (blockSize - 1u) % 0x04u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over the blockSize - 1 samples that follow the seed */
+  blkCnt = (blockSize - 1u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* Sample-by-sample scan.  The samples handled here are the last blkCnt
+   * samples of the vector, so the one just read sits at blockSize - blkCnt. */
+  while(blkCnt > 0u)
+  {
+    /* Load the next sample */
+    minVal = *pSrc++;
+
+    /* Strict comparison: on equal values the earlier index is kept */
+    if(out > minVal)
+    {
+      out = minVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into the destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**   
+ * @} end of Min group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c
@@ -0,0 +1,135 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_power_f32.c   
+*   
+* Description:	Sum of the squares of the elements of a floating-point vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+*   
+* Version 0.0.7  2010/06/10    
+*    Misra-C changes done   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @defgroup power Power   
+ *   
+ * Calculates the sum of the squares of the elements in the input vector.   
+ * The following algorithm is used:   
+ *   
+ * <pre>   
+ * 	Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];   
+ * </pre>   
+ *  
+ * There are separate functions for floating point, Q31, Q15, and Q7 data types.    
+ */
+
+/**   
+ * @addtogroup power   
+ * @{   
+ */
+
+
+/**   
+ * @brief Sum of the squares of the elements of a floating-point vector.   
+ * @param[in]       *pSrc points to the input vector   
+ * @param[in]       blockSize length of the input vector   
+ * @param[out]      *pResult sum of the squares value returned here   
+ * @return none.   
+ *   
+ */
+
+
+void arm_power_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t acc = 0.0f;                          /* Running sum of the squared samples */
+  float32_t sample;                              /* Sample currently being squared */
+  uint32_t remaining;                            /* Samples left for the sample-by-sample loop */
+
+#ifndef ARM_MATH_CM0
+
+  uint32_t groups;                               /* Whole groups of four samples still to process */
+
+  /* Cortex-M4 and Cortex-M3: the loop is unrolled by four.  Each pass squares
+   * four consecutive samples and adds them to the accumulator in input order. */
+  for(groups = blockSize >> 2u; groups > 0u; groups--)
+  {
+    sample = *pSrc++;
+    acc += sample * sample;
+
+    sample = *pSrc++;
+    acc += sample * sample;
+
+    sample = *pSrc++;
+    acc += sample * sample;
+
+    sample = *pSrc++;
+    acc += sample * sample;
+  }
+
+  /* 0 to 3 samples are left when blockSize is not a multiple of 4 */
+  remaining = blockSize & 0x3u;
+
+#else
+
+  /* Cortex-M0: no unrolling, every sample goes through the loop below */
+  remaining = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* acc = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+  for(; remaining > 0u; remaining--)
+  {
+    sample = *pSrc++;
+    acc += sample * sample;
+  }
+
+  /* Store the sum of squares to the destination */
+  *pResult = acc;
+}
+
+/**   
+ * @} end of power group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c
@@ -0,0 +1,141 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_power_q15.c   
+*   
+* Description:	Sum of the squares of the elements of a Q15 vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup power   
+ * @{   
+ */
+
+/**   
+ * @brief Sum of the squares of the elements of a Q15 vector.   
+ * @param[in]       *pSrc points to the input vector   
+ * @param[in]       blockSize length of the input vector   
+ * @param[out]      *pResult sum of the squares value returned here   
+ * @return none.   
+ *   
+ * @details   
+ * <b>Scaling and Overflow Behavior:</b>   
+ *   
+ * \par   
+ * The function is implemented using a 64-bit internal accumulator.    
+ * The input is represented in 1.15 format.  
+ * Intermediate multiplication yields a 2.30 format, and this   
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.   
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the   
+ * full precision of the intermediate multiplication is preserved.   
+ * Finally, the return result is in 34.30 format.    
+ *   
+ */
+
+void arm_power_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q63_t * pResult)
+{
+  q63_t sum = 0;                                 /* Accumulator for the sum of squares (34.30 format) */
+  q15_t in;                                      /* Single input sample */
+  uint32_t blkCnt;                               /* Loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t in32;                                    /* Two Q15 samples packed into one 32-bit word */
+
+  /* Loop unrolling: four samples per pass, read as two packed words */
+  blkCnt = blockSize >> 2u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* __SMLALD squares both halfwords of the packed word and adds the two products to sum */
+    in32 = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in32, in32, sum);
+    in32 = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in32, in32, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, the remaining 1 to 3 samples
+   ** are handled by the sample-by-sample loop below. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* A single sample is squared with a plain multiply.  __SMLALD must not be
+     * used here: a negative sample is sign-extended to 32 bits, so its upper
+     * halfword is -1 and the dual multiply would add a spurious (-1) * (-1). */
+    in = *pSrc++;
+    sum += ((q31_t) in * in);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the results in 34.30 format  */
+  *pResult = sum;
+}
+
+/**   
+ * @} end of power group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c
@@ -0,0 +1,132 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_power_q31.c   
+*   
+* Description:	Sum of the squares of the elements of a Q31 vector.  
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup power   
+ * @{   
+ */
+
+/**   
+ * @brief Sum of the squares of the elements of a Q31 vector.   
+ * @param[in]       *pSrc points to the input vector   
+ * @param[in]       blockSize length of the input vector   
+ * @param[out]      *pResult sum of the squares value returned here   
+ * @return none.   
+ *   
+ * @details   
+ * <b>Scaling and Overflow Behavior:</b>   
+ *   
+ * \par   
+ * The function is implemented using a 64-bit internal accumulator.   
+ * The input is represented in 1.31 format.   
+ * Intermediate multiplication yields a 2.62 format, and this   
+ * result is truncated to 2.48 format by discarding the lower 14 bits.   
+ * The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.   
+ * With 15 guard bits in the accumulator, there is no risk of overflow; the only   
+ * precision lost is the 14 low-order bits discarded from each intermediate product.   
+ * Finally, the return result is in 16.48 format.    
+ *   
+ */
+
+void arm_power_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q63_t * pResult)
+{
+  q63_t acc = 0;                                 /* Running sum of the scaled squares (16.48 format) */
+  q31_t sample;                                  /* Sample currently being squared */
+  uint32_t remaining;                            /* Samples left for the sample-by-sample loop */
+
+#ifndef ARM_MATH_CM0
+
+  uint32_t groups;                               /* Whole groups of four samples still to process */
+
+  /* Cortex-M4 and Cortex-M3: the loop is unrolled by four.  Every product is
+   * formed in 64 bits and shifted right by 14 bits before it is accumulated,
+   * which keeps the accumulator in 16.48 format. */
+  for(groups = blockSize >> 2u; groups > 0u; groups--)
+  {
+    sample = *pSrc++;
+    acc += ((q63_t) sample * sample) >> 14u;
+
+    sample = *pSrc++;
+    acc += ((q63_t) sample * sample) >> 14u;
+
+    sample = *pSrc++;
+    acc += ((q63_t) sample * sample) >> 14u;
+
+    sample = *pSrc++;
+    acc += ((q63_t) sample * sample) >> 14u;
+  }
+
+  /* 0 to 3 samples are left when blockSize is not a multiple of 4 */
+  remaining = blockSize & 0x3u;
+
+#else
+
+  /* Cortex-M0: no unrolling, every sample goes through the loop below */
+  remaining = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* acc = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1],
+   * each product shifted right by 14 bits as above */
+  for(; remaining > 0u; remaining--)
+  {
+    sample = *pSrc++;
+    acc += ((q63_t) sample * sample) >> 14u;
+  }
+
+  /* Store the result in 16.48 format */
+  *pResult = acc;
+}
+
+/**   
+ * @} end of power group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c
@@ -0,0 +1,137 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_power_q7.c   
+*   
+* Description:	Sum of the squares of the elements of a Q7 vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup power   
+ * @{   
+ */
+
+/**   
+ * @brief Sum of the squares of the elements of a Q7 vector.   
+ * @param[in]       *pSrc points to the input vector   
+ * @param[in]       blockSize length of the input vector   
+ * @param[out]      *pResult sum of the squares value returned here   
+ * @return none.   
+ *   
+ * @details   
+ * <b>Scaling and Overflow Behavior:</b>   
+ *   
+ * \par   
+ * The function is implemented using a 32-bit internal accumulator.    
+ * The input is represented in 1.7 format.  
+ * Intermediate multiplication yields a 2.14 format, and this   
+ * result is added without saturation to an accumulator in 18.14 format.   
+ * With 17 guard bits in the accumulator, there is no risk of overflow, and the   
+ * full precision of the intermediate multiplication is preserved.   
+ * Finally, the return result is in 18.14 format.    
+ *   
+ */
+
+void arm_power_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q31_t sum = 0;                                 /* Accumulator for the sum of squares (18.14 format) */
+  q7_t in;                                       /* Single input sample */
+  uint32_t blkCnt;                               /* Loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t input1;                                  /* Two sign-extended samples packed into one 32-bit word */
+  q15_t in1, in2;                                /* Samples widened from Q7 to 16 bits */
+
+  /* Loop unrolling: four samples per pass, processed as two packed pairs */
+  blkCnt = blockSize >> 2u;
+
+  while(blkCnt > 0u)
+  {
+    /* Read two samples and pack them: in1 in the lower halfword, in2 in the
+     * upper one.  The packing is done in unsigned arithmetic because
+     * left-shifting a negative signed value is undefined behaviour in C;
+     * the resulting bit pattern is the same. */
+    in1 = (q15_t) * pSrc++;
+    in2 = (q15_t) * pSrc++;
+    input1 = (q31_t) (((uint32_t) in1 & 0x0000FFFFu) | ((uint32_t) in2 << 16));
+
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* __SMLAD squares both halfwords of the packed word and adds the two products to sum */
+    sum = __SMLAD(input1, input1, sum);
+
+    /* Same again for the next pair of samples */
+    in1 = (q15_t) * pSrc++;
+    in2 = (q15_t) * pSrc++;
+    input1 = (q31_t) (((uint32_t) in1 & 0x0000FFFFu) | ((uint32_t) in2 << 16));
+
+    sum = __SMLAD(input1, input1, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, the remaining 1 to 3 samples
+   ** are handled by the sample-by-sample loop below. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Square one sample and add it to the accumulator */
+    in = *pSrc++;
+    sum += ((q15_t) in * in);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the result in 18.14 format  */
+  *pResult = sum;
+}
+
+/**   
+ * @} end of power group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c
@@ -0,0 +1,130 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_rms_f32.c   
+*   
+* Description:	Root mean square value of an array of F32 type   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @defgroup RMS Root mean square (RMS)   
+ *   
+ *    
+ * Calculates the Root Mean Square of the elements in the input vector.   
+ * The following algorithm is used:   
+ *   
+ * <pre>   
+ * 	Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));   
+ * </pre>   
+ *  
+ * There are separate functions for floating point, Q31, and Q15 data types.    
+ */
+
+/**   
+ * @addtogroup RMS   
+ * @{   
+ */
+
+
+/**   
+ * @brief Root Mean Square of the elements of a floating-point vector.   
+ * @param[in]       *pSrc points to the input vector   
+ * @param[in]       blockSize length of the input vector   
+ * @param[out]      *pResult rms value returned here   
+ * @return none.   
+ *   
+ */
+
+void arm_rms_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t sumSq = 0.0f;                        /* Running sum of the squared samples */
+  float32_t value;                               /* Sample currently being squared */
+  uint32_t left;                                 /* Samples left for the sample-by-sample loop */
+
+#ifndef ARM_MATH_CM0
+
+  uint32_t quads;                                /* Whole groups of four samples still to process */
+
+  /* Cortex-M4 and Cortex-M3: the loop is unrolled by four.  Each pass squares
+   * four consecutive samples and adds them to the accumulator in input order. */
+  for(quads = blockSize >> 2u; quads > 0u; quads--)
+  {
+    value = *pSrc++;
+    sumSq += value * value;
+
+    value = *pSrc++;
+    sumSq += value * value;
+
+    value = *pSrc++;
+    sumSq += value * value;
+
+    value = *pSrc++;
+    sumSq += value * value;
+  }
+
+  /* 0 to 3 samples are left when blockSize is not a multiple of 4 */
+  left = blockSize & 0x3u;
+
+#else
+
+  /* Cortex-M0: no unrolling, every sample goes through the loop below */
+  left = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* sumSq = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+  for(; left > 0u; left--)
+  {
+    value = *pSrc++;
+    sumSq += value * value;
+  }
+
+  /* RMS = square root of the mean of the squares; arm_sqrt_f32 writes it to *pResult */
+  arm_sqrt_f32(sumSq / (float32_t) blockSize, pResult);
+}
+
+/**   
+ * @} end of RMS group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c
@@ -0,0 +1,150 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_rms_q15.c   
+*   
+* Description:	Root Mean Square of the elements of a Q15 vector. 
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @addtogroup RMS   
+ * @{   
+ */
+
+/**
+ * @brief Root Mean Square of the elements of a Q15 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector; must be non-zero
+ * @param[out]      *pResult rms value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * The 34.30 sum is divided by blockSize first; the resulting mean is then
+ * truncated to 34.15 format by discarding the lower 15 bits and saturated to
+ * 1.15 format before the square root is taken.
+ */
+
+void arm_rms_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult)
+{
+  q63_t sum = 0;                                 /* accumulator, 34.30 format */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t in;                                      /* two packed Q15 input samples */
+  q15_t in1;                                     /* single Q15 input sample, later the mean of squares */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Each 32-bit load fetches two samples; SMLALD squares both halves and accumulates */
+    in = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in, in, sum);
+    in = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in, in, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Plain multiply: SMLALD on a sign-extended sample would also square the upper halfword */
+    in1 = *pSrc++;
+    sum += ((q31_t) in1 * in1);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Mean of squares: divide first, then truncate to 34.15 and saturate to 1.15 format */
+  sum = __SSAT((q31_t) ((sum / (q63_t) blockSize) >> 15), 16);
+
+  in1 = (q15_t) sum;
+
+  /* Take the square root and store the result in the destination */
+  arm_sqrt_q15(in1, pResult);
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  q15_t in;                                      /* single Q15 input sample, later the mean of squares */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Square the sample in 32 bits and add it to the 64-bit accumulator */
+    in = *pSrc++;
+    sum += ((q31_t) in * in);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Mean of squares: divide first, then truncate to 34.15 and saturate to 1.15 format */
+  sum = __SSAT((q31_t) ((sum / (q63_t) blockSize) >> 15), 16);
+
+  in = (q15_t) sum;
+
+  /* Take the square root and store the result in the destination */
+  arm_sqrt_q15(in, pResult);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**   
+ * @} end of RMS group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c
@@ -0,0 +1,143 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_rms_q31.c   
+*   
+* Description:	Root Mean Square of the elements of a Q31 vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @addtogroup RMS   
+ * @{   
+ */
+
+
+/**
+ * @brief Root Mean Square of the elements of a Q31 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector; must be non-zero
+ * @param[out]      *pResult rms value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The input is represented in 1.31 format, and intermediate multiplication
+ * yields a 2.62 format.
+ * The accumulator maintains full precision of the intermediate multiplication results.
+ * It is unsigned, because squares are never negative, and there is no saturation
+ * on intermediate additions: once the sum of squares reaches 4.0 it wraps around
+ * and distorts the result.
+ * In order to avoid overflows completely, the input signal must be scaled down by
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ * Finally, the accumulator is right shifted by 31 bits, divided by blockSize and
+ * saturated to yield the 1.31 format mean of squares whose square root is returned.
+ */
+
+void arm_rms_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  uint64_t sum = 0;                              /* accumulator, unsigned so that overflow wraps */
+  q31_t in;                                      /* Temporary variable to store the input */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t *pIn1 = pSrc;                            /* Working input pointer */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Square each sample in 64 bits and add it to the accumulator */
+    in = *pIn1++;
+    sum += (q63_t) in * in;
+    in = *pIn1++;
+    sum += (q63_t) in * in;
+    in = *pIn1++;
+    sum += (q63_t) in * in;
+    in = *pIn1++;
+    sum += (q63_t) in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Square the sample in 64 bits and add it to the accumulator */
+    in = *pIn1++;
+    sum += (q63_t) in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Square the sample in 64 bits and add it to the accumulator */
+    in = *pSrc++;
+    sum += (q63_t) in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* Mean of squares: 2.62 to 1.31 by 31 right shifts, then divide by the length */
+  sum = (sum >> 31) / blockSize;
+
+  /* (-1.0) squared is one LSB above the largest 1.31 value, so saturate before the root */
+  arm_sqrt_q31((q31_t) ((sum > 0x7FFFFFFFu) ? 0x7FFFFFFFu : sum), pResult);
+}
+
+/**   
+ * @} end of RMS group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c
@@ -0,0 +1,222 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_std_f32.c   
+*   
+* Description:	Standard deviation of the elements of a floating-point vector. 
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @defgroup STD Standard deviation   
+ *   
+ * Calculates the standard deviation of the elements in the input vector.    
+ * The following algorithm is used:
+ *  
+ * <pre>   
+ * 	Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))  
+ *  
+ *	   where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]  
+ *  
+ *	                   sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]  
+ * </pre>  
+ *   
+ * There are separate functions for floating point, Q31, and Q15 data types.   
+ */
+
+/**   
+ * @addtogroup STD   
+ * @{   
+ */
+
+
+/**
+ * @brief Standard deviation of the elements of a floating-point vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult standard deviation value returned here
+ * @return none.
+ *
+ * @details
+ * Both build variants divide by (blockSize - 1), but they order the
+ * floating-point operations differently (N is blockSize):
+ *
+ * <pre>
+ *   Cortex-M4/M3: sqrt(sumOfSquares / (N - 1) - (mean * mean) * (N / (N - 1)))
+ *   Cortex-M0:    sqrt((sumOfSquares - (sum * sum) / N) / (N - 1))
+ * </pre>
+ *
+ * The square root itself is delegated to arm_sqrt_f32().
+ */
+
+
+void arm_std_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t acc = 0.0f;                          /* Running total; each variant notes what it holds */
+  float32_t count = (float32_t) blockSize;       /* Vector length as a float */
+  float32_t countLess1 = count - 1.0f;           /* N - 1 */
+  float32_t x;                                   /* Current input sample */
+  uint32_t remaining;                            /* Loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Cortex-M4 and Cortex-M3: two passes over the data, each unrolled by four */
+
+  float32_t meanSq;                              /* sumOfSquares / (N - 1) */
+  float32_t avg;                                 /* sum / N */
+  float32_t avgSqScaled;                         /* (mean * mean) * (N / (N - 1)) */
+  float32_t *pStart = pSrc;                      /* Start of the vector, needed for the second pass */
+
+  /* Pass 1: acc collects the sum of squares, four samples per iteration */
+  for(remaining = blockSize >> 2u; remaining != 0u; remaining--)
+  {
+    x = pSrc[0];
+    acc += x * x;
+    x = pSrc[1];
+    acc += x * x;
+    x = pSrc[2];
+    acc += x * x;
+    x = pSrc[3];
+    acc += x * x;
+
+    pSrc += 4;
+  }
+
+  /* Pass 1 tail: the 0 to 3 samples left when blockSize is not a multiple of 4 */
+  for(remaining = blockSize & 0x3u; remaining != 0u; remaining--)
+  {
+    x = *pSrc++;
+    acc += x * x;
+  }
+
+  /* Sum of squares over (N - 1) */
+  meanSq = acc / countLess1;
+
+  /* Restart from the beginning of the vector with a cleared total */
+  acc = 0.0f;
+  pSrc = pStart;
+
+  /* Pass 2: acc collects the plain sum, four samples per iteration */
+  for(remaining = blockSize >> 2u; remaining != 0u; remaining--)
+  {
+    acc += pSrc[0];
+    acc += pSrc[1];
+    acc += pSrc[2];
+    acc += pSrc[3];
+
+    pSrc += 4;
+  }
+
+  /* Pass 2 tail: the 0 to 3 samples left over */
+  for(remaining = blockSize & 0x3u; remaining != 0u; remaining--)
+  {
+    acc += *pSrc++;
+  }
+
+  /* Mean of the samples */
+  avg = acc / count;
+
+  /* Square of the mean, rescaled by N / (N - 1) so that it matches meanSq */
+  avgSqScaled = (avg * avg) * (count / countLess1);
+
+  /* The standard deviation is the root of the difference */
+  arm_sqrt_f32((meanSq - avgSqScaled), pResult);
+
+#else
+
+  /* Cortex-M0: a single pass gathers both totals; acc collects the plain sum */
+
+  float32_t sqTotal = 0.0f;                      /* Sum of squares */
+  float32_t sumSqOverN;                          /* (sum * sum) / N */
+  float32_t variance;                            /* Temporary variance storage */
+
+  for(remaining = blockSize; remaining != 0u; remaining--)
+  {
+    x = *pSrc++;
+
+    /* Sum of squares of the input samples */
+    sqTotal += x * x;
+
+    /* Sum of the input samples */
+    acc += x;
+  }
+
+  /* Square of the sum divided by the length */
+  sumSqOverN = ((acc * acc) / count);
+
+  /* Variance with the (N - 1) divisor */
+  variance = ((sqTotal - sumSqOverN) / countLess1);
+
+  /* The standard deviation is the root of the variance */
+  arm_sqrt_f32(variance, pResult);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**   
+ * @} end of STD group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c
@@ -0,0 +1,229 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_std_q15.c   
+*   
+* Description:	Standard deviation of an array of Q15 type.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup STD   
+ * @{   
+ */
+
+/**
+ * @brief Standard deviation of the elements of a Q15 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult standard deviation value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using 64-bit internal accumulators.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * The variance is formed in 2.30 format with a single exact integer division,
+ * (blockSize * sumOfSquares - sum * sum) / (blockSize * (blockSize - 1)),
+ * then truncated to 1.15 format by discarding the lower 15 bits and saturated
+ * before the square root is taken.
+ *
+ * \par
+ * A vector with fewer than two elements has no sample standard deviation;
+ * 0 is returned in that case. The numerator is held in 64 bits, so blockSize
+ * must not exceed 65536 (at that length both products can reach 2^62).
+ */
+
+void arm_std_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult)
+{
+  q63_t sum = 0;                                 /* Sum of the samples */
+  q63_t sumOfSquares = 0;                        /* Sum of the squared samples, 34.30 format */
+  q63_t variance;                                /* Variance, 2.30 then 1.15 format */
+  q15_t in;                                      /* Single input sample */
+  uint32_t blkCnt;                               /* Loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  q31_t inPair;                                  /* Two packed Q15 input samples */
+  q15_t *pIn = pSrc;                             /* Start of the vector, for the second pass */
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* The (blockSize - 1) divisor below needs at least two samples */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /* First pass: sum of squares, 4 samples per iteration */
+  blkCnt = blockSize >> 2u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Each 32-bit load fetches two samples; SMLALD squares both halves and accumulates */
+    inPair = *__SIMD32(pSrc)++;
+    sumOfSquares = __SMLALD(inPair, inPair, sumOfSquares);
+    inPair = *__SIMD32(pSrc)++;
+    sumOfSquares = __SMLALD(inPair, inPair, sumOfSquares);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Remaining 0 to 3 samples when blockSize is not a multiple of 4 */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* Plain multiply: SMLALD on a sign-extended sample would also square the upper halfword */
+    in = *pSrc++;
+    sumOfSquares += ((q31_t) in * in);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Second pass: plain sum, starting again from the first sample */
+  pSrc = pIn;
+  blkCnt = blockSize >> 2u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Remaining 0 to 3 samples when blockSize is not a multiple of 4 */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Single pass gathering both totals */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    in = *pSrc++;
+
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    sumOfSquares += ((q31_t) in * in);
+
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* Variance in 2.30 format. Dividing once, after forming the full numerator,
+   * keeps every bit of both accumulators instead of saturating them first. */
+  variance = ((q63_t) blockSize * sumOfSquares - sum * sum) /
+             ((q63_t) blockSize * (q63_t) (blockSize - 1u));
+
+  /* The numerator is non-negative within the documented blockSize limit;
+   * clamp anyway so an oversized vector can never feed a negative value on. */
+  if(variance < 0)
+  {
+    variance = 0;
+  }
+
+  /* 2.30 to 1.15 by discarding the lower 15 bits, then saturate to 1.15 format */
+  variance = variance >> 15;
+
+  if(variance > 0x7FFF)
+  {
+    variance = 0x7FFF;
+  }
+
+  /* Compute standard deviation and store the result to the destination */
+  arm_sqrt_q15((q15_t) variance, pResult);
+
+}
+
+/**   
+ * @} end of STD group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c
@@ -0,0 +1,219 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_std_q31.c   
+*   
+* Description:	Standard deviation of an array of Q31 type.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup STD   
+ * @{   
+ */
+
+
+/**
+ * @brief Standard deviation of the elements of a Q31 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult standard deviation value returned here
+ * @return none.
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using internal 64-bit accumulators.
+ * The input is represented in 1.31 format, and intermediate multiplication
+ * yields a 2.62 format.
+ * The sum-of-squares accumulator maintains full precision of the intermediate
+ * multiplication results. It is unsigned and is not saturated on intermediate
+ * additions: once the sum of squares reaches 4.0 it wraps around and distorts
+ * the result.
+ * In order to avoid overflows completely the input signal must be scaled down by
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ * The variance is formed in 2.62 format as
+ * (sumOfSquares - blockSize * mean * mean) / (blockSize - 1), where mean is the
+ * sample mean truncated to 1.31 format. It is then right shifted by 31 bits and
+ * saturated to yield a 1.31 format value whose square root is returned.
+ *
+ *\par
+ * A vector with fewer than two elements has no sample standard deviation;
+ * 0 is returned in that case.
+ */
+
+
+void arm_std_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q63_t sum = 0;                                 /* Sum of the samples */
+  uint64_t sumOfSquares = 0;                     /* Sum of the squared samples, 2.62 format */
+  uint64_t nMeanSquared;                         /* blockSize * mean * mean, 2.62 format */
+  uint64_t variance;                             /* Variance, 2.62 then 1.31 format */
+  q31_t mean;                                    /* Sample mean, 1.31 format */
+  q31_t in;                                      /* Input sample */
+  uint32_t blkCnt;                               /* Loop counter */
+
+  /* The (blockSize - 1) divisor below needs at least two samples */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /* Loop unrolling: 4 samples per iteration, both totals gathered in one pass */
+  blkCnt = blockSize >> 2u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Remaining 0 to 3 samples when blockSize is not a multiple of 4 */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    in = *pSrc++;
+
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    sumOfSquares += ((q63_t) (in) * (in));
+
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Sample mean. The 64-bit sum is divided before narrowing, so the quotient
+   * always lies within the 1.31 range of the inputs. */
+  mean = (q31_t) (sum / (q63_t) blockSize);
+
+  /* blockSize * mean^2 in 2.62 format. It never exceeds the true sum of squares,
+   * so it fits in 64 bits whenever the accumulator itself has not wrapped. */
+  nMeanSquared = (uint64_t) ((q63_t) mean * mean) * blockSize;
+
+  /* Variance: exact integer division in 2.62 format, then 31 right shifts to 1.31 */
+  variance = ((sumOfSquares - nMeanSquared) / (blockSize - 1u)) >> 31;
+
+  /* Saturate to the largest 1.31 value; this also bounds a wrapped accumulator */
+  if(variance > 0x7FFFFFFFu)
+  {
+    variance = 0x7FFFFFFFu;
+  }
+
+  /* Compute standard deviation and then store the result to the destination */
+  arm_sqrt_q31((q31_t) variance, pResult);
+
+}
+
+/**   
+ * @} end of STD group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c
@@ -0,0 +1,219 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_var_f32.c   
+*   
+* Description:	Variance of the elements of a floating-point vector.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @defgroup variance  Variance   
+ *   
+ * Calculates the variance of the elements in the input vector.   
+ * The following algorithm is used:
+ *   
+ * <pre>   
+ * 	Result = (sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1)  
+ *  
+ *	   where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]  
+ *  
+ *	                   sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]  
+ * </pre>  
+ *   
+ * There are separate functions for floating point, Q31, and Q15 data types.   
+ */
+
+/**   
+ * @addtogroup variance   
+ * @{   
+ */
+
+
+/**
+ * @brief Variance of the elements of a floating-point vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult variance value returned here
+ * @return none.
+ *
+ * @details
+ * Stores the unbiased sample variance
+ * (sumOfSquares - sum * sum / blockSize) / (blockSize - 1) in *pResult.
+ * That expression divides by zero when blockSize is 0 or 1, so for those
+ * lengths 0 is stored and the input is not read.
+ */
+
+void arm_var_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  float32_t sum = (float32_t) 0.0;               /* Accumulator */
+  float32_t meanOfSquares, mean, in, squareOfMean;      /* Temporary variables */
+  uint32_t blkCnt;                               /* loop counter */
+  float32_t *pIn;                                /* Start of the input, kept for the second pass */
+
+  /* Fewer than two samples: the divisions below would be by zero and yield NaN */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0.0f;
+    return;
+  }
+
+  /* Remember where the input starts; pSrc is advanced by the first pass */
+  pIn = pSrc;
+
+  /* Loop unrolling: number of groups of 4 samples */
+  blkCnt = blockSize >> 2u;
+
+  /* Pass 1: sum of squares, 4 samples per iteration.
+   ** A second loop below handles the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* sum = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    in = *pSrc++;
+    sum += in * in;
+    in = *pSrc++;
+    sum += in * in;
+    in = *pSrc++;
+    sum += in * in;
+    in = *pSrc++;
+    sum += in * in;
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, accumulate the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* sum = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    in = *pSrc++;
+    sum += in * in;
+    blkCnt--;
+  }
+
+  /* Scale the sum of squares by 1 / (blockSize - 1); despite its name,
+   * meanOfSquares therefore holds sumOfSquares / (blockSize - 1). */
+  meanOfSquares = sum / ((float32_t) blockSize - 1.0f);
+
+  /* Reset the accumulator */
+  sum = 0.0f;
+
+  /* Loop unrolling: number of groups of 4 samples */
+  blkCnt = blockSize >> 2u;
+
+  /* Reset the input working pointer */
+  pSrc = pIn;
+
+  /* Pass 2: plain sum, 4 samples per iteration.
+   ** A second loop below handles the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, accumulate the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    sum += *pSrc++;
+    blkCnt--;
+  }
+
+  /* Compute mean of all input values */
+  mean = sum / (float32_t) blockSize;
+
+  /* Square of the mean, scaled by blockSize / (blockSize - 1) so that it
+   * equals (sum * sum / blockSize) / (blockSize - 1) */
+  squareOfMean = (mean * mean) * (((float32_t) blockSize) /
+                                  ((float32_t) blockSize - 1.0f));
+
+  /* Compute variance and then store the result to the destination */
+  *pResult = meanOfSquares - squareOfMean;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  float32_t sum = 0.0f;                          /* Sum of the samples */
+  float32_t sumOfSquares = 0.0f;                 /* Sum of squares */
+  float32_t squareOfSum;                         /* Square of sum, divided by blockSize */
+  float32_t in;                                  /* input value */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* Fewer than two samples: the divisions below would be by zero and yield NaN */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0.0f;
+    return;
+  }
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* sumOfSquares = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    in = *pSrc++;
+    sumOfSquares += in * in;
+
+    /* sum = A[0] + A[1] + ... + A[blockSize-1] */
+    sum += in;
+    blkCnt--;
+  }
+
+  /* Compute the square of sum */
+  squareOfSum = ((sum * sum) / (float32_t) blockSize);
+
+  /* Compute the variance */
+  *pResult = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**   
+ * @} end of variance group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c
@@ -0,0 +1,214 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_var_q15.c   
+*   
+* Description:	Variance of an array of Q15 type.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup variance   
+ * @{   
+ */
+
+/**
+ * @brief Variance of the elements of a Q15 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult variance value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ * 15 bits, and then saturated to yield a result in 1.15 format.
+ *
+ * \par
+ * The variance divides by (blockSize - 1); when blockSize is 0 or 1,
+ * 0 is stored in *pResult and the input is not read.
+ */
+
+void arm_var_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q63_t sum = 0;                                 /* Accumulator */
+  q31_t meanOfSquares, squareOfMean;             /* Mean of square and square of mean */
+  q15_t mean;                                    /* Sum of the samples saturated to 16 bits */
+  uint32_t blkCnt;                               /* loop counter */
+  q15_t t;                                       /* Fixed-point reciprocal of the divisor */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t in;                                      /* Two packed input samples */
+  q15_t in1;                                     /* Single input sample */
+  q15_t *pIn;                                    /* Start of the input, kept for the second pass */
+
+  /* Fewer than two samples: the reciprocals below would divide by zero */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  pIn = pSrc;
+
+  /* Loop unrolling: number of groups of 4 samples */
+  blkCnt = blockSize >> 2u;
+
+  /* Pass 1: sum of squares, 4 samples per iteration.
+   ** A second loop below handles the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* Each 32-bit read holds two samples; __SMLALD adds both squares to sum */
+    in = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in, in, sum);
+    in = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in, in, sum);
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, accumulate the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* Plain multiply: __SMLALD would also square the sign-extended upper
+     * halfword of in1 and add 1 for every negative sample. */
+    in1 = *pSrc++;
+    sum += ((q31_t) in1 * in1);
+    blkCnt--;
+  }
+
+  /* t = 1 / (blockSize - 1) scaled by 2^14; the sum of squares is reduced to
+   * 16 bits with saturation before being multiplied by it. */
+  t = (q15_t) ((1.0f / (float32_t) (blockSize - 1u)) * 16384);
+  sum = __SSAT((sum >> 15u), 16u);
+
+  meanOfSquares = (q31_t) ((sum * t) >> 14u);
+
+  /* Reset the accumulator */
+  sum = 0;
+
+  /* Loop unrolling: number of groups of 4 samples */
+  blkCnt = blockSize >> 2u;
+
+  /* Reset the input working pointer */
+  pSrc = pIn;
+
+  /* Pass 2: plain sum, 4 samples per iteration.
+   ** A second loop below handles the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, accumulate the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    sum += *pSrc++;
+    blkCnt--;
+  }
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  q63_t sumOfSquares = 0;                        /* Accumulator */
+  q15_t in;                                      /* Temporary variable */
+
+  /* Fewer than two samples: the reciprocals below would divide by zero */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* sumOfSquares = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+
+    /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    sum += in;
+    blkCnt--;
+  }
+
+  /* t = 1 / (blockSize - 1) scaled by 2^14; the sum of squares is reduced to
+   * 16 bits with saturation before being multiplied by it. */
+  t = (q15_t) ((1.0f / (float32_t) (blockSize - 1u)) * 16384);
+  sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
+  meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* t = 1 / (blockSize * (blockSize - 1)) scaled by 2^15.  The product is formed
+   * in floating point because blockSize * (blockSize - 1u) wraps in uint32_t
+   * once blockSize exceeds 65536. */
+  t = (q15_t) ((1.0f / ((float32_t) blockSize * (float32_t) (blockSize - 1u))) * 32768);
+  mean = __SSAT(sum, 16u);
+
+  /* Square the saturated sum, then scale it by t */
+  squareOfMean = ((q31_t) mean * mean) >> 15;
+  squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15);
+
+  /* Compute variance and then store the result to the destination */
+  *pResult = (meanOfSquares - squareOfMean);
+
+}
+
+/**   
+ * @} end of variance group   
+ */
--- a/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c
+++ b/hardware/sam/system/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c
@@ -0,0 +1,216 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010 ARM Limited. All rights reserved.   
+*   
+* $Date:        15. July 2011  
+* $Revision: 	V1.0.10  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:		arm_var_q31.c   
+*   
+* Description:	Variance of an array of Q31 type.   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.0.10 2011/7/15 
+*    Big Endian support added and Merged M0 and M3/M4 Source code.  
+*   
+* Version 1.0.3 2010/11/29  
+*    Re-organized the CMSIS folders and updated documentation.   
+*    
+* Version 1.0.2 2010/11/11   
+*    Documentation updated.    
+*   
+* Version 1.0.1 2010/10/05    
+*    Production release and review comments incorporated.   
+*   
+* Version 1.0.0 2010/09/20    
+*    Production release and review comments incorporated.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**   
+ * @ingroup groupStats   
+ */
+
+/**   
+ * @addtogroup variance   
+ * @{   
+ */
+
+/**
+ * @brief Variance of the elements of a Q31 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult variance value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The input is represented in 1.31 format, and intermediate multiplication
+ * yields a 2.62 format.
+ * The accumulator maintains full precision of the intermediate multiplication results,
+ * but provides only a single guard bit.
+ * There is no saturation on intermediate additions.
+ * If the accumulator overflows it wraps around and distorts the result.
+ * In order to avoid overflows completely the input signal must be scaled down by
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
+ *
+ *\par
+ * The variance divides by (blockSize - 1); when blockSize is 0 or 1,
+ * 0 is stored in *pResult and the input is not read.
+ */
+
+void arm_var_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q63_t * pResult)
+{
+  q63_t sum = 0;                                 /* Accumulator */
+  q31_t meanOfSquares, squareOfMean;             /* Mean of square and square of mean */
+  q31_t mean;                                    /* Sum of the samples narrowed to 32 bits */
+  q31_t in;                                      /* Input variable */
+  q31_t t;                                       /* Fixed-point reciprocal of the divisor */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t *pIn;                                    /* Start of the input, kept for the second pass */
+
+  /* Fewer than two samples: the reciprocals below would divide by zero */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  pIn = pSrc;
+
+  /* Loop unrolling: number of groups of 4 samples */
+  blkCnt = blockSize >> 2u;
+
+  /* Pass 1: sum of squares, 4 samples per iteration.
+   ** A second loop below handles the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* sum = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    in = *pSrc++;
+    sum += ((q63_t) (in) * (in));
+    in = *pSrc++;
+    sum += ((q63_t) (in) * (in));
+    in = *pSrc++;
+    sum += ((q63_t) (in) * (in));
+    in = *pSrc++;
+    sum += ((q63_t) (in) * (in));
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, accumulate the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* sum = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    in = *pSrc++;
+    sum += ((q63_t) (in) * (in));
+    blkCnt--;
+  }
+
+  /* t = 1 / (blockSize - 1) scaled by 2^30; the sum of squares is shifted
+   * down by 31 bits before being multiplied by it. */
+  t = (q31_t) ((1.0 / (blockSize - 1)) * 1073741824LL);
+  sum = (sum >> 31);
+  meanOfSquares = (q31_t) ((sum * t) >> 30);
+
+  /* Reset the accumulator */
+  sum = 0;
+
+  /* Loop unrolling: number of groups of 4 samples */
+  blkCnt = blockSize >> 2u;
+
+  /* Reset the input working pointer */
+  pSrc = pIn;
+
+  /* Pass 2: plain sum, 4 samples per iteration.
+   ** A second loop below handles the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, accumulate the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    sum += *pSrc++;
+    blkCnt--;
+  }
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  q63_t sumOfSquares = 0;                        /* Accumulator */
+
+  /* Fewer than two samples: the reciprocals below would divide by zero */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* sumOfSquares = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+
+    /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    sum += in;
+    blkCnt--;
+  }
+
+  /* t = 1 / (blockSize - 1) scaled by 2^30; the sum of squares is shifted
+   * down by 31 bits before being multiplied by it. */
+  t = (q31_t) ((1.0 / (blockSize - 1)) * 1073741824LL);
+  sumOfSquares = (sumOfSquares >> 31);
+  meanOfSquares = (q31_t) ((sumOfSquares * t) >> 30);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* t = 1 / (blockSize * (blockSize - 1)) scaled by 2^31.  The product is formed
+   * in floating point because blockSize * (blockSize - 1u) wraps in uint32_t
+   * once blockSize exceeds 65536. */
+  t = (q31_t) ((1.0 / ((double) blockSize * (blockSize - 1u))) * 2147483648LL);
+  mean = (q31_t) (sum);
+
+  /* Square the narrowed sum, then scale it by t */
+  squareOfMean = (q31_t) (((q63_t) mean * mean) >> 31);
+  squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 31);
+
+  /* Compute variance and then store the result to the destination */
+  *pResult = (q63_t) meanOfSquares - squareOfMean;
+
+}
+
+/**   
+ * @} end of variance group   
+ */