Matrix Operation

1. Basic Setup 

Open STM32CubeMX and generate basic code for your board.
Set up the Makefile or CMakeLists.txt for ARM Math (see the ARM Math tutorial).

Update the Makefile or CMakeLists.txt to add the matrix function sources. The first listing below is for a Makefile, the second for CMakeLists.txt.

C_SOURCES = \
... \
... \
Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_f32.c \
Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f32.c \
Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f32.c \
Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f32.c \
Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f32.c \
Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c \
Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f32.c

 # Add sources to executable
 target_sources(${CMAKE_PROJECT_NAME} PRIVATE
     # Add user sources here
     Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_f32.c
     Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f32.c
     Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f32.c
     Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f32.c
     Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f32.c
     Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c
     Drivers/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f32.c
 )

Add matrix check definition in Makefile or CMakeLists.txt.

# C defines
C_DEFS = \
... \
-DARM_MATH_CM4 \
-DARM_MATH_MATRIX_CHECK

# Add project symbols (macros)
target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE
    # Add user defined symbols
    ARM_MATH_CM4
    ARM_MATH_MATRIX_CHECK
)

2. Test Matrix Functions 

Navigate to Core > Src and open main.c.

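Include headers. Use the first listing when printf output is sent through ITM, or the second listing when it is sent through USB CDC.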

/* USER CODE BEGIN Includes */
#include <stdio.h>
#include "arm_math.h"
/* USER CODE END Includes */

/* USER CODE BEGIN Includes */
#include <stdio.h>
#include "arm_math.h"
#include "usbd_cdc_if.h"
/* USER CODE END Includes */

Warning

Do not forget to add the linker flag -u _printf_float in the Makefile or CMakeLists.txt; without it, printf will not print floating-point numbers.

For a Makefile, add it to LDFLAGS.

LDFLAGS = $(MCU) -specs=nano.specs -T$(LDSCRIPT) $(LIBDIR) $(LIBS) -Wl,-Map=$(BUILD_DIR)/$(TARGET).map,--cref -Wl,--gc-sections
LDFLAGS += -u _printf_float

For CMake, create a target_link_options block at the bottom of CMakeLists.txt.

# Add linker flags
target_link_options(${CMAKE_PROJECT_NAME} PRIVATE
    -u _printf_float
)

Override the definition of _write used by printf (first listing: ITM, second listing: USB CDC):

/* USER CODE BEGIN 0 */
/* Output hook behind printf (ITM variant): passes each of the len bytes in
   data to ITM_SendChar(), in order, and reports all of them as written.
   The file argument is not consulted. */
int _write(int file, char *data, int len)
{
  const char *next = data;

  for (int remaining = len; remaining > 0; --remaining)
  {
    ITM_SendChar(*next++);
  }
  return len;
}
/* USER CODE END 0 */

/* USER CODE BEGIN 0 */
int _write(int file, char *data, int len)
{
  CDC_Transmit_FS((uint8_t*)data, (uint16_t)len);
  return len;
}
/* USER CODE END 0 */

Write code to print matrix.

// ...

/* Prints a matrix through printf, one row per output line. Elements are read
   from matrix->pData in row-major order (numRows rows of numCols values) and
   each one is followed by a tab. */
void print_matrix(arm_matrix_instance_f32 *matrix)
{
  /* Row-major storage means a single running pointer visits the elements in
     exactly the order they are printed. */
  const float32_t *element = matrix->pData;

  for (int row = 0; row < matrix->numRows; row++)
  {
    for (int col = 0; col < matrix->numCols; col++)
    {
      printf("%f\t", *element++);
    }
    printf("\n");
  }
}
/* USER CODE END 0 */

Write code to test matrix functions.

/* USER CODE BEGIN 2 */
  arm_matrix_instance_f32 m1;
  float m1data[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  arm_mat_init_f32(&m1, 3, 3, m1data);

  arm_matrix_instance_f32 m2;
  float m2data[9] = {9, 8, 7, 6, 5, 4, 3, 2, 1};
  arm_mat_init_f32(&m2, 3, 3, m2data);

  arm_matrix_instance_f32 result;
  float resultdata[9];
  arm_mat_init_f32(&result, 3, 3, resultdata);
  /* USER CODE END 2 */

  /* Infinite loop */
  /* USER CODE BEGIN WHILE */
  while (1)
  {
    /* USER CODE END WHILE */
    printf("Adding m1 and m2, result:\n");
    arm_mat_add_f32(&m1, &m2, &result);
    print_matrix(&result);
    HAL_Delay(1000);

    printf("Subtracting m1 and m2, result:\n");
    arm_mat_sub_f32(&m1, &m2, &result);
    print_matrix(&result);
    HAL_Delay(1000);

    printf("Multiplying m1 and m2, result:\n");
    arm_mat_mult_f32(&m1, &m2, &result);
    print_matrix(&result);
    HAL_Delay(1000);

    printf("Multiplying m1 by 2\n");
    arm_mat_scale_f32(&m1, 2, &result);
    print_matrix(&result);
    HAL_Delay(1000);

    printf("Transposing m1\n");
    arm_mat_trans_f32(&m1, &result);
    print_matrix(&result);
    HAL_Delay(1000);

    printf("Inverting m1\n");
    if (arm_mat_inverse_f32(&m1, &result) == ARM_MATH_SUCCESS)
    {
      print_matrix(&result);
    }
    else
    {
      printf("Matrix is not invertible\n");
    }
    /* USER CODE BEGIN 3 */
  }
  /* USER CODE END 3 */

3. Test Output:

Adding m1 and m2, result:
10.000000       10.000000       10.000000
10.000000       10.000000       10.000000
10.000000       10.000000       10.000000
Subtracting m1 and m2, result:
-8.000000       -6.000000       -4.000000
-2.000000       0.000000        2.000000
4.000000        6.000000        8.000000
Multiplying m1 and m2, result:
30.000000       24.000000       18.000000
84.000000       69.000000       54.000000
138.000000      114.000000      90.000000
Multiplying m1 by 2
2.000000        4.000000        6.000000
8.000000        10.000000       12.000000
14.000000       16.000000       18.000000
Transposing m1
1.000000        4.000000        7.000000
2.000000        5.000000        8.000000
3.000000        6.000000        9.000000
Inverting m1
Matrix is not invertible

4. Creating Matrix Class and Overloading Operators 

Setup your Makefile or CMakeLists.txt for C++. See cpp setup.

Create matrix32.hpp inside Core/Inc. Copy these contents.

matrix32.hpp

/*
    Dynamic memory allocation is avoided.
    A class template is used, so use only a few distinct matrix sizes; every additional size is instantiated separately and consumes more memory.
    No exceptions are thrown or handled; static assertions are used instead.
    Assign or copy data carefully, especially when the data comes from an array passed through a pointer.
    "= default" is avoided for the copy constructor and copy assignment; otherwise one object's data pointer would point to another object's data.
*/

#ifndef MATRIX32_HPP
#define MATRIX32_HPP

#include <stdio.h>
#include "arm_math.h"

template <uint16_t numRows, uint16_t numCols>
class Matrix32
{
public:
    float32_t data[numRows * numCols];
    arm_matrix_instance_f32 arm_mat;

    Matrix32() noexcept
    {
        arm_mat_init_f32(&arm_mat, numRows, numCols, data);
    }

    Matrix32(const Matrix32 &other) noexcept
    {
        arm_mat_init_f32(&arm_mat, numRows, numCols, data);
        memcpy(arm_mat.pData, other.arm_mat.pData, numRows * numCols * 4);
    }

    template <typename... Args>
    Matrix32(Args... values)
        : data{static_cast<float32_t>(values)...}
    {
        static_assert(sizeof...(values) == numRows * numCols,
                      "Incorrect number of values. If there is static_cast error just above this line, then it is probably be due to mismatched size of matrix passed to copy or assign.");
        arm_mat_init_f32(&arm_mat, numRows, numCols, data);
    }

    Matrix32(const float32_t *pSrc) noexcept
    {
        arm_mat_init_f32(&arm_mat, numRows, numCols, data);
        memcpy(arm_mat.pData, pSrc, numRows * numCols * 4);
    }

    Matrix32 &operator=(const Matrix32 &other) noexcept
    {
        memcpy(arm_mat.pData, other.arm_mat.pData, numRows * numCols * 4);
        return *this;
    }

    Matrix32 &operator=(float32_t *pSrc) noexcept
    {
        memcpy(arm_mat.pData, pSrc, numRows * numCols * 4);
        return *this;
    }

    Matrix32 operator+(const Matrix32 &rhs) const noexcept
    {
        Matrix32 dst;
        arm_mat_add_f32(&this->arm_mat, &rhs.arm_mat, &dst.arm_mat);
        return dst;
    }

    Matrix32 operator-(const Matrix32 &rhs) const noexcept
    {
        Matrix32 dst;
        arm_mat_sub_f32(&this->arm_mat, &rhs.arm_mat, &dst.arm_mat);
        return dst;
    }

    template <uint16_t numAny>
    Matrix32<numRows, numAny> operator*(const Matrix32<numCols, numAny> &rhs) const noexcept
    {
        Matrix32<numRows, numAny> dst;
        arm_mat_mult_f32(&this->arm_mat, &rhs.arm_mat, &dst.arm_mat);
        return dst;
    }

    Matrix32<numCols, numRows> trans() const noexcept
    {
        Matrix32<numCols, numRows> dst;
        arm_mat_trans_f32(&this->arm_mat, &dst.arm_mat);
        return dst;
    }

    Matrix32 inverse() const noexcept
    {
        static_assert(numRows == numCols,
                      "Inverse of rectangular matrix does not exists. Matrix must be square to have inverse.");
        Matrix32 dst;
        arm_status status = arm_mat_inverse_f32(&this->arm_mat, &dst.arm_mat);
        if (status == ARM_MATH_SINGULAR)
        {
            printf("EXCEPTION:: MATRIX INVERSION FAILED !!\n");
        }

        return dst;
    }

    Matrix32 operator*(const float32_t &rhs) const noexcept
    {
        Matrix32 dst;
        arm_mat_scale_f32(&this->arm_mat, rhs, &dst.arm_mat);
        return dst;
    }

    Matrix32 scale(const float32_t &scaler) const noexcept
    {
        Matrix32 dst;
        arm_mat_scale_f32(&this->arm_mat, scaler, &dst.arm_mat);
        return dst;
    }

    void fill(const float32_t &value) noexcept
    {
        arm_fill_f32(value, arm_mat.pData, numRows * numCols);
    }

    void setIdentity() noexcept
    {
        static_assert(numRows == numCols, "Only square matrix can be identity matrix");
        for (uint16_t i = 0; i < numRows; ++i)
        {
            for (uint16_t j = 0; j < numCols; ++j)
            {
                arm_mat.pData[i * numCols + j] = (i == j) ? 1.0f : 0.0f;
            }
        }
    }

    void printData() const noexcept
    {
        for (uint16_t i = 0; i < numRows; ++i)
        {
            for (uint16_t j = 0; j < numCols; ++j)
            {
                printf("%f\t", arm_mat.pData[i * numCols + j]);
            }
            printf("\n");
        }
    }

    ~Matrix32() = default;
};

#endif // MATRIX32_HPP

Create app.h inside Core/Inc and app.cpp inside Core/Src. Copy these contents.

app.h

#ifndef APP_H
#define APP_H

/* C-callable entry points of the C++ application code. The functions are
   declared with (void) so that they are full prototypes when this header is
   included from a C file; an empty parameter list only means "no parameters"
   in C++. */

#ifdef __cplusplus
extern "C" {
#endif

/* One-time application set-up. */
void init(void);

/* Runs one pass of the application. */
void run(void);

#ifdef __cplusplus
}
#endif

#endif // APP_H

app.cpp

#include "main.h"
#include "matrix32.hpp"

#include "app.h"

// Demo operands (values listed row by row) and the matrix that receives
// every computed result.
Matrix32<3, 3> m1{1, 2, 3, 4, 5, 6, 7, 8, 9};
Matrix32<3, 3> m2{9, 8, 7, 6, 5, 4, 3, 2, 1};
Matrix32<3, 3> result;

// Set-up hook of the application; this demo has nothing to prepare.
void init() {}

// Prints the shared `result` matrix and then waits before the next step.
static void show_result()
{
    result.printData();
    HAL_Delay(1000);
}

// Runs the demo sequence once: add, subtract, multiply, scale, transpose and
// invert m1 (with m2 as the second operand where one is needed). Each step
// prints its heading first, then computes into `result` and displays it.
void run()
{
    printf("Adding m1 and m2, result:\n");
    result = m1 + m2;
    show_result();

    printf("Subtracting m1 and m2, result:\n");
    result = m1 - m2;
    show_result();

    printf("Multiplying m1 and m2, result:\n");
    result = m1 * m2;
    show_result();

    printf("Multiplying m1 by 2\n");
    result = m1.scale(2);
    show_result();

    printf("Transposing m1\n");
    result = m1.trans();
    show_result();

    printf("Inverting m1\n");
    result = m1.inverse();
    show_result();
}

Include app.h in main.c.

/* USER CODE BEGIN Includes */
// ..
#include "app.h"
/* USER CODE END Includes */

Call the init() and run() functions. Also comment out the previous test code in USER CODE 2 and in the while loop.

/* USER CODE BEGIN 2 */
// arm_matrix_instance_f32 m1;
// float m1data[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
// arm_mat_init_f32(&m1, 3, 3, m1data);

// arm_matrix_instance_f32 m2;
// float m2data[9] = {9, 8, 7, 6, 5, 4, 3, 2, 1};
// arm_mat_init_f32(&m2, 3, 3, m2data);

// arm_matrix_instance_f32 result;
// float resultdata[9];
// arm_mat_init_f32(&result, 3, 3, resultdata);

init();
/* USER CODE END 2 */

/* Infinite loop */
/* USER CODE BEGIN WHILE */
while (1)
{
  /* USER CODE END WHILE */

  /* USER CODE BEGIN 3 */
  /* The call is placed inside the "USER CODE 3" section so that it sits
     between a BEGIN/END marker pair. */
  run();
  // printf("Adding m1 and m2, result:\n");
  // arm_mat_add_f32(&m1, &m2, &result);
  // print_matrix(&result);
  // HAL_Delay(1000);

  // printf("Subtracting m1 and m2, result:\n");
  // arm_mat_sub_f32(&m1, &m2, &result);
  // print_matrix(&result);
  // HAL_Delay(1000);

  // printf("Multiplying m1 and m2, result:\n");
  // arm_mat_mult_f32(&m1, &m2, &result);
  // print_matrix(&result);
  // HAL_Delay(1000);

  // printf("Multiplying m1 by 2\n");
  // arm_mat_scale_f32(&m1, 2, &result);
  // print_matrix(&result);
  // HAL_Delay(1000);

  // printf("Transposing m1\n");
  // arm_mat_trans_f32(&m1, &result);
  // print_matrix(&result);
  // HAL_Delay(1000);

  // printf("Inverting m1\n");
  // if (arm_mat_inverse_f32(&m1, &result) == ARM_MATH_SUCCESS)
  // {
  //   print_matrix(&result);
  // }
  // else
  // {
  //   printf("Matrix is not invertible\n");
  // }
}
/* USER CODE END 3 */

Add sources to Makefile or CMakeLists.txt.

# C sources
C_SOURCES = \
... \
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_f32.c

# CXX sources
CXX_SOURCES =  \
Core/Src/app.cpp

 # Add sources to executable
 target_sources(${CMAKE_PROJECT_NAME} PRIVATE
     # ...
     Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_f32.c
     Core/Src/app.cpp
 )

5. Test Output of Matrix Class

Adding m1 and m2, result:
10.000000       10.000000       10.000000
10.000000       10.000000       10.000000
10.000000       10.000000       10.000000
Subtracting m1 and m2, result:
-8.000000       -6.000000       -4.000000
-2.000000       0.000000        2.000000
4.000000        6.000000        8.000000
Multiplying m1 and m2, result:
30.000000       24.000000       18.000000
84.000000       69.000000       54.000000
138.000000      114.000000      90.000000
Multiplying m1 by 2
2.000000        4.000000        6.000000
8.000000        10.000000       12.000000
14.000000       16.000000       18.000000
Transposing m1
1.000000        4.000000        7.000000
2.000000        5.000000        8.000000
3.000000        6.000000        9.000000
Inverting m1
EXCEPTION:: MATRIX INVERSION FAILED !!

Tip

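The raw ARM Math matrix functions are more efficient than the matrix class, because the class returns its results by value and therefore creates and copies temporary matrices. So use the matrix class for small matrices only.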

Matrix Operation

1. Basic Setup

2. Test Matrix Functions

3. Test Output:

4. Creating Matrix Class and Overloading Operators

5. Test Output of Matrix Class

1. Basic Setup 

2. Test Matrix Functions 

4. Creating Matrix Class and Overloading Operators 

5. Test Output of Matrix Class