Compile FrogPilot

This commit is contained in:
FrogAi
2024-03-10 20:59:55 -07:00
parent 69e7feaf01
commit f1acd339d7
1485 changed files with 426154 additions and 398339 deletions

View File

@@ -1,52 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#include "blasfeo_processor_features.h"
#include "blasfeo_target.h"
#include "blasfeo_block_size.h"
#include "blasfeo_stdlib.h"
#include "blasfeo_common.h"
#include "blasfeo_d_aux.h"
#include "blasfeo_d_aux_ext_dep.h"
#include "blasfeo_d_kernel.h"
#include "blasfeo_d_blas.h"
#include "blasfeo_s_aux.h"
#include "blasfeo_s_aux_ext_dep.h"
#include "blasfeo_s_kernel.h"
#include "blasfeo_s_blas.h"
#include "blasfeo_i_aux_ext_dep.h"
#include "blasfeo_v_aux_ext_dep.h"
#include "blasfeo_timing.h"
#include "blasfeo_memory.h"

View File

@@ -1,447 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_BLOCK_SIZE_H_
#define BLASFEO_BLOCK_SIZE_H_
#define D_EL_SIZE 8 // double precision
#define S_EL_SIZE 4 // single precision
#if defined( TARGET_X64_INTEL_SKYLAKE_X )
// common
#define CACHE_LINE_SIZE 64 // data cache size: 64 bytes
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 8-way
#define L2_CACHE_SIZE (256*1024) //(1024*1024) // L2 data cache size: 1 MB ; DTLB1 64*4 kB = 256 kB
#define LLC_CACHE_SIZE (6*1024*1024) //(8*1024*1024) // LLC cache size: 8 MB ; TLB 1536*4 kB = 6 MB
// double
#define D_PS 8 // panel size
#define D_PLD 8 // 4 // GCD of panel length
#define D_M_KERNEL 24 // max kernel size
#define D_N_KERNEL 8 // max kernel size
#define D_KC 128 //256 // 192
#define D_NC 144 //72 //96 //72 // 120 // 512
#define D_MC 2400 // 6000
// single
#define S_PS 16 // panel size
#define S_PLD 4 // GCD of panel length TODO probably 16 when writing assebly
#define S_M_KERNEL 32 // max kernel size
#define S_N_KERNEL 8 // max kernel size
#define S_KC 128 //256
#define S_NC 128 //144
#define S_MC 3000
#elif defined( TARGET_X64_INTEL_HASWELL )
// common
#define CACHE_LINE_SIZE 64 // data cache size: 64 bytes
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 8-way
#define L2_CACHE_SIZE (256*1024) // L2 data cache size: 256 kB ; DTLB1 64*4 kB = 256 kB
#define LLC_CACHE_SIZE (6*1024*1024) // LLC cache size: 6 MB ; TLB 1024*4 kB = 4 MB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 12 // max kernel size
#define D_N_KERNEL 8 // max kernel size
#define D_KC 256 // 192
#define D_NC 64 //96 //72 // 120 // 512
#define D_MC 1500
// single
#define S_PS 8 // panel size
#define S_PLD 4 // 2 // GCD of panel length
#define S_M_KERNEL 24 // max kernel size
#define S_N_KERNEL 8 // max kernel size
#define S_KC 256
#define S_NC 144
#define S_MC 3000
#elif defined( TARGET_X64_INTEL_SANDY_BRIDGE )
// common
#define CACHE_LINE_SIZE 64 // data cache size: 64 bytes
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 8-way
#define L2_CACHE_SIZE (256*1024) // L2 data cache size: 256 kB ; DTLB1 64*4 kB = 256 kB
#define LLC_CACHE_SIZE (4*1024*1024) // LLC cache size: 4 MB ; TLB 1024*4 kB = 4 MB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 8 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 256 //320 //256 //320
#define D_NC 72 //64 //72 //60 // 120
#define D_MC 1000 // 800
// single
#define S_PS 8 // panel size
#define S_PLD 4 // 2 // GCD of panel length
#define S_M_KERNEL 16 // max kernel size
#define S_N_KERNEL 8 // max kernel size
#define S_KC 256
#define S_NC 144
#define S_MC 2000
#elif defined( TARGET_X64_INTEL_CORE )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_N_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_X64_AMD_BULLDOZER )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_N_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_X86_AMD_JAGUAR )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_N_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_X86_AMD_BARCELONA )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_N_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined(TARGET_ARMV8A_APPLE_M1)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (128*1024) // L1 data cache size (big cores): 64 kB, ?-way ; DTLB1 ?
#define LLC_CACHE_SIZE (12*1024*1024) // LLC (L2) cache size (big cores): 12 MB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 8 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 512 //256
#define D_NC 128 //256
#define D_MC 6000
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_N_KERNEL 8 // max kernel size
#define S_KC 512
#define S_NC 256
#define S_MC 6000
#elif defined(TARGET_ARMV8A_ARM_CORTEX_A76)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (64*1024) // L1 data cache size: 64 kB, 4-way ; DTLB1 48*4 kB = 192 kB
#define LLC_CACHE_SIZE (1*1024*1024) // LLC cache size: 1 MB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 8 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 512 //256
#define D_NC 128 //256
#define D_MC 6000
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_N_KERNEL 8 // max kernel size
#define S_KC 512
#define S_NC 256
#define S_MC 6000
#elif defined(TARGET_ARMV8A_ARM_CORTEX_A73)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 (64?) kB, 4-way, seen as 8-(16-)way ; DTLB1 48*4 kB = 192 kB
#define LLC_CACHE_SIZE (1*1024*1024) // LLC cache size: 1 MB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 8 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 320
#define D_NC 256
#define D_MC 6000
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_N_KERNEL 8 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined(TARGET_ARMV8A_ARM_CORTEX_A57)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 2-way ; DTLB1 32*4 kB = 128 kB
#define LLC_CACHE_SIZE (1*1024*1024) // LLC cache size: 1 MB // 2 MB ???
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 8 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 128 //224 //256 //192
#define D_NC 72 //40 //36 //48
#define D_MC (4*192) //512 //488 //600
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_N_KERNEL 8 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined(TARGET_ARMV8A_ARM_CORTEX_A55)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 4-way ; DTLB1 16*4 kB = 64 kB
#define LLC_CACHE_SIZE (512*1024) // LLC cache size: 512 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 12 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 224
#define D_NC 160
#define D_MC 6000
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_N_KERNEL 8 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined(TARGET_ARMV8A_ARM_CORTEX_A53)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 4-way ??? ; DTLB1 10*4 kB = 40 kB
#define LLC_CACHE_SIZE (256*1024) // LLC cache size: 256 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 12 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 160
#define D_NC 128
#define D_MC 6000
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_N_KERNEL 8 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_ARMV7A_ARM_CORTEX_A15 )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_N_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_ARMV7A_ARM_CORTEX_A7 )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_N_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_ARMV7A_ARM_CORTEX_A9 )
// common
#define CACHE_LINE_SIZE 32
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_N_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_GENERIC )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_N_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_N_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#else
#error "Unknown architecture"
#endif
#define D_CACHE_LINE_EL (CACHE_LINE_SIZE/D_EL_SIZE)
#define D_L1_CACHE_EL (L1_CACHE_SIZE/D_EL_SIZE)
#define D_L2_CACHE_EL (L2_CACHE_SIZE/D_EL_SIZE)
#define D_LLC_CACHE_EL (LLC_CACHE_SIZE/D_EL_SIZE)
#define S_CACHE_LINE_EL (CACHE_LINE_SIZE/S_EL_SIZE)
#define S_L1_CACHE_EL (L1_CACHE_SIZE/S_EL_SIZE)
#define S_L2_CACHE_EL (L2_CACHE_SIZE/S_EL_SIZE)
#define S_LLC_CACHE_EL (LLC_CACHE_SIZE/S_EL_SIZE)
#endif // BLASFEO_BLOCK_SIZE_H_

View File

@@ -1,274 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_COMMON_H_
#define BLASFEO_COMMON_H_
#include "blasfeo_target.h"
#ifdef __cplusplus
extern "C" {
#endif
#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) || defined(__ICL) || defined(__ICC) || defined(__INTEL_LLVM_COMPILER)
#define ALIGNED(VEC, BYTES) VEC __attribute__ ((aligned ( BYTES )))
#elif defined (_MSC_VER)
#define ALIGNED(VEC, BYTES) __declspec(align( BYTES )) VEC
#else
#define ALIGNED(VEC, BYTES) VEC
#endif
#if ( defined(LA_HIGH_PERFORMANCE) & defined(MF_PANELMAJ) ) | ( defined(LA_REFERENCE) & defined(MF_PANELMAJ) )
#include "blasfeo_block_size.h"
// matrix structure
struct blasfeo_dmat
{
double *mem; // pointer to passed chunk of memory
double *pA; // pointer to a pm*pn array of doubles, the first is aligned to cache line size
double *dA; // pointer to a min(m,n) (or max???) array of doubles
int m; // rows
int n; // cols
int pm; // packed number or rows
int cn; // packed number or cols
int use_dA; // flag to tell if dA can be used
int memsize; // size of needed memory
};
struct blasfeo_smat
{
float *mem; // pointer to passed chunk of memory
float *pA; // pointer to a pm*pn array of floats, the first is aligned to cache line size
float *dA; // pointer to a min(m,n) (or max???) array of floats
int m; // rows
int n; // cols
int pm; // packed number or rows
int cn; // packed number or cols
int use_dA; // flag to tell if dA can be used
int memsize; // size of needed memory
};
// vector structure
struct blasfeo_dvec
{
double *mem; // pointer to passed chunk of memory
double *pa; // pointer to a pm array of doubles, the first is aligned to cache line size
int m; // size
int pm; // packed size
int memsize; // size of needed memory
};
struct blasfeo_svec
{
float *mem; // pointer to passed chunk of memory
float *pa; // pointer to a pm array of floats, the first is aligned to cache line size
int m; // size
int pm; // packed size
int memsize; // size of needed memory
};
#define BLASFEO_DMATEL(sA,ai,aj) ((sA)->pA[((ai)-((ai)&(D_PS-1)))*(sA)->cn+(aj)*D_PS+((ai)&(D_PS-1))])
#define BLASFEO_SMATEL(sA,ai,aj) ((sA)->pA[((ai)-((ai)&(S_PS-1)))*(sA)->cn+(aj)*S_PS+((ai)&(S_PS-1))])
#define BLASFEO_DVECEL(sa,ai) ((sa)->pa[ai])
#define BLASFEO_SVECEL(sa,ai) ((sa)->pa[ai])
#elif ( defined(LA_HIGH_PERFORMANCE) & defined(MF_COLMAJ) ) | ( defined(LA_REFERENCE) & defined(MF_COLMAJ) ) | defined(LA_EXTERNAL_BLAS_WRAPPER)
// matrix structure
struct blasfeo_dmat
{
double *mem; // pointer to passed chunk of memory
double *pA; // pointer to a m*n array of doubles
double *dA; // pointer to a min(m,n) (or max???) array of doubles
int m; // rows
int n; // cols
int use_dA; // flag to tell if dA can be used
int memsize; // size of needed memory
};
struct blasfeo_smat
{
float *mem; // pointer to passed chunk of memory
float *pA; // pointer to a m*n array of floats
float *dA; // pointer to a min(m,n) (or max???) array of floats
int m; // rows
int n; // cols
int use_dA; // flag to tell if dA can be used
int memsize; // size of needed memory
};
// vector structure
struct blasfeo_dvec
{
double *mem; // pointer to passed chunk of memory
double *pa; // pointer to a m array of doubles, the first is aligned to cache line size
int m; // size
int memsize; // size of needed memory
};
struct blasfeo_svec
{
float *mem; // pointer to passed chunk of memory
float *pa; // pointer to a m array of floats, the first is aligned to cache line size
int m; // size
int memsize; // size of needed memory
};
#define BLASFEO_DMATEL(sA,ai,aj) ((sA)->pA[(ai)+(aj)*(sA)->m])
#define BLASFEO_SMATEL(sA,ai,aj) ((sA)->pA[(ai)+(aj)*(sA)->m])
#define BLASFEO_DVECEL(sa,ai) ((sa)->pa[ai])
#define BLASFEO_SVECEL(sa,ai) ((sa)->pa[ai])
#else
#error : wrong LA or MF choice
#endif
// Explicitly panel-major matrix structure
struct blasfeo_pm_dmat
{
double *mem; // pointer to passed chunk of memory
double *pA; // pointer to a pm*pn array of doubles, the first is aligned to cache line size
double *dA; // pointer to a min(m,n) (or max???) array of doubles
int m; // rows
int n; // cols
int pm; // packed number or rows
int cn; // packed number or cols
int use_dA; // flag to tell if dA can be used
int ps; // panel size
int memsize; // size of needed memory
};
struct blasfeo_pm_smat
{
float *mem; // pointer to passed chunk of memory
float *pA; // pointer to a pm*pn array of floats, the first is aligned to cache line size
float *dA; // pointer to a min(m,n) (or max???) array of floats
int m; // rows
int n; // cols
int pm; // packed number or rows
int cn; // packed number or cols
int use_dA; // flag to tell if dA can be used
int ps; // panel size
int memsize; // size of needed memory
};
struct blasfeo_pm_dvec
{
double *mem; // pointer to passed chunk of memory
double *pa; // pointer to a pm array of doubles, the first is aligned to cache line size
int m; // size
int pm; // packed size
int memsize; // size of needed memory
};
struct blasfeo_pm_svec
{
float *mem; // pointer to passed chunk of memory
float *pa; // pointer to a pm array of floats, the first is aligned to cache line size
int m; // size
int pm; // packed size
int memsize; // size of needed memory
};
// Explicitly column-major matrix structure
struct blasfeo_cm_dmat
{
double *mem; // pointer to passed chunk of memory
double *pA; // pointer to a m*n array of doubles
double *dA; // pointer to a min(m,n) (or max???) array of doubles
int m; // rows
int n; // cols
int use_dA; // flag to tell if dA can be used
int memsize; // size of needed memory
};
struct blasfeo_cm_smat
{
float *mem; // pointer to passed chunk of memory
float *pA; // pointer to a m*n array of floats
float *dA; // pointer to a min(m,n) (or max???) array of floats
int m; // rows
int n; // cols
int use_dA; // flag to tell if dA can be used
int memsize; // size of needed memory
};
struct blasfeo_cm_dvec
{
double *mem; // pointer to passed chunk of memory
double *pa; // pointer to a m array of doubles, the first is aligned to cache line size
int m; // size
int memsize; // size of needed memory
};
struct blasfeo_cm_svec
{
float *mem; // pointer to passed chunk of memory
float *pa; // pointer to a m array of floats, the first is aligned to cache line size
int m; // size
int memsize; // size of needed memory
};
#define BLASFEO_PM_DMATEL(sA,ai,aj) ((sA)->pA[((ai)-((ai)&((sA)->ps-1)))*(sA)->cn+(aj)*((sA)->ps)+((ai)&((sA)->ps-1))])
#define BLASFEO_PM_SMATEL(sA,ai,aj) ((sA)->pA[((ai)-((ai)&((sA)->ps-1)))*(sA)->cn+(aj)*((sA)->ps)+((ai)&((sA)->ps-1))])
#define BLASFEO_PM_DVECEL(sa,ai) ((sa)->pa[ai])
#define BLASFEO_PM_SVECEL(sa,ai) ((sa)->pa[ai])
#define BLASFEO_CM_DMATEL(sA,ai,aj) ((sA)->pA[(ai)+(aj)*(sA)->m])
#define BLASFEO_CM_SMATEL(sA,ai,aj) ((sA)->pA[(ai)+(aj)*(sA)->m])
#define BLASFEO_CM_DVECEL(sa,ai) ((sa)->pa[ai])
#define BLASFEO_CM_SVECEL(sa,ai) ((sa)->pa[ai])
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_COMMON_H_

View File

@@ -1,255 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
/*
* auxiliary algebra operations header
*
* include/blasfeo_aux_lib*.h
*
*/
#ifndef BLASFEO_D_AUX_H_
#define BLASFEO_D_AUX_H_
#include <stdlib.h>
#include "blasfeo_common.h"
#include "blasfeo_d_aux_old.h"
#ifdef __cplusplus
extern "C" {
#endif
// --- memory size calculations
//
// returns the memory size (in bytes) needed for a dmat
size_t blasfeo_memsize_dmat(int m, int n);
// returns the memory size (in bytes) needed for the diagonal of a dmat
size_t blasfeo_memsize_diag_dmat(int m, int n);
// returns the memory size (in bytes) needed for a dvec
size_t blasfeo_memsize_dvec(int m);
// --- creation
//
// create a strmat for a matrix of size m*n by using memory passed by a pointer (pointer is not updated)
void blasfeo_create_dmat(int m, int n, struct blasfeo_dmat *sA, void *memory);
// create a strvec for a vector of size m by using memory passed by a pointer (pointer is not updated)
void blasfeo_create_dvec(int m, struct blasfeo_dvec *sA, void *memory);
// --- packing
// pack the column-major matrix A into the matrix struct B
void blasfeo_pack_dmat(int m, int n, double *A, int lda, struct blasfeo_dmat *sB, int bi, int bj);
// pack the lower-triangular column-major matrix A into the matrix struct B
void blasfeo_pack_l_dmat(int m, int n, double *A, int lda, struct blasfeo_dmat *sB, int bi, int bj);
// pack the upper-triangular column-major matrix A into the matrix struct B
void blasfeo_pack_u_dmat(int m, int n, double *A, int lda, struct blasfeo_dmat *sB, int bi, int bj);
// transpose and pack the column-major matrix A into the matrix struct B
void blasfeo_pack_tran_dmat(int m, int n, double *A, int lda, struct blasfeo_dmat *sB, int bi, int bj);
// pack the vector x into the vector structure y
void blasfeo_pack_dvec(int m, double *x, int xi, struct blasfeo_dvec *sy, int yi);
// unpack the matrix structure A into the column-major matrix B
void blasfeo_unpack_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, double *B, int ldb);
// transpose and unpack the matrix structure A into the column-major matrix B
void blasfeo_unpack_tran_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, double *B, int ldb);
// pack the vector structure x into the vector y
void blasfeo_unpack_dvec(int m, struct blasfeo_dvec *sx, int xi, double *y, int yi);
// --- cast
//
//void d_cast_mat2strmat(double *A, struct blasfeo_dmat *sA); // TODO
//void d_cast_diag_mat2strmat(double *dA, struct blasfeo_dmat *sA); // TODO
//void d_cast_vec2vecmat(double *a, struct blasfeo_dvec *sx); // TODO
// ge
// --- insert/extract
//
// sA[ai, aj] <= a
void blasfeo_dgein1(double a, struct blasfeo_dmat *sA, int ai, int aj);
// <= sA[ai, aj]
double blasfeo_dgeex1(struct blasfeo_dmat *sA, int ai, int aj);
// --- set
// A <= alpha
void blasfeo_dgese(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
// --- copy / scale
// B <= A
void blasfeo_dgecp(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// A <= alpha*A
void blasfeo_dgesc(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
// B <= alpha*A
void blasfeo_dgecpsc(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// B <= A, A lower triangular
void blasfeo_dtrcp_l(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
void blasfeo_dtrcpsc_l(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
void blasfeo_dtrsc_l(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
// --- sum
// B <= B + alpha*A
void blasfeo_dgead(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// y <= y + alpha*x
void blasfeo_dvecad(int m, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi);
// --- traspositions
// B <= A'
void blasfeo_dgetr(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// B <= A', A lower triangular
void blasfeo_dtrtr_l(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// B <= A', A upper triangular
void blasfeo_dtrtr_u(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// dia
// diag(A) += alpha
void blasfeo_ddiare(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
// diag(A) <= alpha*x
void blasfeo_ddiain(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
// diag(A)[idx] <= alpha*x
void blasfeo_ddiain_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
// x <= diag(A)
void blasfeo_ddiaex(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi);
// x <= diag(A)[idx]
void blasfeo_ddiaex_sp(int kmax, double alpha, int *idx, struct blasfeo_dmat *sD, int di, int dj, struct blasfeo_dvec *sx, int xi);
// diag(A) += alpha*x
void blasfeo_ddiaad(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
// diag(A)[idx] += alpha*x
void blasfeo_ddiaad_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
// diag(A)[idx] = y + alpha*x
void blasfeo_ddiaadin_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
// row
void blasfeo_drowin(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
void blasfeo_drowex(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi);
void blasfeo_drowad(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
void blasfeo_drowad_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
void blasfeo_drowsw(int kmax, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sC, int ci, int cj);
void blasfeo_drowpe(int kmax, int *ipiv, struct blasfeo_dmat *sA);
void blasfeo_drowpei(int kmax, int *ipiv, struct blasfeo_dmat *sA);
// col
void blasfeo_dcolex(int kmax, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi);
void blasfeo_dcolin(int kmax, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
void blasfeo_dcolad(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
void blasfeo_dcolsc(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
void blasfeo_dcolsw(int kmax, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sC, int ci, int cj);
void blasfeo_dcolpe(int kmax, int *ipiv, struct blasfeo_dmat *sA);
void blasfeo_dcolpei(int kmax, int *ipiv, struct blasfeo_dmat *sA);
// vec
// a <= alpha
void blasfeo_dvecse(int m, double alpha, struct blasfeo_dvec *sx, int xi);
// sx[xi] <= a
void blasfeo_dvecin1(double a, struct blasfeo_dvec *sx, int xi);
// <= sx[xi]
double blasfeo_dvecex1(struct blasfeo_dvec *sx, int xi);
// y <= x
void blasfeo_dveccp(int m, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi);
// x <= alpha*x
void blasfeo_dvecsc(int m, double alpha, struct blasfeo_dvec *sx, int xi);
// y <= alpha*x
void blasfeo_dveccpsc(int m, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi);
// z[idx] += alpha * x
void blasfeo_dvecad_sp(int m, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dvec *sz, int zi);
// z[idx] <= alpha * x
void blasfeo_dvecin_sp(int m, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dvec *sz, int zi);
// z <= alpha * x[idx]
void blasfeo_dvecex_sp(int m, double alpha, int *idx, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z += alpha * x[idx]
void blasfeo_dvecexad_sp(int m, double alpha, int *idx, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
void blasfeo_dveccl(int m,
struct blasfeo_dvec *sxm, int xim, struct blasfeo_dvec *sx, int xi,
struct blasfeo_dvec *sxp, int xip, struct blasfeo_dvec *sz, int zi);
void blasfeo_dveccl_mask(int m,
struct blasfeo_dvec *sxm, int xim, struct blasfeo_dvec *sx, int xi,
struct blasfeo_dvec *sxp, int xip, struct blasfeo_dvec *sz, int zi,
struct blasfeo_dvec *sm, int mi);
void blasfeo_dvecze(int m, struct blasfeo_dvec *sm, int mi, struct blasfeo_dvec *sv, int vi, struct blasfeo_dvec *se, int ei);
void blasfeo_dvecnrm_inf(int m, struct blasfeo_dvec *sx, int xi, double *ptr_norm);
void blasfeo_dvecnrm_2(int m, struct blasfeo_dvec *sx, int xi, double *ptr_norm);
void blasfeo_dvecpe(int kmax, int *ipiv, struct blasfeo_dvec *sx, int xi);
void blasfeo_dvecpei(int kmax, int *ipiv, struct blasfeo_dvec *sx, int xi);
/*
* Explicitly panel-major matrix format
*/
// returns the memory size (in bytes) needed for a dmat
size_t blasfeo_pm_memsize_dmat(int ps, int m, int n);
// create a strmat for a matrix of size m*n by using memory passed by a pointer (pointer is not updated)
void blasfeo_pm_create_dmat(int ps, int m, int n, struct blasfeo_pm_dmat *sA, void *memory);
// print
void blasfeo_pm_print_dmat(int m, int n, struct blasfeo_pm_dmat *sA, int ai, int aj);
/*
* Explicitly panel-major matrix format
*/
// returns the memory size (in bytes) needed for a dmat
size_t blasfeo_cm_memsize_dmat(int m, int n);
// create a strmat for a matrix of size m*n by using memory passed by a pointer (pointer is not updated)
void blasfeo_cm_create_dmat(int m, int n, struct blasfeo_pm_dmat *sA, void *memory);
//
// BLAS API helper functions
//
#if ( defined(BLAS_API) & defined(MF_PANELMAJ) )
// aux
void blasfeo_cm_dgetr(int m, int n, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj);
#endif
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_D_AUX_H_

View File

@@ -1,145 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
/*
* auxiliary algebra operation external dependancies header
*
* include/blasfeo_d_aux_ext_dep.h
*
* - dynamic memory allocation
* - print
*
*/
#ifndef BLASFEO_D_AUX_EXT_DEP_H_
#define BLASFEO_D_AUX_EXT_DEP_H_
#include <stdio.h>
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef EXT_DEP
/* column-major matrices */
// dynamically allocate row*col doubles of memory and set accordingly a pointer to double; set allocated memory to zero
void d_zeros(double **pA, int row, int col);
// dynamically allocate row*col doubles of memory aligned to 64-byte boundaries and set accordingly a pointer to double; set allocated memory to zero
void d_zeros_align(double **pA, int row, int col);
// dynamically allocate size bytes of memory aligned to 64-byte boundaries and set accordingly a pointer to double; set allocated memory to zero
void d_zeros_align_bytes(double **pA, int size);
// free the memory allocated by d_zeros
void d_free(double *pA);
// free the memory allocated by d_zeros_align or d_zeros_align_bytes
void d_free_align(double *pA);
// print a column-major matrix
void d_print_mat(int m, int n, double *A, int lda);
// print the transposed of a column-major matrix
void d_print_tran_mat(int row, int col, double *A, int lda);
// print to file a column-major matrix
void d_print_to_file_mat(FILE *file, int row, int col, double *A, int lda);
// print to file a column-major matrix in exponential format
void d_print_to_file_exp_mat(FILE *file, int row, int col, double *A, int lda);
// print to string a column-major matrix
void d_print_to_string_mat(char **buf_out, int row, int col, double *A, int lda);
// print to file the transposed of a column-major matrix
void d_print_tran_to_file_mat(FILE *file, int row, int col, double *A, int lda);
// print to file the transposed of a column-major matrix in exponential format
void d_print_tran_to_file_exp_mat(FILE *file, int row, int col, double *A, int lda);
// print in exponential notation a column-major matrix
void d_print_exp_mat(int m, int n, double *A, int lda);
// print in exponential notation the transposed of a column-major matrix
void d_print_exp_tran_mat(int row, int col, double *A, int lda);
/* strmat and strvec */
// create a strmat for a matrix of size m*n by dynamically allocating memory
void blasfeo_allocate_dmat(int m, int n, struct blasfeo_dmat *sA);
// create a strvec for a vector of size m by dynamically allocating memory
void blasfeo_allocate_dvec(int m, struct blasfeo_dvec *sa);
// free the memory allocated by blasfeo_allocate_dmat
void blasfeo_free_dmat(struct blasfeo_dmat *sA);
// free the memory allocated by blasfeo_allocate_dvec
void blasfeo_free_dvec(struct blasfeo_dvec *sa);
// print a strmat
void blasfeo_print_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj);
// print in exponential notation a strmat
void blasfeo_print_exp_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj);
// print to file a strmat
void blasfeo_print_to_file_dmat(FILE *file, int m, int n, struct blasfeo_dmat *sA, int ai, int aj);
// print to file a strmat in exponential format
void blasfeo_print_to_file_exp_dmat(FILE *file, int m, int n, struct blasfeo_dmat *sA, int ai, int aj);
// print to string a strmat
void blasfeo_print_to_string_dmat(char **buf_out, int m, int n, struct blasfeo_dmat *sA, int ai, int aj);
// print the transposed of a strmat
void blasfeo_print_tran_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj);
// print a strvec
void blasfeo_print_dvec(int m, struct blasfeo_dvec *sa, int ai);
// print in exponential notation a strvec
void blasfeo_print_exp_dvec(int m, struct blasfeo_dvec *sa, int ai);
// print to file a strvec
void blasfeo_print_to_file_dvec(FILE *file, int m, struct blasfeo_dvec *sa, int ai);
// print to string a strvec
void blasfeo_print_to_string_dvec(char **buf_out, int m, struct blasfeo_dvec *sa, int ai);
// print the transposed of a strvec
void blasfeo_print_tran_dvec(int m, struct blasfeo_dvec *sa, int ai);
// print in exponential notation the transposed of a strvec
void blasfeo_print_exp_tran_dvec(int m, struct blasfeo_dvec *sa, int ai);
// print to file the transposed of a strvec
void blasfeo_print_to_file_tran_dvec(FILE *file, int m, struct blasfeo_dvec *sa, int ai);
// print to string the transposed of a strvec
void blasfeo_print_to_string_tran_dvec(char **buf_out, int m, struct blasfeo_dvec *sa, int ai);
#endif // EXT_DEP
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_D_AUX_EXT_DEP_H_

View File

@@ -1,84 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
/*
* auxiliary algebra operation external dependancies header
*
* include/blasfeo_d_aux_ext_dep.h
*
* - dynamic memory allocation
* - print
*
*/
#ifndef BLASFEO_D_AUX_EXT_DEP_REF_H_
#define BLASFEO_D_AUX_EXT_DEP_REF_H_
#include <stdio.h>
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
// expose reference BLASFEO for testing
// see blasfeo_d_aux_exp_dep.h for help
void blasfeo_print_dmat_ref(int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj);
void blasfeo_allocate_dmat_ref(int m, int n, struct blasfeo_dmat_ref *sA);
void blasfeo_allocate_dvec_ref(int m, struct blasfeo_dvec_ref *sa);
void blasfeo_free_dmat_ref(struct blasfeo_dmat_ref *sA);
void blasfeo_free_dvec_ref(struct blasfeo_dvec_ref *sa);
void blasfeo_print_dmat_ref(int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj);
void blasfeo_print_exp_dmat_ref(int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj);
void blasfeo_print_to_file_dmat_ref(FILE *file, int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj);
void blasfeo_print_to_file_exp_dmat_ref(FILE *file, int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj);
void blasfeo_print_to_string_dmat_ref(char **buf_out, int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj);
void blasfeo_print_dvec(int m, struct blasfeo_dvec *sa, int ai);
void blasfeo_print_exp_dvec(int m, struct blasfeo_dvec *sa, int ai);
void blasfeo_print_to_file_dvec(FILE *file, int m, struct blasfeo_dvec *sa, int ai);
void blasfeo_print_to_string_dvec(char **buf_out, int m, struct blasfeo_dvec *sa, int ai);
void blasfeo_print_tran_dvec(int m, struct blasfeo_dvec *sa, int ai);
void blasfeo_print_exp_tran_dvec(int m, struct blasfeo_dvec *sa, int ai);
void blasfeo_print_to_file_tran_dvec(FILE *file, int m, struct blasfeo_dvec *sa, int ai);
void blasfeo_print_to_string_tran_dvec(char **buf_out, int m, struct blasfeo_dvec *sa, int ai);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_D_AUX_EXT_DEP_REF_H_

View File

@@ -1,75 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
/*
* ----------- TOMOVE
*
* expecting column major matrices
*
*/
#include "blasfeo_common.h"
void dtrcp_l_lib(int m, double alpha, int offsetA, double *A, int sda, int offsetB, double *B, int sdb);
void dgead_lib(int m, int n, double alpha, int offsetA, double *A, int sda, int offsetB, double *B, int sdb);
// TODO remove ???
void ddiain_sqrt_lib(int kmax, double *x, int offset, double *pD, int sdd);
// TODO ddiaad1
void ddiareg_lib(int kmax, double reg, int offset, double *pD, int sdd);
void dgetr_lib(int m, int n, double alpha, int offsetA, double *pA, int sda, int offsetC, double *pC, int sdc);
void dtrtr_l_lib(int m, double alpha, int offsetA, double *pA, int sda, int offsetC, double *pC, int sdc);
void dtrtr_u_lib(int m, double alpha, int offsetA, double *pA, int sda, int offsetC, double *pC, int sdc);
void ddiaex_lib(int kmax, double alpha, int offset, double *pD, int sdd, double *x);
void ddiaad_lib(int kmax, double alpha, double *x, int offset, double *pD, int sdd);
void ddiain_libsp(int kmax, int *idx, double alpha, double *x, double *pD, int sdd);
void ddiaex_libsp(int kmax, int *idx, double alpha, double *pD, int sdd, double *x);
void ddiaad_libsp(int kmax, int *idx, double alpha, double *x, double *pD, int sdd);
void ddiaadin_libsp(int kmax, int *idx, double alpha, double *x, double *y, double *pD, int sdd);
void drowin_lib(int kmax, double alpha, double *x, double *pD);
void drowex_lib(int kmax, double alpha, double *pD, double *x);
void drowad_lib(int kmax, double alpha, double *x, double *pD);
void drowin_libsp(int kmax, double alpha, int *idx, double *x, double *pD);
void drowad_libsp(int kmax, int *idx, double alpha, double *x, double *pD);
void drowadin_libsp(int kmax, int *idx, double alpha, double *x, double *y, double *pD);
void dcolin_lib(int kmax, double *x, int offset, double *pD, int sdd);
void dcolad_lib(int kmax, double alpha, double *x, int offset, double *pD, int sdd);
void dcolin_libsp(int kmax, int *idx, double *x, double *pD, int sdd);
void dcolad_libsp(int kmax, double alpha, int *idx, double *x, double *pD, int sdd);
void dcolsw_lib(int kmax, int offsetA, double *pA, int sda, int offsetC, double *pC, int sdc);
void dvecin_libsp(int kmax, int *idx, double *x, double *y);
void dvecad_libsp(int kmax, int *idx, double alpha, double *x, double *y);

View File

@@ -1,208 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_D_AUX_REF_H_
#define BLASFEO_D_AUX_REF_H_
#include <stdlib.h>
#include "blasfeo_common.h"
#include "blasfeo_d_aux_old.h"
#ifdef __cplusplus
extern "C" {
#endif
// --- memory calculations
//
// returns the memory size (in bytes) needed for a dmat
size_t blasfeo_ref_memsize_dmat(int m, int n);
// returns the memory size (in bytes) needed for the diagonal of a dmat
size_t blasfeo_ref_memsize_diag_dmat(int m, int n);
// returns the memory size (in bytes) needed for a dvec
size_t blasfeo_ref_memsize_dvec(int m);
// --- creation
//
// create a strmat for a matrix of size m*n by using memory passed by a pointer (pointer is not updated)
void blasfeo_ref_create_dmat(int m, int n, struct blasfeo_dmat *sA, void *memory);
// create a strvec for a vector of size m by using memory passed by a pointer (pointer is not updated)
void blasfeo_ref_create_dvec(int m, struct blasfeo_dvec *sA, void *memory);
// --- packing
// pack the column-major matrix A into the matrix struct B
void blasfeo_ref_pack_dmat(int m, int n, double *A, int lda, struct blasfeo_dmat *sB, int bi, int bj);
// pack the lower-triangular column-major matrix A into the matrix struct B
void blasfeo_ref_pack_l_dmat(int m, int n, double *A, int lda, struct blasfeo_dmat *sB, int bi, int bj);
// pack the upper-triangular column-major matrix A into the matrix struct B
void blasfeo_ref_pack_u_dmat(int m, int n, double *A, int lda, struct blasfeo_dmat *sB, int bi, int bj);
// transpose and pack the column-major matrix A into the matrix struct B
void blasfeo_ref_pack_tran_dmat(int m, int n, double *A, int lda, struct blasfeo_dmat *sB, int bi, int bj);
// pack the vector x into the vector structure y
void blasfeo_ref_pack_dvec(int m, double *x, int xi, struct blasfeo_dvec *sy, int yi);
// unpack the matrix structure A into the column-major matrix B
void blasfeo_ref_unpack_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, double *B, int ldb);
// transpose and unpack the matrix structure A into the column-major matrix B
void blasfeo_ref_unpack_tran_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, double *B, int ldb);
// pack the vector structure x into the vector y
void blasfeo_ref_unpack_dvec(int m, struct blasfeo_dvec *sx, int xi, double *y, int yi);
// --- cast
//
void ref_d_cast_mat2strmat(double *A, struct blasfeo_dmat *sA); // TODO
void ref_d_cast_diag_mat2strmat(double *dA, struct blasfeo_dmat *sA); // TODO
void ref_d_cast_vec2vecmat(double *a, struct blasfeo_dvec *sx); // TODO
// ge
// --- insert/extract
//
// sA[ai, aj] <= a
void blasfeo_ref_dgein1(double a, struct blasfeo_dmat *sA, int ai, int aj);
// <= sA[ai, aj]
double blasfeo_ref_dgeex1(struct blasfeo_dmat *sA, int ai, int aj);
// --- set
// A <= alpha
void blasfeo_ref_dgese(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
// --- copy / scale
// B <= A
void blasfeo_ref_dgecp(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// A <= alpha*A
void blasfeo_ref_dgesc(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
// B <= alpha*A
void blasfeo_ref_dgecpsc(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// B <= A, A lower triangular
void blasfeo_ref_dtrcp_l(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
void blasfeo_ref_dtrcpsc_l(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
void blasfeo_ref_dtrsc_l(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
// --- sum
// B <= B + alpha*A
void blasfeo_ref_dgead(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sC, int yi, int cj);
// y <= y + alpha*x
void blasfeo_ref_dvecad(int m, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi);
// --- traspositions
// B <= A'
void blasfeo_ref_dgetr(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// B <= A', A lower triangular
void blasfeo_ref_dtrtr_l(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// B <= A', A upper triangular
void blasfeo_ref_dtrtr_u(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj);
// dia
// diag(A) += alpha
void blasfeo_ref_ddiare(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
// diag(A) <= alpha*x
void blasfeo_ref_ddiain(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
// diag(A)[idx] <= alpha*x
void blasfeo_ref_ddiain_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
// x <= diag(A)
void blasfeo_ref_ddiaex(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi);
// x <= diag(A)[idx]
void blasfeo_ref_ddiaex_sp(int kmax, double alpha, int *idx, struct blasfeo_dmat *sD, int di, int dj, struct blasfeo_dvec *sx, int xi);
// diag(A) += alpha*x
void blasfeo_ref_ddiaad(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
// diag(A)[idx] += alpha*x
void blasfeo_ref_ddiaad_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
// diag(A)[idx] = y + alpha*x
void blasfeo_ref_ddiaadin_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
// row
void blasfeo_ref_drowin(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
void blasfeo_ref_drowex(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi);
void blasfeo_ref_drowad(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
void blasfeo_ref_drowad_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
void blasfeo_ref_drowsw(int kmax, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sC, int ci, int cj);
void blasfeo_ref_drowpe(int kmax, int *ipiv, struct blasfeo_dmat *sA);
void blasfeo_ref_drowpei(int kmax, int *ipiv, struct blasfeo_dmat *sA);
// col
void blasfeo_ref_dcolex(int kmax, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi);
void blasfeo_ref_dcolin(int kmax, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
void blasfeo_ref_dcolad(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
void blasfeo_ref_dcolsc(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
void blasfeo_ref_dcolsw(int kmax, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sC, int ci, int cj);
void blasfeo_ref_dcolpe(int kmax, int *ipiv, struct blasfeo_dmat *sA);
void blasfeo_ref_dcolpei(int kmax, int *ipiv, struct blasfeo_dmat *sA);
// vec
// a <= alpha
void blasfeo_ref_dvecse(int m, double alpha, struct blasfeo_dvec *sx, int xi);
// sx[xi] <= a
void blasfeo_ref_dvecin1(double a, struct blasfeo_dvec *sx, int xi);
// <= sx[xi]
double blasfeo_ref_dvecex1(struct blasfeo_dvec *sx, int xi);
// y <= x
void blasfeo_ref_dveccp(int m, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi);
// x <= alpha*x
void blasfeo_ref_dvecsc(int m, double alpha, struct blasfeo_dvec *sx, int xi);
// y <= alpha*x
void blasfeo_ref_dveccpsc(int m, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi);
void blasfeo_ref_dvecad_sp(int m, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dvec *sz, int zi);
void blasfeo_ref_dvecin_sp(int m, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dvec *sz, int zi);
void blasfeo_ref_dvecex_sp(int m, double alpha, int *idx, struct blasfeo_dvec *sx, int x, struct blasfeo_dvec *sz, int zi);
// z += alpha * x[idx]
void blasfeo_ref_dvecexad_sp(int m, double alpha, int *idx, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
void blasfeo_ref_dveccl(int m,
struct blasfeo_dvec *sxm, int xim, struct blasfeo_dvec *sx, int xi,
struct blasfeo_dvec *sxp, int xip, struct blasfeo_dvec *sz, int zi);
void blasfeo_ref_dveccl_mask(int m,
struct blasfeo_dvec *sxm, int xim, struct blasfeo_dvec *sx, int xi,
struct blasfeo_dvec *sxp, int xip, struct blasfeo_dvec *sz, int zi,
struct blasfeo_dvec *sm, int mi);
void blasfeo_ref_dvecze(int m, struct blasfeo_dvec *sm, int mi, struct blasfeo_dvec *sv, int vi, struct blasfeo_dvec *se, int ei);
void blasfeo_ref_dvecnrm_inf(int m, struct blasfeo_dvec *sx, int xi, double *ptr_norm);
void blasfeo_ref_dvecpe(int kmax, int *ipiv, struct blasfeo_dvec *sx, int xi);
void blasfeo_ref_dvecpei(int kmax, int *ipiv, struct blasfeo_dvec *sx, int xi);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_D_AUX_REF_H_

View File

@@ -1,226 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
/*
* auxiliary algebra operations header
*
* include/blasfeo_aux_lib*.h
*
*/
#ifndef BLASFEO_D_AUX_TEST_H_
#define BLASFEO_D_AUX_TEST_H_
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
// --- memory calculations
int test_blasfeo_memsize_dmat(int m, int n);
int test_blasfeo_memsize_diag_dmat(int m, int n);
int test_blasfeo_memsize_dvec(int m);
// --- creation
void test_blasfeo_create_dmat(int m, int n, struct blasfeo_dmat *sA, void *memory);
void test_blasfeo_create_dvec(int m, struct blasfeo_dvec *sA, void *memory);
// --- conversion
void test_blasfeo_pack_dmat(int m, int n, double *A, int lda, struct blasfeo_dmat *sA, int ai, int aj);
void test_blasfeo_pack_dvec(int m, double *x, int xi, struct blasfeo_dvec *sa, int ai);
void test_blasfeo_pack_tran_dmat(int m, int n, double *A, int lda, struct blasfeo_dmat *sA, int ai, int aj);
void test_blasfeo_unpack_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, double *A, int lda);
void test_blasfeo_unpack_dvec(int m, struct blasfeo_dvec *sa, int ai, double *x, int xi);
void test_blasfeo_unpack_tran_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, double *A, int lda);
// --- cast
void test_d_cast_mat2strmat(double *A, struct blasfeo_dmat *sA);
void test_d_cast_diag_mat2strmat(double *dA, struct blasfeo_dmat *sA);
void test_d_cast_vec2vecmat(double *a, struct blasfeo_dvec *sa);
// ------ copy / scale
// B <= A
void test_blasfeo_dgecp(int m, int n,
struct blasfeo_dmat *sA, int ai, int aj,
struct blasfeo_dmat *sB, int bi, int bj);
// A <= alpha*A
void test_blasfeo_dgesc(int m, int n,
double alpha,
struct blasfeo_dmat *sA, int ai, int aj);
// B <= alpha*A
void test_blasfeo_dgecpsc(int m, int n,
double alpha,
struct blasfeo_dmat *sA, int ai, int aj,
struct blasfeo_dmat *sB, int bi, int bj);
// // --- insert/extract
// //
// // <= sA[ai, aj]
// void test_blasfeo_dgein1(double a, struct blasfeo_dmat *sA, int ai, int aj);
// // <= sA[ai, aj]
// double blasfeo_dgeex1(struct blasfeo_dmat *sA, int ai, int aj);
// // sx[xi] <= a
// void test_blasfeo_dvecin1(double a, struct blasfeo_dvec *sx, int xi);
// // <= sx[xi]
// double blasfeo_dvecex1(struct blasfeo_dvec *sx, int xi);
// // A <= alpha
// // --- set
// void test_blasfeo_dgese(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
// // a <= alpha
// void test_blasfeo_dvecse(int m, double alpha, struct blasfeo_dvec *sx, int xi);
// // B <= A
// // --- vector
// // y <= x
// void test_blasfeo_dveccp(int m, struct blasfeo_dvec *sa, int ai, struct blasfeo_dvec *sc, int ci);
// // x <= alpha*x
// void test_blasfeo_dvecsc(int m, double alpha, struct blasfeo_dvec *sa, int ai);
// // TODO
// // x <= alpha*x
// void test_blasfeo_dveccpsc(int m, double alpha, struct blasfeo_dvec *sa, int ai, struct blasfeo_dvec *sc, int ci);
// // B <= A, A lower triangular
// void test_blasfeo_dtrcp_l(int m,
// struct blasfeo_dmat *sA, int ai, int aj,
// struct blasfeo_dmat *sB, int bi, int bj);
// void test_blasfeo_dtrcpsc_l(int m, double alpha,
// struct blasfeo_dmat *sA, int ai, int aj,
// struct blasfeo_dmat *sB, int bi, int bj);
// void test_blasfeo_dtrsc_l(int m, double alpha,
// struct blasfeo_dmat *sA, int ai, int aj);
// // B <= B + alpha*A
// void test_blasfeo_dgead(int m, int n, double alpha,
// struct blasfeo_dmat *sA, int ai, int aj,
// struct blasfeo_dmat *sC, int ci, int cj);
// // y <= y + alpha*x
// void test_blasfeo_dvecad(int m, double alpha,
// struct blasfeo_dvec *sa, int ai,
// struct blasfeo_dvec *sc, int ci);
// // --- traspositions
// void test_dgetr_lib(int m, int n, double alpha, int offsetA, double *pA, int sda, int offsetC, double *pC, int sdc);
// void test_blasfeo_dgetr(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sC, int ci, int cj);
// void test_dtrtr_l_lib(int m, double alpha, int offsetA, double *pA, int sda, int offsetC, double *pC, int sdc);
// void test_blasfeo_dtrtr_l(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sC, int ci, int cj);
// void test_dtrtr_u_lib(int m, double alpha, int offsetA, double *pA, int sda, int offsetC, double *pC, int sdc);
// void test_blasfeo_dtrtr_u(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sC, int ci, int cj);
// void test_blasfeo_ddiare(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj);
// void test_blasfeo_ddiain(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
// void test_ddiaex_lib(int kmax, double alpha, int offset, double *pD, int sdd, double *x);
// void test_ddiaad_lib(int kmax, double alpha, double *x, int offset, double *pD, int sdd);
// void test_ddiain_libsp(int kmax, int *idx, double alpha, double *x, double *pD, int sdd);
// void test_blasfeo_ddiain_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
// void test_ddiaex_libsp(int kmax, int *idx, double alpha, double *pD, int sdd, double *x);
// void test_blasfeo_ddiaex(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi);
// void test_blasfeo_ddiaex_sp(int kmax, double alpha, int *idx, struct blasfeo_dmat *sD, int di, int dj, struct blasfeo_dvec *sx, int xi);
// void test_blasfeo_ddiaad(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
// void test_ddiaad_libsp(int kmax, int *idx, double alpha, double *x, double *pD, int sdd);
// void test_blasfeo_ddiaad_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
// void test_ddiaadin_libsp(int kmax, int *idx, double alpha, double *x, double *y, double *pD, int sdd);
// void test_blasfeo_ddiaadin_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
// void test_drowin_lib(int kmax, double alpha, double *x, double *pD);
// void test_blasfeo_drowin(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
// void test_drowex_lib(int kmax, double alpha, double *pD, double *x);
// void test_blasfeo_drowex(int kmax, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi);
// void test_drowad_lib(int kmax, double alpha, double *x, double *pD);
// void test_blasfeo_drowad(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
// void test_drowin_libsp(int kmax, double alpha, int *idx, double *x, double *pD);
// void test_drowad_libsp(int kmax, int *idx, double alpha, double *x, double *pD);
// void test_blasfeo_drowad_sp(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dmat *sD, int di, int dj);
// void test_drowadin_libsp(int kmax, int *idx, double alpha, double *x, double *y, double *pD);
// void test_drowsw_lib(int kmax, double *pA, double *pC);
// void test_blasfeo_drowsw(int kmax, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sC, int ci, int cj);
// void test_blasfeo_drowpe(int kmax, int *ipiv, struct blasfeo_dmat *sA);
// void test_blasfeo_dcolex(int kmax, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi);
// void test_dcolin_lib(int kmax, double *x, int offset, double *pD, int sdd);
// void test_blasfeo_dcolin(int kmax, struct blasfeo_dvec *sx, int xi, struct blasfeo_dmat *sA, int ai, int aj);
// void test_dcolad_lib(int kmax, double alpha, double *x, int offset, double *pD, int sdd);
// void test_dcolin_libsp(int kmax, int *idx, double *x, double *pD, int sdd);
// void test_dcolad_libsp(int kmax, double alpha, int *idx, double *x, double *pD, int sdd);
// void test_dcolsw_lib(int kmax, int offsetA, double *pA, int sda, int offsetC, double *pC, int sdc);
// void test_blasfeo_dcolsw(int kmax, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sC, int ci, int cj);
// void test_blasfeo_dcolpe(int kmax, int *ipiv, struct blasfeo_dmat *sA);
// void test_dvecin_libsp(int kmax, int *idx, double *x, double *y);
// void test_dvecad_libsp(int kmax, int *idx, double alpha, double *x, double *y);
// void test_blasfeo_dvecad_sp(int m, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dvec *sz, int zi);
// void test_blasfeo_dvecin_sp(int m, double alpha, struct blasfeo_dvec *sx, int xi, int *idx, struct blasfeo_dvec *sz, int zi);
// void test_blasfeo_dvecex_sp(int m, double alpha, int *idx, struct blasfeo_dvec *sx, int x, struct blasfeo_dvec *sz, int zi);
// void test_blasfeo_dveccl(int m, struct blasfeo_dvec *sxm, int xim, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sxp, int xip, struct blasfeo_dvec *sz, int zi);
// void test_blasfeo_dveccl_mask(int m, struct blasfeo_dvec *sxm, int xim, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sxp, int xip, struct blasfeo_dvec *sz, int zi, struct blasfeo_dvec *sm, int mi);
// void test_blasfeo_dvecze(int m, struct blasfeo_dvec *sm, int mi, struct blasfeo_dvec *sv, int vi, struct blasfeo_dvec *se, int ei);
// void test_blasfeo_dvecnrm_inf(int m, struct blasfeo_dvec *sx, int xi, double *ptr_norm);
// void test_blasfeo_dvecpe(int kmax, int *ipiv, struct blasfeo_dvec *sx, int xi);
// void test_blasfeo_dvecpei(int kmax, int *ipiv, struct blasfeo_dvec *sx, int xi);
// ext_dep
void test_blasfeo_allocate_dmat(int m, int n, struct blasfeo_dmat *sA);
void test_blasfeo_allocate_dvec(int m, struct blasfeo_dvec *sa);
void test_blasfeo_free_dmat(struct blasfeo_dmat *sA);
void test_blasfeo_free_dvec(struct blasfeo_dvec *sa);
void test_blasfeo_print_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj);
void test_blasfeo_print_dvec(int m, struct blasfeo_dvec *sa, int ai);
void test_blasfeo_print_tran_dvec(int m, struct blasfeo_dvec *sa, int ai);
void test_blasfeo_print_to_file_dmat(FILE *file, int m, int n, struct blasfeo_dmat *sA, int ai, int aj);
void test_blasfeo_print_to_file_dvec(FILE *file, int m, struct blasfeo_dvec *sa, int ai);
void test_blasfeo_print_to_file_tran_dvec(FILE *file, int m, struct blasfeo_dvec *sa, int ai);
void test_blasfeo_print_exp_dmat(int m, int n, struct blasfeo_dmat *sA, int ai, int aj);
void test_blasfeo_print_exp_dvec(int m, struct blasfeo_dvec *sa, int ai);
void test_blasfeo_print_exp_tran_dvec(int m, struct blasfeo_dvec *sa, int ai);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_D_AUX_TEST_H_

View File

@@ -1,46 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_D_BLAS_H_
#define BLASFEO_D_BLAS_H_
#include "blasfeo_d_blasfeo_api.h"
#include "blasfeo_d_blas_api.h"
#endif // BLASFEO_D_BLAS_H_

View File

@@ -1,281 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_D_BLAS_API_H_
#define BLASFEO_D_BLAS_API_H_
#include "blasfeo_target.h"
#ifdef BLAS_API
#ifdef CBLAS_API
#ifndef BLASFEO_CBLAS_ENUM
#define BLASFEO_CBLAS_ENUM
#ifdef FORTRAN_BLAS_API
#ifndef CBLAS_H
enum CBLAS_LAYOUT {CblasRowMajor=101, CblasColMajor=102};
enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113};
enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
#define CBLAS_ORDER CBLAS_LAYOUT /* this for backward compatibility with CBLAS_ORDER */
#endif // CBLAS_H
#else // FORTRAN_BLAS_API
enum BLASFEO_CBLAS_LAYOUT {BlasfeoCblasRowMajor=101, BlasfeoCblasColMajor=102};
enum BLASFEO_CBLAS_TRANSPOSE {BlasfeoCblasNoTrans=111, BlasfeoCblasTrans=112, BlasfeoCblasConjTrans=113};
enum BLASFEO_CBLAS_UPLO {BlasfeoCblasUpper=121, BlasfeoCblasLower=122};
enum BLASFEO_CBLAS_DIAG {BlasfeoCblasNonUnit=131, BlasfeoCblasUnit=132};
enum BLASFEO_CBLAS_SIDE {BlasfeoCblasLeft=141, BlasfeoCblasRight=142};
#define BLASFEO_CBLAS_ORDER BLASFEO_CBLAS_LAYOUT /* this for backward compatibility with BLASFEO_CBLAS_ORDER */
#endif // FORTRAN_BLAS_API
#endif // BLASFEO_CBLAS_ENUM
#endif // CBLAS_API
#endif // BLAS_API
#ifdef __cplusplus
extern "C" {
#endif
#ifdef BLAS_API
#ifdef FORTRAN_BLAS_API
// BLAS 1
//
void daxpy_(int *n, double *alpha, double *x, int *incx, double *y, int *incy);
//
void dcopy_(int *n, double *x, int *incx, double *y, int *incy);
//
double ddot_(int *n, double *x, int *incx, double *y, int *incy);
// BLAS 2
//
void dgemv_(char *tran, int *m, int *n, double *alpha, double *A, int *lda, double *x, int *incx, double *beta, double *y, int *incy);
//
void dsymv_(char *uplo, int *n, double *alpha, double *A, int *lda, double *x, int *incx, double *beta, double *y, int *incy);
//
void dger_(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *A, int *lda);
// BLAS 3
//
void dgemm_(char *ta, char *tb, int *m, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc);
//
void dsyrk_(char *uplo, char *ta, int *m, int *k, double *alpha, double *A, int *lda, double *beta, double *C, int *ldc);
//
void dtrmm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb);
//
void dtrsm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb);
//
void dsyr2k_(char *uplo, char *ta, int *m, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc);
// LAPACK
//
void dgesv_(int *m, int *n, double *A, int *lda, int *ipiv, double *B, int *ldb, int *info);
//
void dgetrf_(int *m, int *n, double *A, int *lda, int *ipiv, int *info);
//
void dgetrf_np_(int *m, int *n, double *A, int *lda, int *info);
//
void dgetrs_(char *trans, int *m, int *n, double *A, int *lda, int *ipiv, double *B, int *ldb, int *info);
//
void dlaswp_(int *n, double *A, int *lda, int *k1, int *k2, int *ipiv, int *incx);
//
void dposv_(char *uplo, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info);
//
void dpotrf_(char *uplo, int *m, double *A, int *lda, int *info);
//
void dpotrs_(char *uplo, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info);
//
void dtrtrs_(char *uplo, char *trans, char *diag, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info);
// aux
void dgetr_(int *m, int *n, double *A, int *lda, double *B, int *ldb);
#ifdef CBLAS_API
// CBLAS 1
//
void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY);
//
void cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY);
//
void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY);
// CBLAS 2
//
void cblas_dgemv(const enum CBLAS_LAYOUT layout, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY);
// CBLAS 3
//
void cblas_dgemm(const enum CBLAS_LAYOUT layout, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc);
//
void cblas_dsyrk(const enum CBLAS_LAYOUT layout, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc);
//
void cblas_dtrmm(const enum CBLAS_LAYOUT layout, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb);
//
void cblas_dtrsm(const enum CBLAS_LAYOUT layout, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb);
#endif // CBLAS_API
#else // BLASFEO_API
// BLAS 1
//
void blasfeo_blas_daxpy(int *n, double *alpha, double *x, int *incx, double *y, int *incy);
//
double blasfeo_blas_ddot(int *n, double *x, int *incx, double *y, int *incy);
//
void blasfeo_blas_dcopy(int *n, double *x, int *incx, double *y, int *incy);
// BLAS 2
//
void blasfeo_blas_dgemv(char *trans, int *m, int *n, double *alpha, double *A, int *lda, double *x, int *incx, double *beta, double *y, int *incy);
//
void blasfeo_blas_dsymv(char *uplo, int *n, double *alpha, double *A, int *lda, double *x, int *incx, double *beta, double *y, int *incy);
//
void blasfeo_blas_dger(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *A, int *lda);
// BLAS 3
//
void blasfeo_blas_dgemm(char *ta, char *tb, int *m, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc);
//
void blasfeo_blas_dsyrk(char *uplo, char *ta, int *m, int *k, double *alpha, double *A, int *lda, double *beta, double *C, int *ldc);
//
void blasfeo_blas_dtrmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb);
//
void blasfeo_blas_dtrsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb);
//
void blasfeo_blas_dsyr2k(char *uplo, char *ta, int *m, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc);
// LAPACK
//
void blasfeo_lapack_dgesv(int *m, int *n, double *A, int *lda, int *ipiv, double *B, int *ldb, int *info);
//
void blasfeo_lapack_dgetrf(int *m, int *n, double *A, int *lda, int *ipiv, int *info);
//
void blasfeo_lapack_dgetrf_np(int *m, int *n, double *A, int *lda, int *info);
//
void blasfeo_lapack_dgetrs(char *trans, int *m, int *n, double *A, int *lda, int *ipiv, double *B, int *ldb, int *info);
//
void blasfeo_lapack_dlaswp(int *n, double *A, int *lda, int *k1, int *k2, int *ipiv, int *incx);
//
void blasfeo_lapack_dposv(char *uplo, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info);
//
void blasfeo_lapack_dpotrf(char *uplo, int *m, double *A, int *lda, int *info);
//
void blasfeo_lapack_dpotrs(char *uplo, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info);
//
void blasfeo_lapack_dtrtrs(char *uplo, char *trans, char *diag, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info);
// aux
void blasfeo_blas_dgetr(int *m, int *n, double *A, int *lda, double *B, int *ldb);
#ifdef CBLAS_API
// CBLAS 1
//
void blasfeo_cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY);
//
void blasfeo_cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY);
//
void blasfeo_cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY);
// CBLAS 2
//
void blasfeo_cblas_dgemv(const enum BLASFEO_CBLAS_LAYOUT layout, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY);
// CBLAS 3
//
void blasfeo_cblas_dgemm(const enum BLASFEO_CBLAS_LAYOUT layout, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const enum BLASFEO_CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc);
//
void blasfeo_cblas_dsyrk(const enum BLASFEO_CBLAS_LAYOUT layout, const enum BLASFEO_CBLAS_UPLO Uplo, const enum BLASFEO_CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc);
//
void blasfeo_cblas_dtrmm(const enum BLASFEO_CBLAS_LAYOUT layout, const enum BLASFEO_CBLAS_SIDE Side, const enum BLASFEO_CBLAS_UPLO Uplo, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const enum BLASFEO_CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb);
//
void blasfeo_cblas_dtrsm(const enum BLASFEO_CBLAS_LAYOUT layout, const enum BLASFEO_CBLAS_SIDE Side, const enum BLASFEO_CBLAS_UPLO Uplo, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const enum BLASFEO_CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb);
#endif // CBLAS_API
#endif // BLASFEO_API
#endif // BLAS_API
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_D_BLAS_API_H_

View File

@@ -1,364 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_D_BLASFEO_API_H_
#define BLASFEO_D_BLASFEO_API_H_
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
//
// level 1 BLAS
//
// z = y + alpha*x
// z[zi:zi+n] = alpha*x[xi:xi+n] + y[yi:yi+n]
// NB: Different arguments semantic compare to equivalent standard BLAS routine
void blasfeo_daxpy(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z = beta*y + alpha*x
void blasfeo_daxpby(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z = x .* y
void blasfeo_dvecmul(int m, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z += x .* y
void blasfeo_dvecmulacc(int m, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z = x .* y, return sum(z) = x^T * y
double blasfeo_dvecmuldot(int m, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// return x^T * y
double blasfeo_ddot(int m, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi);
// construct givens plane rotation
void blasfeo_drotg(double a, double b, double *c, double *s);
// apply plane rotation [a b] [c -s; s; c] to the aj0 and aj1 columns of A at row index ai
void blasfeo_dcolrot(int m, struct blasfeo_dmat *sA, int ai, int aj0, int aj1, double c, double s);
// apply plane rotation [c s; -s c] [a; b] to the ai0 and ai1 rows of A at column index aj
void blasfeo_drowrot(int m, struct blasfeo_dmat *sA, int ai0, int ai1, int aj, double c, double s);
//
// level 2 BLAS
//
// dense
// z <= beta * y + alpha * A * x
void blasfeo_dgemv_n(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z <= beta * y + alpha * A^T * x
void blasfeo_dgemv_t(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A ) * x, A (m)x(n)
void blasfeo_dtrsv_lnn_mn(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A^T ) * x, A (m)x(n)
void blasfeo_dtrsv_ltn_mn(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A ) * x, A (m)x(m) lower, not_transposed, not_unit
void blasfeo_dtrsv_lnn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A ) * x, A (m)x(m) lower, not_transposed, unit
void blasfeo_dtrsv_lnu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A^T ) * x, A (m)x(m) lower, transposed, not_unit
void blasfeo_dtrsv_ltn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A^T ) * x, A (m)x(m) lower, transposed, unit
void blasfeo_dtrsv_ltu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A^T ) * x, A (m)x(m) upper, not_transposed, not_unit
void blasfeo_dtrsv_unn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A^T ) * x, A (m)x(m) upper, transposed, not_unit
void blasfeo_dtrsv_utn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= A * x ; A lower triangular
void blasfeo_dtrmv_lnn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= A * x ; A lower triangular, unit diagonal
void blasfeo_dtrmv_lnu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= A^T * x ; A lower triangular
void blasfeo_dtrmv_ltn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= A^T * x ; A lower triangular, unit diagonal
void blasfeo_dtrmv_ltu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= beta * y + alpha * A * x ; A upper triangular
void blasfeo_dtrmv_unn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= A^T * x ; A upper triangular
void blasfeo_dtrmv_utn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z_n <= beta_n * y_n + alpha_n * A * x_n
// z_t <= beta_t * y_t + alpha_t * A^T * x_t
void blasfeo_dgemv_nt(int m, int n, double alpha_n, double alpha_t, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx_n, int xi_n, struct blasfeo_dvec *sx_t, int xi_t, double beta_n, double beta_t, struct blasfeo_dvec *sy_n, int yi_n, struct blasfeo_dvec *sy_t, int yi_t, struct blasfeo_dvec *sz_n, int zi_n, struct blasfeo_dvec *sz_t, int zi_t);
// z <= beta * y + alpha * A * x, where A is symmetric and only the lower triangular patr of A is accessed
void blasfeo_dsymv_l(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
void blasfeo_dsymv_l_mn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z <= beta * y + alpha * A * x, where A is symmetric and only the upper triangular patr of A is accessed
void blasfeo_dsymv_u(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// D = C + alpha * x * y^T
void blasfeo_dger(int m, int n, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// diagonal
// z <= beta * y + alpha * A * x, A diagonal
void blasfeo_dgemv_d(int m, double alpha, struct blasfeo_dvec *sA, int ai, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
//
// level 3 BLAS
//
// dense
// D <= beta * C + alpha * A * B
void blasfeo_dgemm_nn(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T
void blasfeo_dgemm_nt(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B
void blasfeo_dgemm_tn(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B^T
void blasfeo_dgemm_tt(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T ; C, D lower triangular
void blasfeo_dsyrk_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
void blasfeo_dsyrk_ln_mn(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
#if ( defined(LA_HIGH_PERFORMANCE) & defined(MF_COLMAJ) )
void blasfeo_dsyrk3_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
#endif
// D <= beta * C + alpha * A^T * B ; C, D lower triangular
void blasfeo_dsyrk_lt(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
#if ( defined(LA_HIGH_PERFORMANCE) & defined(MF_COLMAJ) )
void blasfeo_dsyrk3_lt(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
#endif
// D <= beta * C + alpha * A * B^T ; C, D upper triangular
void blasfeo_dsyrk_un(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
#if ( defined(LA_HIGH_PERFORMANCE) & defined(MF_COLMAJ) )
void blasfeo_dsyrk3_un(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
#endif
// D <= beta * C + alpha * A^T * B ; C, D upper triangular
void blasfeo_dsyrk_ut(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
#if ( defined(LA_HIGH_PERFORMANCE) & defined(MF_COLMAJ) )
void blasfeo_dsyrk3_ut(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
#endif
// D <= alpha * A * B ; A lower triangular
void blasfeo_dtrmm_llnn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A * B ; A lower triangular
void blasfeo_dtrmm_llnu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^T * B ; A lower triangular
void blasfeo_dtrmm_lltn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^T * B ; A lower triangular
void blasfeo_dtrmm_lltu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A * B ; A upper triangular
void blasfeo_dtrmm_lunn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A * B ; A upper triangular
void blasfeo_dtrmm_lunu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^T * B ; A upper triangular
void blasfeo_dtrmm_lutn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^T * B ; A upper triangular
void blasfeo_dtrmm_lutu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A ; A lower triangular
void blasfeo_dtrmm_rlnn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A ; A lower triangular
void blasfeo_dtrmm_rlnu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^T ; A lower triangular
void blasfeo_dtrmm_rltn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^T ; A lower triangular
void blasfeo_dtrmm_rltu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A ; A upper triangular
void blasfeo_dtrmm_runn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A ; A upper triangular
void blasfeo_dtrmm_runu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^T ; A upper triangular
void blasfeo_dtrmm_rutn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^T ; A upper triangular
void blasfeo_dtrmm_rutu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A lower triangular employint explicit inverse of diagonal
// D <= alpha * A^{-1} * B , with A lower triangular employing explicit inverse of diagonal
void blasfeo_dtrsm_llnn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A lower triangular with unit diagonal
void blasfeo_dtrsm_llnu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A lower triangular employing explicit inverse of diagonal
void blasfeo_dtrsm_lltn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A lower triangular with unit diagonal
void blasfeo_dtrsm_lltu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A upper triangular employing explicit inverse of diagonal
void blasfeo_dtrsm_lunn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A upper triangular with unit diagonal
void blasfeo_dtrsm_lunu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A upper triangular employing explicit inverse of diagonal
void blasfeo_dtrsm_lutn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A upper triangular with unit diagonal
void blasfeo_dtrsm_lutu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A lower triangular employing explicit inverse of diagonal
void blasfeo_dtrsm_rlnn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A lower triangular with unit diagonal
void blasfeo_dtrsm_rlnu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A lower triangular employing explicit inverse of diagonal
void blasfeo_dtrsm_rltn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A lower triangular with unit diagonal
void blasfeo_dtrsm_rltu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A upper triangular employing explicit inverse of diagonal
void blasfeo_dtrsm_runn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A upper triangular with unit diagonal
void blasfeo_dtrsm_runu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A upper triangular employing explicit inverse of diagonal
void blasfeo_dtrsm_rutn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A upper triangular with unit diagonal
void blasfeo_dtrsm_rutu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T + alpha * B * A^T; C, D lower triangular
void blasfeo_dsyr2k_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B + alpha * B^T * A; C, D lower triangular
void blasfeo_dsyr2k_lt(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T + alpha * B * A^T; C, D upper triangular
void blasfeo_dsyr2k_un(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B + alpha * B^T * A; C, D upper triangular
void blasfeo_dsyr2k_ut(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// diagonal
// D <= alpha * A * B + beta * C, with A diagonal (stored as strvec)
void dgemm_diag_left_lib(int m, int n, double alpha, double *dA, double *pB, int sdb, double beta, double *pC, int sdc, double *pD, int sdd);
void blasfeo_dgemm_dn(int m, int n, double alpha, struct blasfeo_dvec *sA, int ai, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A * B + beta * C, with B diagonal (stored as strvec)
void blasfeo_dgemm_nd(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sB, int bi, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
//
// LAPACK
//
// D <= chol( C ) ; C, D lower triangular
void blasfeo_dpotrf_l(int m, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
void blasfeo_dpotrf_l_mn(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= chol( C ) ; C, D upper triangular
void blasfeo_dpotrf_u(int m, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= chol( C + A * B' ) ; C, D lower triangular
// D <= chol( C + A * B^T ) ; C, D lower triangular
void blasfeo_dsyrk_dpotrf_ln(int m, int k, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
void blasfeo_dsyrk_dpotrf_ln_mn(int m, int n, int k, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= lu( C ) ; no pivoting
void blasfeo_dgetrf_np(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= lu( C ) ; row pivoting
void blasfeo_dgetrf_rp(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj, int *ipiv);
// D <= qr( C )
int blasfeo_dgeqrf_worksize(int m, int n); // in bytes
void blasfeo_dgeqrf(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj, void *work);
// D <= Q factor, where C is the output of the LQ factorization
int blasfeo_dorglq_worksize(int m, int n, int k); // in bytes
void blasfeo_dorglq(int m, int n, int k, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj, void *work);
// D <= lq( C )
void blasfeo_dgelqf(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj, void *work);
int blasfeo_dgelqf_worksize(int m, int n); // in bytes
// D <= lq( C ), positive diagonal elements
void blasfeo_dgelqf_pd(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj, void *work);
// [L, A] <= lq( [L, A] ), positive diagonal elements, array of matrices, with
// L lower triangular, of size (m)x(m)
// A full, of size (m)x(n1)
void blasfeo_dgelqf_pd_la(int m, int n1, struct blasfeo_dmat *sL, int li, int lj, struct blasfeo_dmat *sA, int ai, int aj, void *work);
// [L, L, A] <= lq( [L, L, A] ), positive diagonal elements, array of matrices, with:
// L lower triangular, of size (m)x(m)
// A full, of size (m)x(n1)
void blasfeo_dgelqf_pd_lla(int m, int n1, struct blasfeo_dmat *sL0, int l0i, int l0j, struct blasfeo_dmat *sL1, int l1i, int l1j, struct blasfeo_dmat *sA, int ai, int aj, void *work);
//
// BLAS API helper functions
//
#if ( defined(BLAS_API) & defined(MF_PANELMAJ) )
// BLAS 3
void blasfeo_cm_dgemm_nn(int m, int n, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dgemm_nt(int m, int n, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dgemm_tn(int m, int n, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dgemm_tt(int m, int n, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyrk_ln(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyrk_lt(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyrk_un(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyrk_ut(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyrk3_ln(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyrk3_lt(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyrk3_un(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyrk3_ut(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_llnn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_llnu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_lltn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_lltu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_lunn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_lunu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_lutn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_lutu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_rlnn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_rlnu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_rltn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_rltu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_runn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_runu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_rutn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrsm_rutu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_llnn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_llnu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_lltn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_lltu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_lunn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_lunu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_lutn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_lutu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_rlnn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_rlnu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_rltn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_rltu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_runn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_runu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_rutn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dtrmm_rutu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyr2k_ln(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyr2k_lt(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyr2k_un(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dsyr2k_ut(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
// BLAS 2
void blasfeo_cm_dgemv_n(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dvec *sx, int xi, double beta, struct blasfeo_cm_dvec *sy, int yi, struct blasfeo_cm_dvec *sz, int zi);
void blasfeo_cm_dgemv_t(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dvec *sx, int xi, double beta, struct blasfeo_cm_dvec *sy, int yi, struct blasfeo_cm_dvec *sz, int zi);
void blasfeo_cm_dsymv_l(int m, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dvec *sx, int xi, double beta, struct blasfeo_cm_dvec *sy, int yi, struct blasfeo_cm_dvec *sz, int zi);
void blasfeo_cm_dsymv_u(int m, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dvec *sx, int xi, double beta, struct blasfeo_cm_dvec *sy, int yi, struct blasfeo_cm_dvec *sz, int zi);
void blasfeo_cm_dger(int m, int n, double alpha, struct blasfeo_cm_dvec *sx, int xi, struct blasfeo_cm_dvec *sy, int yi, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
// LAPACK
void blasfeo_cm_dpotrf_l(int m, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dpotrf_u(int m, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj);
void blasfeo_cm_dgetrf_rp(int m, int n, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj, int *ipiv);
#endif
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_D_BLASFEO_API_H_

View File

@@ -1,147 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_D_BLASFEO_API_REF_H_
#define BLASFEO_D_BLASFEO_API_REF_H_
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
// expose reference BLASFEO for testing
// --- level 1
void blasfeo_daxpy_ref(int kmax, double alpha, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sy, int yi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_daxpby_ref(int kmax, double alpha, struct blasfeo_dvec_ref *sx, int xi, double beta, struct blasfeo_dvec_ref *sy, int yi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dvecmul_ref(int m, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sy, int yi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dvecmulacc_ref(int m, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sy, int yi, struct blasfeo_dvec_ref *sz, int zi);
double blasfeo_dvecmuldot_ref(int m, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sy, int yi, struct blasfeo_dvec_ref *sz, int zi);
double blasfeo_ddot_ref(int m, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sy, int yi);
void blasfeo_drotg_ref(double a, double b, double *c, double *s);
void blasfeo_dcolrot_ref(int m, struct blasfeo_dmat_ref *sA, int ai, int aj0, int aj1, double c, double s);
void blasfeo_drowrot_ref(int m, struct blasfeo_dmat_ref *sA, int ai0, int ai1, int aj, double c, double s);
// --- level 2
// dense
void blasfeo_dgemv_n_ref(int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, double beta, struct blasfeo_dvec_ref *sy, int yi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dgemv_t_ref(int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, double beta, struct blasfeo_dvec_ref *sy, int yi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrsv_lnn_mn_ref(int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrsv_ltn_mn_ref(int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrsv_lnn_ref(int m, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrsv_lnu_ref(int m, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrsv_ltn_ref(int m, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrsv_ltu_ref(int m, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrsv_unn_ref(int m, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrsv_utn_ref(int m, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrmv_unn_ref(int m, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrmv_utn_ref(int m, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrmv_lnn_ref(int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrmv_ltn_ref(int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrmv_lnu_ref(int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dtrmv_ltu_ref(int m, int n, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, struct blasfeo_dvec_ref *sz, int zi);
void blasfeo_dgemv_nt_ref(int m, int n, double alpha_n, double alpha_t, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx_n, int xi_n, struct blasfeo_dvec_ref *sx_t, int xi_t, double beta_n, double beta_t, struct blasfeo_dvec_ref *sy_n, int yi_n, struct blasfeo_dvec_ref *sy_t, int yi_t, struct blasfeo_dvec_ref *sz_n, int zi_n, struct blasfeo_dvec_ref *sz_t, int zi_t);
void blasfeo_dsymv_l_ref(int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sx, int xi, double beta, struct blasfeo_dvec_ref *sy, int yi, struct blasfeo_dvec_ref *sz, int zi);
// diagonal
void blasfeo_dgemv_d_ref(int m, double alpha, struct blasfeo_dvec_ref *sA, int ai, struct blasfeo_dvec_ref *sx, int xi, double beta, struct blasfeo_dvec_ref *sy, int yi, struct blasfeo_dvec_ref *sz, int zi);
// --- level 3
// dense
void blasfeo_dgemm_nn_ref( int m, int n, int k, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dgemm_nt_ref( int m, int n, int k, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dgemm_tn_ref(int m, int n, int k, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dgemm_tt_ref(int m, int n, int k, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dsyrk_ln_mn_ref( int m, int n, int k, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dsyrk_ln_ref( int m, int k, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dsyrk_lt_ref( int m, int k, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dsyrk_un_ref( int m, int k, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dsyrk_ut_ref( int m, int k, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrmm_rutn_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrmm_rlnn_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_lunu_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_lunn_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_lutu_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_lutn_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_llnu_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_llnn_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_lltu_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_lltn_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_runu_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_runn_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_rutu_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_rutn_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_rlnu_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_rlnn_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_rltu_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dtrsm_rltn_ref( int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sD, int di, int dj);
// diagonal
void dgemm_diag_left_lib_ref(int m, int n, double alpha, double *dA, double *pB, int sdb, double beta, double *pC, int sdc, double *pD, int sdd);
void blasfeo_dgemm_dn_ref(int m, int n, double alpha, struct blasfeo_dvec_ref *sA, int ai, struct blasfeo_dmat_ref *sB, int bi, int bj, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dgemm_nd_ref(int m, int n, double alpha, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dvec_ref *sB, int bi, double beta, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
// --- lapack
void blasfeo_dgetrf_nopivot_ref(int m, int n, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dgetrf_rowpivot_ref(int m, int n, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj, int *ipiv);
void blasfeo_dpotrf_l_ref(int m, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dpotrf_l_mn_ref(int m, int n, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dsyrk_dpotrf_ln_ref(int m, int k, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dsyrk_dpotrf_ln_mn_ref(int m, int n, int k, struct blasfeo_dmat_ref *sA, int ai, int aj, struct blasfeo_dmat_ref *sB, int bi, int bj, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dgetrf_nopivot_ref(int m, int n, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj);
void blasfeo_dgetrf_rowpivot_ref(int m, int n, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj, int *ipiv);
void blasfeo_dgeqrf_ref(int m, int n, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj, void *work);
void blasfeo_dgelqf_ref(int m, int n, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj, void *work);
void blasfeo_dgelqf_pd_ref(int m, int n, struct blasfeo_dmat_ref *sC, int ci, int cj, struct blasfeo_dmat_ref *sD, int di, int dj, void *work);
void blasfeo_dgelqf_pd_la_ref(int m, int n1, struct blasfeo_dmat_ref *sL, int li, int lj, struct blasfeo_dmat_ref *sA, int ai, int aj, void *work);
void blasfeo_dgelqf_pd_lla_ref(int m, int n1, struct blasfeo_dmat_ref *sL0, int l0i, int l0j, struct blasfeo_dmat_ref *sL1, int l1i, int l1j, struct blasfeo_dmat_ref *sA, int ai, int aj, void *work);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_D_BLASFEO_API_REF_H_

View File

@@ -1,84 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_D_BLASFEO_HP_API_H_
#define BLASFEO_D_BLASFEO_HP_API_H_
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
//
// level 3 BLAS
//
// dense
// D <= beta * C + alpha * A^T * B
void blasfeo_hp_dgemm_tn(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T; C, D lower triangular
void blasfeo_hp_dsyrk_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * A^T ; C, D lower triangular
void blasfeo_hp_dsyrk3_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A lower triangular
void blasfeo_hp_dtrsm_rltn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
//
// level 2 BLAS
//
// dense
// z <= beta * y + alpha * A * x
void blasfeo_hp_dgemv_n(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_D_BLASFEO_HP_API_H_

View File

@@ -1,283 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_D_BLASFEO_REF_API_H_
#define BLASFEO_D_BLASFEO_REF_API_H_
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
//
// level 1 BLAS
//
// z = y + alpha*x
// z[zi:zi+n] = alpha*x[xi:xi+n] + y[yi:yi+n]
// NB: Different arguments semantic compare to equivalent standard BLAS routine
void blasfeo_ref_daxpy(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z = beta*y + alpha*x
void blasfeo_ref_daxpby(int kmax, double alpha, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z = x .* y
void blasfeo_ref_dvecmul(int m, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z += x .* y
void blasfeo_ref_dvecmulacc(int m, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z = x .* y, return sum(z) = x^T * y
double blasfeo_ref_dvecmuldot(int m, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// return x^T * y
double blasfeo_ref_ddot(int m, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi);
// construct givens plane rotation
void blasfeo_ref_drotg(double a, double b, double *c, double *s);
// apply plane rotation [a b] [c -s; s; c] to the aj0 and aj1 columns of A at row index ai
void blasfeo_ref_dcolrot(int m, struct blasfeo_dmat *sA, int ai, int aj0, int aj1, double c, double s);
// apply plane rotation [c s; -s c] [a; b] to the ai0 and ai1 rows of A at column index aj
void blasfeo_ref_drowrot(int m, struct blasfeo_dmat *sA, int ai0, int ai1, int aj, double c, double s);
//
// level 2 BLAS
//
// dense
// z <= beta * y + alpha * A * x
void blasfeo_ref_dgemv_n(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z <= beta * y + alpha * A' * x
void blasfeo_ref_dgemv_t(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A ) * x, A (m)x(n)
void blasfeo_ref_dtrsv_lnn_mn(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(n)
void blasfeo_ref_dtrsv_ltn_mn(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A ) * x, A (m)x(m) lower, not_transposed, not_unit
void blasfeo_ref_dtrsv_lnn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A ) * x, A (m)x(m) lower, not_transposed, unit
void blasfeo_ref_dtrsv_lnu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) lower, transposed, not_unit
void blasfeo_ref_dtrsv_ltn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) lower, transposed, unit
void blasfeo_ref_dtrsv_ltu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) upper, not_transposed, not_unit
void blasfeo_ref_dtrsv_unn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) upper, transposed, not_unit
void blasfeo_ref_dtrsv_utn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= A * x ; A lower triangular
void blasfeo_ref_dtrmv_lnn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= A * x ; A lower triangular, unit diagonal
void blasfeo_ref_dtrmv_lnu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= A' * x ; A lower triangular
void blasfeo_ref_dtrmv_ltn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= A' * x ; A lower triangular, unit diagonal
void blasfeo_ref_dtrmv_ltu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= beta * y + alpha * A * x ; A upper triangular
void blasfeo_ref_dtrmv_unn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z <= A' * x ; A upper triangular
void blasfeo_ref_dtrmv_utn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi);
// z_n <= beta_n * y_n + alpha_n * A * x_n
// z_t <= beta_t * y_t + alpha_t * A' * x_t
void blasfeo_ref_dgemv_nt(int m, int n, double alpha_n, double alpha_t, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx_n, int xi_n, struct blasfeo_dvec *sx_t, int xi_t, double beta_n, double beta_t, struct blasfeo_dvec *sy_n, int yi_n, struct blasfeo_dvec *sy_t, int yi_t, struct blasfeo_dvec *sz_n, int zi_n, struct blasfeo_dvec *sz_t, int zi_t);
// z <= beta * y + alpha * A * x, where A is symmetric and only the lower triangular patr of A is accessed
void blasfeo_ref_dsymv_l(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
void blasfeo_ref_dsymv_l_mn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// z <= beta * y + alpha * A * x, where A is symmetric and only the upper triangular patr of A is accessed
void blasfeo_ref_dsymv_u(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
// D = C + alpha * x * y^T
void blasfeo_ref_dger(int m, int n, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// diagonal
// z <= beta * y + alpha * A * x, A diagonal
void blasfeo_ref_dgemv_d(int m, double alpha, struct blasfeo_dvec *sA, int ai, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi);
//
// level 3 BLAS
//
// dense
// D <= beta * C + alpha * A * B
void blasfeo_ref_dgemm_nn(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T
void blasfeo_ref_dgemm_nt(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B
void blasfeo_ref_dgemm_tn(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B^T
void blasfeo_ref_dgemm_tt(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T ; C, D lower triangular
void blasfeo_ref_dsyrk_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
void blasfeo_ref_dsyrk_ln_mn(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B ; C, D lower triangular
void blasfeo_ref_dsyrk_lt(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T ; C, D upper triangular
void blasfeo_ref_dsyrk_un(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B ; C, D upper triangular
void blasfeo_ref_dsyrk_ut(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A * B ; A lower triangular
void blasfeo_ref_dtrmm_llnn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A * B ; A lower triangular
void blasfeo_ref_dtrmm_llnu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^T * B ; A lower triangular
void blasfeo_ref_dtrmm_lltn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^T * B ; A lower triangular
void blasfeo_ref_dtrmm_lltu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A * B ; A upper triangular
void blasfeo_ref_dtrmm_lunn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A * B ; A upper triangular
void blasfeo_ref_dtrmm_lunu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^T * B ; A upper triangular
void blasfeo_ref_dtrmm_lutn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^T * B ; A upper triangular
void blasfeo_ref_dtrmm_lutu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A ; A lower triangular
void blasfeo_ref_dtrmm_rlnn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A ; A lower triangular
void blasfeo_ref_dtrmm_rlnu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^T ; A lower triangular
void blasfeo_ref_dtrmm_rltn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^T ; A lower triangular
void blasfeo_ref_dtrmm_rltu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A ; A upper triangular
void blasfeo_ref_dtrmm_runn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A ; A upper triangular
void blasfeo_ref_dtrmm_runu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^T ; A upper triangular
void blasfeo_ref_dtrmm_rutn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^T ; A upper triangular
void blasfeo_ref_dtrmm_rutu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A lower triangular employint explicit inverse of diagonal
void blasfeo_ref_dtrsm_llnn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A lower triangular with unit diagonal
void blasfeo_ref_dtrsm_llnu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A lower triangular employint explicit inverse of diagonal
void blasfeo_ref_dtrsm_lltn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A lower triangular with unit diagonal
void blasfeo_ref_dtrsm_lltu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A upper triangular employing explicit inverse of diagonal
void blasfeo_ref_dtrsm_lunn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A upper triangular withunit diagonal
void blasfeo_ref_dtrsm_lunu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A upper triangular employing explicit inverse of diagonal
void blasfeo_ref_dtrsm_lutn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A upper triangular withunit diagonal
void blasfeo_ref_dtrsm_lutu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A lower triangular employing explicit inverse of diagonal
void blasfeo_ref_dtrsm_rlnn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A lower triangular with unit diagonal
void blasfeo_ref_dtrsm_rlnu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A lower triangular employing explicit inverse of diagonal
void blasfeo_ref_dtrsm_rltn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A lower triangular with unit diagonal
void blasfeo_ref_dtrsm_rltu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A upper triangular employing explicit inverse of diagonal
void blasfeo_ref_dtrsm_runn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A upper triangular with unit diagonal
void blasfeo_ref_dtrsm_runu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A upper triangular employing explicit inverse of diagonal
void blasfeo_ref_dtrsm_rutn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A upper triangular with unit diagonal
void blasfeo_ref_dtrsm_rutu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T + alpha * B * A^T ; C, D lower triangular
void blasfeo_ref_dsyr2k_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B + alpha * B^T * A ; C, D lower triangular
void blasfeo_ref_dsyr2k_lt(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T + alpha * B * A^T ; C, D upper triangular
void blasfeo_ref_dsyr2k_un(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B + alpha * B^T * A ; C, D upper triangular
void blasfeo_ref_dsyr2k_ut(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// diagonal
// D <= alpha * A * B + beta * C, with A diagonal (stored as strvec)
void dgemm_diag_left_lib(int m, int n, double alpha, double *dA, double *pB, int sdb, double beta, double *pC, int sdc, double *pD, int sdd);
void blasfeo_ref_dgemm_dn(int m, int n, double alpha, struct blasfeo_dvec *sA, int ai, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= alpha * A * B + beta * C, with B diagonal (stored as strvec)
void blasfeo_ref_dgemm_nd(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sB, int bi, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
//
// LAPACK
//
// D <= chol( C ) ; C, D lower triangular
void blasfeo_ref_dpotrf_l(int m, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
void blasfeo_ref_dpotrf_l_mn(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= chol( C ) ; C, D upper triangular
void blasfeo_ref_dpotrf_u(int m, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= chol( C + A * B' ) ; C, D lower triangular
void blasfeo_ref_dsyrk_dpotrf_ln(int m, int k, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
void blasfeo_ref_dsyrk_dpotrf_ln_mn(int m, int n, int k, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= lu( C ) ; no pivoting
void blasfeo_ref_dgetrf_np(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj);
// D <= lu( C ) ; row pivoting
void blasfeo_ref_dgetrf_rp(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj, int *ipiv);
// D <= qr( C )
int blasfeo_ref_dgeqrf_worksize(int m, int n); // in bytes
void blasfeo_ref_dgeqrf(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj, void *work);
// D <= Q factor, where C is the output of the LQ factorization
int blasfeo_ref_dorglq_worksize(int m, int n, int k); // in bytes
void blasfeo_ref_dorglq(int m, int n, int k, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj, void *work);
// D <= lq( C )
void blasfeo_ref_dgelqf(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj, void *work);
int blasfeo_ref_dgelqf_worksize(int m, int n); // in bytes
// D <= lq( C ), positive diagonal elements
void blasfeo_ref_dgelqf_pd(int m, int n, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj, void *work);
// [L, A] <= lq( [L, A] ), positive diagonal elements, array of matrices, with
// L lower triangular, of size (m)x(m)
// A full, of size (m)x(n1)
void blasfeo_ref_dgelqf_pd_la(int m, int n1, struct blasfeo_dmat *sL, int li, int lj, struct blasfeo_dmat *sA, int ai, int aj, void *work);
// [L, L, A] <= lq( [L, L, A] ), positive diagonal elements, array of matrices, with:
// L lower triangular, of size (m)x(m)
// A full, of size (m)x(n1)
void blasfeo_ref_dgelqf_pd_lla(int m, int n1, struct blasfeo_dmat *sL0, int l0i, int l0j, struct blasfeo_dmat *sL1, int l1i, int l1j, struct blasfeo_dmat *sA, int ai, int aj, void *work);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_D_BLASFEO_REF_API_H_

File diff suppressed because it is too large Load Diff

View File

@@ -1,69 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_I_AUX_EXT_DEP_H_
#define BLASFEO_I_AUX_EXT_DEP_H_
#include "blasfeo_target.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef EXT_DEP
// i_aux_extern_depend_lib
void int_zeros(int **pA, int row, int col);
void int_zeros_align(int **pA, int row, int col);
void int_free(int *pA);
void int_free_align(int *pA);
void int_print_mat(int row, int col, int *A, int lda);
int int_print_to_string_mat(char **buf_out, int row, int col, int *A, int lda);
#endif // EXT_DEP
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_I_AUX_EXT_DEP_H_

View File

@@ -1,57 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_M_AUX_H_
#define BLASFEO_M_AUX_H_
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
void blasfeo_cvt_d2s_vec(int m, struct blasfeo_dvec *vd, int vdi, struct blasfeo_svec *vs, int vsi);
void blasfeo_cvt_s2d_vec(int m, struct blasfeo_svec *vs, int vsi, struct blasfeo_dvec *vd, int vdi);
void blasfeo_cvt_d2s_mat(int m, int n, struct blasfeo_dmat *Md, int mid, int nid, struct blasfeo_smat *Ms, int mis, int nis);
void blasfeo_cvt_s2d_mat(int m, int n, struct blasfeo_smat *Ms, int mis, int nis, struct blasfeo_dmat *Md, int mid, int nid);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_M_AUX_H_

View File

@@ -1,62 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2020 by Gianluca Frison. *
* All rights reserved. *
* *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_MEMORY_H_
#define BLASFEO_MEMORY_H_
#ifdef __cplusplus
extern "C" {
#endif
//
int blasfeo_is_init();
//
void blasfeo_init();
//
void blasfeo_quit();
//
void *blasfeo_get_buffer();
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_MEMORY_H_

View File

@@ -1,77 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
/*
* ----------- Naming conventions
*
* (precision)(data)
*
* 1) d(double)
* s(single)
*
* 2) ge(general)
* tr(triangular)
* vec(vector)
* row(row)
* col(column)
* dia(diagonal)
*
* 3) se(set)
* cp(copy)
* sc(scale)
* ad(add)
* tr(transpose)
* in(insert)
* ex(extract)
* pe(premute)
* sw(swap)
*
* f(factorization)
*
* lqf(LQ factorization)
* qrf (factorization)
* trf (LU factorization using partial pivoting with row interchanges.)
*
* 4) _l(lower) / _u(upper)
* _lib8 (hp implementation, 8 rows kernel)
* _lib4 (hp implementation, 4 rows kernel)
* _lib0 (hp interface with reference implentation)
* _lib (reference implementation)
* _libref (reference implementation with dedicated namespace)
*
* 5) _sp(sparse)
* _exp(exponential format)
*/

View File

@@ -1,88 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Ian McInerney *
* *
**************************************************************************************************/
#ifndef BLASFEO_PROCESSOR_FEATURES_H_
#define BLASFEO_PROCESSOR_FEATURES_H_
/**
* Flags to indicate the different processor features
*/
//enum
//{
// // x86-64 CPU features
// BLASFEO_PROCESSOR_FEATURE_AVX = 0x0001, /// AVX instruction set
// BLASFEO_PROCESSOR_FEATURE_AVX2 = 0x0002, /// AVX2 instruction set
// BLASFEO_PROCESSOR_FEATURE_FMA = 0x0004, /// FMA instruction set
// BLASFEO_PROCESSOR_FEATURE_SSE3 = 0x0008, /// SSE3 instruction set
//
// // ARM CPU features
// BLASFEO_PROCESSOR_FEATURE_VFPv3 = 0x0100, /// VFPv3 instruction set
// BLASFEO_PROCESSOR_FEATURE_NEON = 0x0100, /// NEON instruction set
// BLASFEO_PROCESSOR_FEATURE_VFPv4 = 0x0100, /// VFPv4 instruction set
// BLASFEO_PROCESSOR_FEATURE_NEONv2 = 0x0100, /// NEONv2 instruction set
//} BLASFEO_PROCESSOR_FEATURES;
/**
* Test the features that this processor provides against what the library was compiled with.
*
* @param features - Pointer to an integer to store the supported feature set (using the flags in the BLASFEO_PROCESSOR_FEATURES enum)
* @return 0 if current processor doesn't support all features required for this library, 1 otherwise
*/
int blasfeo_processor_cpu_features( int* features );
/**
* Test the features that this processor provides against what the library was compiled with.
*
* @param features - Pointer to an integer to store the supported feature set (using the flags in the BLASFEO_PROCESSOR_FEATURES enum)
* @return 0 if current processor doesn't support all features required for this library, 1 otherwise
*/
void blasfeo_processor_library_features( int* features );
/**
* Create a string listing the features the current processor supports.
*
* @param features - Flags from the BLASFEO_PROCESSOR_FEATURES enum indicating the features supported
* @param featureString - Character array to store the feature string in
*/
void blasfeo_processor_feature_string( int features, char* featureString );
/**
* Get a string listing the processor features that this library version needs to run.
*
* @param featureString - Character array to store the feature string in
*/
void blasfeo_processor_library_string( char* featureString );
#endif // BLASFEO_PROCESSOR_FEATURES_H_

View File

@@ -1,168 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_AUX_H_
#define BLASFEO_S_AUX_H_
#include <stdlib.h>
#include "blasfeo_s_aux_old.h"
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/************************************************
* d_aux_lib.c
************************************************/
// returns the memory size (in bytes) needed for a smat
size_t blasfeo_memsize_smat(int m, int n);
size_t blasfeo_memsize_smat_ps(int ps, int m, int n);
// returns the memory size (in bytes) needed for the diagonal of a smat
size_t blasfeo_memsize_diag_smat(int m, int n);
// returns the memory size (in bytes) needed for a svec
size_t blasfeo_memsize_svec(int m);
// create a strmat for a matrix of size m*n by using memory passed by a pointer (pointer is not updated)
void blasfeo_create_smat(int m, int n, struct blasfeo_smat *sA, void *memory);
void blasfeo_create_smat_ps(int ps, int m, int n, struct blasfeo_smat *sA, void *memory);
// create a strvec for a vector of size m by using memory passed by a pointer (pointer is not updated)
void blasfeo_create_svec(int m, struct blasfeo_svec *sA, void *memory);
void blasfeo_pack_smat(int m, int n, float *A, int lda, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_pack_l_smat(int m, int n, float *A, int lda, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_pack_u_smat(int m, int n, float *A, int lda, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_pack_tran_smat(int m, int n, float *A, int lda, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_pack_svec(int m, float *x, int xi, struct blasfeo_svec *sa, int ai);
void blasfeo_unpack_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj, float *A, int lda);
void blasfeo_unpack_tran_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj, float *A, int lda);
void blasfeo_unpack_svec(int m, struct blasfeo_svec *sa, int ai, float *x, int xi);
//void s_cast_mat2strmat(float *A, struct blasfeo_smat *sA);
//void s_cast_diag_mat2strmat(float *dA, struct blasfeo_smat *sA);
//void s_cast_vec2vecmat(float *a, struct blasfeo_svec *sa);
// ge
void blasfeo_sgese(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_sgecpsc(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_sgecp(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_sgesc(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_sgein1(float a, struct blasfeo_smat *sA, int ai, int aj);
float blasfeo_sgeex1(struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_sgead(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_sgetr(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// tr
void blasfeo_strcp_l(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_strtr_l(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_strtr_u(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// dia
void blasfeo_sdiare(int kmax, float alpha, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_sdiaex(int kmax, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi);
void blasfeo_sdiain(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_sdiain_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_sdiaex_sp(int kmax, float alpha, int *idx, struct blasfeo_smat *sD, int di, int dj, struct blasfeo_svec *sx, int xi);
void blasfeo_sdiaad(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_sdiaad_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_sdiaadin_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, int *idx, struct blasfeo_smat *sD, int di, int dj);
// row
void blasfeo_srowin(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_srowex(int kmax, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi);
void blasfeo_srowad(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_srowad_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_srowsw(int kmax, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_srowpe(int kmax, int *ipiv, struct blasfeo_smat *sA);
void blasfeo_srowpei(int kmax, int *ipiv, struct blasfeo_smat *sA);
// col
void blasfeo_scolex(int kmax, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi);
void blasfeo_scolin(int kmax, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_scolad(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_scolsc(int kmax, float alpha, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_scolsw(int kmax, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_scolpe(int kmax, int *ipiv, struct blasfeo_smat *sA);
void blasfeo_scolpei(int kmax, int *ipiv, struct blasfeo_smat *sA);
// vec
void blasfeo_svecse(int m, float alpha, struct blasfeo_svec *sx, int xi);
void blasfeo_sveccp(int m, struct blasfeo_svec *sa, int ai, struct blasfeo_svec *sc, int ci);
void blasfeo_svecsc(int m, float alpha, struct blasfeo_svec *sa, int ai);
void blasfeo_sveccpsc(int m, float alpha, struct blasfeo_svec *sa, int ai, struct blasfeo_svec *sc, int ci);
void blasfeo_svecad(int m, float alpha, struct blasfeo_svec *sa, int ai, struct blasfeo_svec *sc, int ci);
void blasfeo_svecin1(float a, struct blasfeo_svec *sx, int xi);
float blasfeo_svecex1(struct blasfeo_svec *sx, int xi);
void blasfeo_svecad_sp(int m, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_svec *sz, int zi);
void blasfeo_svecin_sp(int m, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_svec *sz, int zi);
void blasfeo_svecex_sp(int m, float alpha, int *idx, struct blasfeo_svec *sx, int x, struct blasfeo_svec *sz, int zi);
// z += alpha * x[idx]
void blasfeo_svecexad_sp(int m, double alpha, int *idx, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
void blasfeo_sveccl(int m, struct blasfeo_svec *sxm, int xim, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sxp, int xip, struct blasfeo_svec *sz, int zi);
void blasfeo_sveccl_mask(int m, struct blasfeo_svec *sxm, int xim, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sxp, int xip, struct blasfeo_svec *sz, int zi, struct blasfeo_svec *sm, int mi);
void blasfeo_svecze(int m, struct blasfeo_svec *sm, int mi, struct blasfeo_svec *sv, int vi, struct blasfeo_svec *se, int ei);
void blasfeo_svecnrm_inf(int m, struct blasfeo_svec *sx, int xi, float *ptr_norm);
void blasfeo_svecnrm_2(int m, struct blasfeo_svec *sx, int xi, float *ptr_norm);
void blasfeo_svecpe(int kmax, int *ipiv, struct blasfeo_svec *sx, int xi);
void blasfeo_svecpei(int kmax, int *ipiv, struct blasfeo_svec *sx, int xi);
/*
* Explicitly panel-major matrix format
*/
// returns the memory size (in bytes) needed for a dmat
size_t blasfeo_pm_memsize_smat(int ps, int m, int n);
// create a strmat for a matrix of size m*n by using memory passed by a pointer (pointer is not updated)
void blasfeo_pm_create_smat(int ps, int m, int n, struct blasfeo_pm_smat *sA, void *memory);
/*
* Explicitly column-major matrix format
*/
// returns the memory size (in bytes) needed for a dmat
size_t blasfeo_cm_memsize_smat(int m, int n);
// create a strmat for a matrix of size m*n by using memory passed by a pointer (pointer is not updated)
void blasfeo_cm_create_smat(int m, int n, struct blasfeo_pm_smat *sA, void *memory);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_S_AUX_H_

View File

@@ -1,141 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_AUX_EXT_DEP_H_
#define BLASFEO_S_AUX_EXT_DEP_H_
#include <stdio.h>
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef EXT_DEP
/************************************************
* s_aux_extern_depend_lib.c
************************************************/
/* column-major matrices */
// dynamically allocate row*col floats of memory and set accordingly a pointer to float; set allocated memory to zero
void s_zeros(float **pA, int row, int col);
// dynamically allocate row*col floats of memory aligned to 64-byte boundaries and set accordingly a pointer to float; set allocated memory to zero
void s_zeros_align(float **pA, int row, int col);
// dynamically allocate size bytes of memory aligned to 64-byte boundaries and set accordingly a pointer to float; set allocated memory to zero
void s_zeros_align_bytes(float **pA, int size);
// free the memory allocated by d_zeros
void s_free(float *pA);
// free the memory allocated by d_zeros_align or d_zeros_align_bytes
void s_free_align(float *pA);
// print a column-major matrix
void s_print_mat(int m, int n, float *A, int lda);
// print the transposed of a column-major matrix
void s_print_tran_mat(int row, int col, float *A, int lda);
// print to file a column-major matrix
void s_print_to_file_mat(FILE *file, int row, int col, float *A, int lda);
// print to file a column-major matrix in exponential format
void s_print_to_file_exp_mat(FILE *file, int row, int col, float *A, int lda);
// print to string a column-major matrix
void s_print_to_string_mat(char **buf_out, int row, int col, float *A, int lda);
// print to file the transposed of a column-major matrix
void s_print_tran_to_file_mat(FILE *file, int row, int col, float *A, int lda);
// print to file the transposed of a column-major matrix in exponential format
void s_print_tran_to_file_exp_mat(FILE *file, int row, int col, float *A, int lda);
// print in exponential notation a column-major matrix
void s_print_exp_mat(int m, int n, float *A, int lda);
// print in exponential notation the transposed of a column-major matrix
void s_print_exp_tran_mat(int row, int col, float *A, int lda);
/* strmat and strvec */
// create a strmat for a matrix of size m*n by dynamically allocating memory
void blasfeo_allocate_smat(int m, int n, struct blasfeo_smat *sA);
// create a strvec for a vector of size m by dynamically allocating memory
void blasfeo_allocate_svec(int m, struct blasfeo_svec *sa);
// free the memory allocated by blasfeo_allocate_dmat
void blasfeo_free_smat(struct blasfeo_smat *sA);
// free the memory allocated by blasfeo_allocate_dvec
void blasfeo_free_svec(struct blasfeo_svec *sa);
// print a strmat
void blasfeo_print_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj);
// print in exponential notation a strmat
void blasfeo_print_exp_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj);
// print to file a strmat
void blasfeo_print_to_file_smat(FILE *file, int m, int n, struct blasfeo_smat *sA, int ai, int aj);
// print to file a strmat in exponential format
void blasfeo_print_to_file_exp_smat(FILE *file, int m, int n, struct blasfeo_smat *sA, int ai, int aj);
// print to string a strmat
void blasfeo_print_to_string_smat(char **buf_out, int m, int n, struct blasfeo_smat *sA, int ai, int aj);
// print the transpose of a strmat
void blasfeo_print_tran_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj);
// print a strvec
void blasfeo_print_svec(int m, struct blasfeo_svec *sa, int ai);
// print in exponential notation a strvec
void blasfeo_print_exp_svec(int m, struct blasfeo_svec *sa, int ai);
// print to file a strvec
void blasfeo_print_to_file_svec(FILE *file, int m, struct blasfeo_svec *sa, int ai);
// print to string a strvec
void blasfeo_print_to_string_svec(char **buf_out, int m, struct blasfeo_svec *sa, int ai);
// print the transposed of a strvec
void blasfeo_print_tran_svec(int m, struct blasfeo_svec *sa, int ai);
// print in exponential notation the transposed of a strvec
void blasfeo_print_exp_tran_svec(int m, struct blasfeo_svec *sa, int ai);
// print to file the transposed of a strvec
void blasfeo_print_to_file_tran_svec(FILE *file, int m, struct blasfeo_svec *sa, int ai);
// print to string the transposed of a strvec
void blasfeo_print_to_string_tran_svec(char **buf_out, int m, struct blasfeo_svec *sa, int ai);
#endif // EXT_DEP
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_S_AUX_EXT_DEP_H_

View File

@@ -1,82 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_AUX_EXT_DEP_REF_H_
#define BLASFEO_S_AUX_EXT_DEP_REF_H_
#if defined(EXT_DEP)
#include <stdio.h>
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
// expose reference BLASFEO for testing
// see blasfeo_s_aux_exp_dep.h for help
void blasfeo_print_smat_ref(int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj);
void blasfeo_allocate_smat_ref(int m, int n, struct blasfeo_smat_ref *sA);
void blasfeo_allocate_svec_ref(int m, struct blasfeo_svec_ref *sa);
void blasfeo_free_smat_ref(struct blasfeo_smat_ref *sA);
void blasfeo_free_svec_ref(struct blasfeo_svec_ref *sa);
void blasfeo_print_smat_ref(int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj);
void blasfeo_print_exp_smat_ref(int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj);
void blasfeo_print_to_file_smat_ref(FILE *file, int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj);
void blasfeo_print_to_file_exp_smat_ref(FILE *file, int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj);
void blasfeo_print_to_string_smat_ref(char **buf_out, int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj);
void blasfeo_print_svec(int m, struct blasfeo_svec *sa, int ai);
void blasfeo_print_exp_svec(int m, struct blasfeo_svec *sa, int ai);
void blasfeo_print_to_file_svec(FILE *file, int m, struct blasfeo_svec *sa, int ai);
void blasfeo_print_to_string_svec(char **buf_out, int m, struct blasfeo_svec *sa, int ai);
void blasfeo_print_tran_svec(int m, struct blasfeo_svec *sa, int ai);
void blasfeo_print_exp_tran_svec(int m, struct blasfeo_svec *sa, int ai);
void blasfeo_print_to_file_tran_svec(FILE *file, int m, struct blasfeo_svec *sa, int ai);
void blasfeo_print_to_string_tran_svec(char **buf_out, int m, struct blasfeo_svec *sa, int ai);
#ifdef __cplusplus
}
#endif
#endif // EXT_DEP
#endif // BLASFEO_S_AUX_EXT_DEP_REF_H_

View File

@@ -1,64 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
// TODO remove
//
void strcp_l_lib(int m, float alpha, int offsetA, float *A, int sda, int offsetB, float *B, int sdb);
void sgead_lib(int m, int n, float alpha, int offsetA, float *A, int sda, int offsetB, float *B, int sdb);
void sgetr_lib(int m, int n, float alpha, int offsetA, float *pA, int sda, int offsetC, float *pC, int sdc);
void strtr_l_lib(int m, float alpha, int offsetA, float *pA, int sda, int offsetC, float *pC, int sdc);
void strtr_u_lib(int m, float alpha, int offsetA, float *pA, int sda, int offsetC, float *pC, int sdc);
void sdiareg_lib(int kmax, float reg, int offset, float *pD, int sdd);
void sdiain_sqrt_lib(int kmax, float *x, int offset, float *pD, int sdd);
void sdiaex_lib(int kmax, float alpha, int offset, float *pD, int sdd, float *x);
void sdiaad_lib(int kmax, float alpha, float *x, int offset, float *pD, int sdd);
void sdiain_libsp(int kmax, int *idx, float alpha, float *x, float *pD, int sdd);
void sdiaex_libsp(int kmax, int *idx, float alpha, float *pD, int sdd, float *x);
void sdiaad_libsp(int kmax, int *idx, float alpha, float *x, float *pD, int sdd);
void sdiaadin_libsp(int kmax, int *idx, float alpha, float *x, float *y, float *pD, int sdd);
void srowin_lib(int kmax, float alpha, float *x, float *pD);
void srowex_lib(int kmax, float alpha, float *pD, float *x);
void srowad_lib(int kmax, float alpha, float *x, float *pD);
void srowin_libsp(int kmax, float alpha, int *idx, float *x, float *pD);
void srowad_libsp(int kmax, int *idx, float alpha, float *x, float *pD);
void srowadin_libsp(int kmax, int *idx, float alpha, float *x, float *y, float *pD);
void srowsw_lib(int kmax, float *pA, float *pC);
void scolin_lib(int kmax, float *x, int offset, float *pD, int sdd);
void scolad_lib(int kmax, float alpha, float *x, int offset, float *pD, int sdd);
void scolin_libsp(int kmax, int *idx, float *x, float *pD, int sdd);
void scolad_libsp(int kmax, float alpha, int *idx, float *x, float *pD, int sdd);
void scolsw_lib(int kmax, int offsetA, float *pA, int sda, int offsetC, float *pC, int sdc);
void svecin_libsp(int kmax, int *idx, float *x, float *y);
void svecad_libsp(int kmax, int *idx, float alpha, float *x, float *y);

View File

@@ -1,147 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_AUX_REF_H_
#define BLASFEO_S_AUX_REF_H_
#include <stdlib.h>
#include "blasfeo_s_aux_old.h"
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/************************************************
* d_aux_lib.c
************************************************/
// returns the memory size (in bytes) needed for a smat
size_t blasfeo_ref_memsize_smat(int m, int n);
size_t blasfeo_ref_memsize_smat_ps(int ps, int m, int n);
// returns the memory size (in bytes) needed for the diagonal of a smat
size_t blasfeo_ref_memsize_diag_smat(int m, int n);
// returns the memory size (in bytes) needed for a svec
size_t blasfeo_ref_memsize_svec(int m);
// create a strmat for a matrix of size m*n by using memory passed by a pointer (pointer is not updated)
void blasfeo_ref_create_smat(int m, int n, struct blasfeo_smat *sA, void *memory);
void blasfeo_ref_create_smat_ps(int ps, int m, int n, struct blasfeo_smat *sA, void *memory);
// create a strvec for a vector of size m by using memory passed by a pointer (pointer is not updated)
void blasfeo_ref_create_svec(int m, struct blasfeo_svec *sA, void *memory);
void blasfeo_ref_pack_smat(int m, int n, float *A, int lda, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_pack_l_smat(int m, int n, float *A, int lda, struct blasfeo_smat *sB, int bi, int bj);
void blasfeo_ref_pack_l_smat(int m, int n, float *A, int lda, struct blasfeo_smat *sB, int bi, int bj);
void blasfeo_ref_pack_tran_smat(int m, int n, float *A, int lda, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_pack_svec(int m, float *x, int xi, struct blasfeo_svec *sa, int ai);
void blasfeo_ref_unpack_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj, float *A, int lda);
void blasfeo_ref_unpack_tran_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj, float *A, int lda);
void blasfeo_ref_unpack_svec(int m, struct blasfeo_svec *sa, int ai, float *x, int xi);
void ref_s_cast_mat2strmat(float *A, struct blasfeo_smat *sA);
void ref_s_cast_diag_mat2strmat(float *dA, struct blasfeo_smat *sA);
void ref_s_cast_vec2vecmat(float *a, struct blasfeo_svec *sa);
// ge
void blasfeo_ref_sgese(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_sgecpsc(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_ref_sgecp(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_ref_sgesc(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_sgein1(float a, struct blasfeo_smat *sA, int ai, int aj);
float blasfeo_ref_sgeex1(struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_sgead(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_ref_sgetr(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// tr
void blasfeo_ref_strcp_l(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_ref_strtr_l(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_ref_strtr_u(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// dia
void blasfeo_ref_sdiare(int kmax, float alpha, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_sdiaex(int kmax, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi);
void blasfeo_ref_sdiain(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_sdiain_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_ref_sdiaex_sp(int kmax, float alpha, int *idx, struct blasfeo_smat *sD, int di, int dj, struct blasfeo_svec *sx, int xi);
void blasfeo_ref_sdiaad(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_sdiaad_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_ref_sdiaadin_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, int *idx, struct blasfeo_smat *sD, int di, int dj);
// row
void blasfeo_ref_srowin(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_srowex(int kmax, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi);
void blasfeo_ref_srowad(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_srowad_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_ref_srowsw(int kmax, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_ref_srowpe(int kmax, int *ipiv, struct blasfeo_smat *sA);
void blasfeo_ref_srowpei(int kmax, int *ipiv, struct blasfeo_smat *sA);
// col
void blasfeo_ref_scolex(int kmax, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi);
void blasfeo_ref_scolin(int kmax, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_scolad(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_scolsc(int kmax, float alpha, struct blasfeo_smat *sA, int ai, int aj);
void blasfeo_ref_scolsw(int kmax, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void blasfeo_ref_scolpe(int kmax, int *ipiv, struct blasfeo_smat *sA);
void blasfeo_ref_scolpei(int kmax, int *ipiv, struct blasfeo_smat *sA);
// vec
void blasfeo_ref_svecse(int m, float alpha, struct blasfeo_svec *sx, int xi);
void blasfeo_ref_sveccp(int m, struct blasfeo_svec *sa, int ai, struct blasfeo_svec *sc, int ci);
void blasfeo_ref_svecsc(int m, float alpha, struct blasfeo_svec *sa, int ai);
void blasfeo_ref_sveccpsc(int m, float alpha, struct blasfeo_svec *sa, int ai, struct blasfeo_svec *sc, int ci);
void blasfeo_ref_svecad(int m, float alpha, struct blasfeo_svec *sa, int ai, struct blasfeo_svec *sc, int ci);
void blasfeo_ref_svecin1(float a, struct blasfeo_svec *sx, int xi);
float blasfeo_ref_svecex1(struct blasfeo_svec *sx, int xi);
void blasfeo_ref_svecad_sp(int m, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_svec *sz, int zi);
void blasfeo_ref_svecin_sp(int m, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_svec *sz, int zi);
void blasfeo_ref_svecex_sp(int m, float alpha, int *idx, struct blasfeo_svec *sx, int x, struct blasfeo_svec *sz, int zi);
// z += alpha * x[idx]
void blasfeo_ref_svecexad_sp(int m, double alpha, int *idx, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
void blasfeo_ref_sveccl(int m, struct blasfeo_svec *sxm, int xim, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sxp, int xip, struct blasfeo_svec *sz, int zi);
void blasfeo_ref_sveccl_mask(int m, struct blasfeo_svec *sxm, int xim, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sxp, int xip, struct blasfeo_svec *sz, int zi, struct blasfeo_svec *sm, int mi);
void blasfeo_ref_svecze(int m, struct blasfeo_svec *sm, int mi, struct blasfeo_svec *sv, int vi, struct blasfeo_svec *se, int ei);
void blasfeo_ref_svecnrm_inf(int m, struct blasfeo_svec *sx, int xi, float *ptr_norm);
void blasfeo_ref_svecpe(int kmax, int *ipiv, struct blasfeo_svec *sx, int xi);
void blasfeo_ref_svecpei(int kmax, int *ipiv, struct blasfeo_svec *sx, int xi);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_S_AUX_REF_H_

View File

@@ -1,177 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_AUX_TEST_H_
#define BLASFEO_S_AUX_TEST_H_
#include <stdio.h>
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/************************************************
* d_aux_lib.c
************************************************/
int test_blasfeo_memsize_smat(int m, int n);
int test_blasfeo_memsize_diag_smat(int m, int n);
int test_blasfeo_memsize_svec(int m);
void test_blasfeo_create_smat(int m, int n, struct blasfeo_smat *sA, void *memory);
void test_blasfeo_create_svec(int m, struct blasfeo_svec *sA, void *memory);
void test_blasfeo_pack_smat(int m, int n, float *A, int lda, struct blasfeo_smat *sA, int ai, int aj);
void test_blasfeo_pack_svec(int m, float *x, int xi, struct blasfeo_svec *sa, int ai);
void test_blasfeo_pack_tran_smat(int m, int n, float *A, int lda, struct blasfeo_smat *sA, int ai, int aj);
void test_blasfeo_unpack_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj, float *A, int lda);
void test_blasfeo_unpack_svec(int m, struct blasfeo_svec *sa, int ai, float *x, int xi);
void test_blasfeo_unpack_tran_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj, float *A, int lda);
void test_s_cast_mat2strmat(float *A, struct blasfeo_smat *sA);
void test_s_cast_diag_mat2strmat(float *dA, struct blasfeo_smat *sA);
void test_s_cast_vec2vecmat(float *a, struct blasfeo_svec *sa);
// copy and scale
void test_blasfeo_sgecpsc(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void test_blasfeo_sgecp(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
void test_blasfeo_sgesc(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj);
// void test_blasfeo_sgein1(float a, struct blasfeo_smat *sA, int ai, int aj);
// float test_blasfeo_sgeex1(struct blasfeo_smat *sA, int ai, int aj);
// void test_blasfeo_svecin1(float a, struct blasfeo_svec *sx, int xi);
// float test_blasfeo_svecex1(struct blasfeo_svec *sx, int xi);
// // A <= alpha
// void test_blasfeo_sgese(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj);
// // a <= alpha
// void test_blasfeo_svecse(int m, float alpha, struct blasfeo_svec *sx, int xi);
// void test_blasfeo_sveccp(int m, struct blasfeo_svec *sa, int ai, struct blasfeo_svec *sc, int ci);
// void test_blasfeo_svecsc(int m, float alpha, struct blasfeo_svec *sa, int ai);
// void test_strcp_l_lib(int m, float alpha, int offsetA, float *A, int sda, int offsetB, float *B, int sdb);
// void test_blasfeo_strcp_l(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// void test_sgead_lib(int m, int n, float alpha, int offsetA, float *A, int sda, int offsetB, float *B, int sdb);
// void test_blasfeo_sgead(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// void test_blasfeo_svecad(int m, float alpha, struct blasfeo_svec *sa, int ai, struct blasfeo_svec *sc, int ci);
// void test_sgetr_lib(int m, int n, float alpha, int offsetA, float *pA, int sda, int offsetC, float *pC, int sdc);
// void test_blasfeo_sgetr(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// void test_strtr_l_lib(int m, float alpha, int offsetA, float *pA, int sda, int offsetC, float *pC, int sdc);
// void test_blasfeo_strtr_l(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// void test_strtr_u_lib(int m, float alpha, int offsetA, float *pA, int sda, int offsetC, float *pC, int sdc);
// void test_blasfeo_strtr_u(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// void test_sdiareg_lib(int kmax, float reg, int offset, float *pD, int sdd);
// void test_blasfeo_sdiaex(int kmax, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi);
// void test_blasfeo_sdiain(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
// void test_sdiain_sqrt_lib(int kmax, float *x, int offset, float *pD, int sdd);
// void test_sdiaex_lib(int kmax, float alpha, int offset, float *pD, int sdd, float *x);
// void test_sdiaad_lib(int kmax, float alpha, float *x, int offset, float *pD, int sdd);
// void test_sdiain_libsp(int kmax, int *idx, float alpha, float *x, float *pD, int sdd);
// void test_blasfeo_sdiain_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_smat *sD, int di, int dj);
// void test_sdiaex_libsp(int kmax, int *idx, float alpha, float *pD, int sdd, float *x);
// void test_blasfeo_sdiaex_sp(int kmax, float alpha, int *idx, struct blasfeo_smat *sD, int di, int dj, struct blasfeo_svec *sx, int xi);
// void test_blasfeo_sdiaad(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
// void test_sdiaad_libsp(int kmax, int *idx, float alpha, float *x, float *pD, int sdd);
// void test_blasfeo_sdiaad_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_smat *sD, int di, int dj);
// void test_sdiaadin_libsp(int kmax, int *idx, float alpha, float *x, float *y, float *pD, int sdd);
// void test_blasfeo_sdiaadin_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, int *idx, struct blasfeo_smat *sD, int di, int dj);
// void test_srowin_lib(int kmax, float alpha, float *x, float *pD);
// void test_blasfeo_srowin(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
// void test_srowex_lib(int kmax, float alpha, float *pD, float *x);
// void test_blasfeo_srowex(int kmax, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi);
// void test_srowad_lib(int kmax, float alpha, float *x, float *pD);
// void test_blasfeo_srowad(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
// void test_srowin_libsp(int kmax, float alpha, int *idx, float *x, float *pD);
// void test_srowad_libsp(int kmax, int *idx, float alpha, float *x, float *pD);
// void test_blasfeo_srowad_sp(int kmax, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_smat *sD, int di, int dj);
// void test_srowadin_libsp(int kmax, int *idx, float alpha, float *x, float *y, float *pD);
// void test_srowsw_lib(int kmax, float *pA, float *pC);
// void test_blasfeo_srowsw(int kmax, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// void test_blasfeo_srowpe(int kmax, int *ipiv, struct blasfeo_smat *sA);
// void test_scolin_lib(int kmax, float *x, int offset, float *pD, int sdd);
// void test_blasfeo_scolin(int kmax, struct blasfeo_svec *sx, int xi, struct blasfeo_smat *sA, int ai, int aj);
// void test_scolad_lib(int kmax, float alpha, float *x, int offset, float *pD, int sdd);
// void test_scolin_libsp(int kmax, int *idx, float *x, float *pD, int sdd);
// void test_scolad_libsp(int kmax, float alpha, int *idx, float *x, float *pD, int sdd);
// void test_scolsw_lib(int kmax, int offsetA, float *pA, int sda, int offsetC, float *pC, int sdc);
// void test_blasfeo_scolsw(int kmax, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sC, int ci, int cj);
// void test_blasfeo_scolpe(int kmax, int *ipiv, struct blasfeo_smat *sA);
// void test_svecin_libsp(int kmax, int *idx, float *x, float *y);
// void test_svecad_libsp(int kmax, int *idx, float alpha, float *x, float *y);
// void test_blasfeo_svecad_sp(int m, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_svec *sz, int zi);
// void test_blasfeo_svecin_sp(int m, float alpha, struct blasfeo_svec *sx, int xi, int *idx, struct blasfeo_svec *sz, int zi);
// void test_blasfeo_svecex_sp(int m, float alpha, int *idx, struct blasfeo_svec *sx, int x, struct blasfeo_svec *sz, int zi);
// void test_blasfeo_sveccl(int m, struct blasfeo_svec *sxm, int xim, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sxp, int xip, struct blasfeo_svec *sz, int zi);
// void test_blasfeo_sveccl_mask(int m, struct blasfeo_svec *sxm, int xim, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sxp, int xip, struct blasfeo_svec *sz, int zi, struct blasfeo_svec *sm, int mi);
// void test_blasfeo_svecze(int m, struct blasfeo_svec *sm, int mi, struct blasfeo_svec *sv, int vi, struct blasfeo_svec *se, int ei);
// void test_blasfeo_svecnrm_inf(int m, struct blasfeo_svec *sx, int xi, float *ptr_norm);
// void test_blasfeo_svecpe(int kmax, int *ipiv, struct blasfeo_svec *sx, int xi);
// ext_dep
void test_blasfeo_allocate_smat(int m, int n, struct blasfeo_smat *sA);
void test_blasfeo_allocate_svec(int m, struct blasfeo_svec *sa);
void test_blasfeo_free_smat(struct blasfeo_smat *sA);
void test_blasfeo_free_svec(struct blasfeo_svec *sa);
void test_blasfeo_print_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj);
void test_blasfeo_print_svec(int m, struct blasfeo_svec *sa, int ai);
void test_blasfeo_print_tran_svec(int m, struct blasfeo_svec *sa, int ai);
void test_blasfeo_print_to_file_smat(FILE *file, int m, int n, struct blasfeo_smat *sA, int ai, int aj);
void test_blasfeo_print_to_file_svec(FILE *file, int m, struct blasfeo_svec *sa, int ai);
void test_blasfeo_print_to_file_tran_svec(FILE *file, int m, struct blasfeo_svec *sa, int ai);
void test_blasfeo_print_exp_smat(int m, int n, struct blasfeo_smat *sA, int ai, int aj);
void test_blasfeo_print_exp_svec(int m, struct blasfeo_svec *sa, int ai);
void test_blasfeo_print_exp_tran_svec(int m, struct blasfeo_svec *sa, int ai);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_S_AUX_TEST_H_

View File

@@ -1,46 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_BLAS_H_
#define BLASFEO_S_BLAS_H_
#include "blasfeo_s_blasfeo_api.h"
#include "blasfeo_s_blas_api.h"
#endif // BLASFEO_S_BLAS_H_

View File

@@ -1,182 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_BLAS_API_H_
#define BLASFEO_S_BLAS_API_H_
#include "blasfeo_target.h"
#ifdef BLAS_API
#ifdef CBLAS_API
#ifndef BLASFEO_CBLAS_ENUM
#define BLASFEO_CBLAS_ENUM
#ifdef FORTRAN_BLAS_API
#ifndef CBLAS_H
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113};
enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
#endif // CBLAS_H
#else // FORTRAN_BLAS_API
enum BLASFEO_CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
enum BLASFEO_CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113};
enum BLASFEO_CBLAS_UPLO {CblasUpper=121, CblasLower=122};
enum BLASFEO_CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
enum BLASFEO_CBLAS_SIDE {CblasLeft=141, CblasRight=142};
#endif // FORTRAN_BLAS_API
#endif // BLASFEO_CBLAS_ENUM
#endif // CBLAS_API
#endif // BLAS_API
#ifdef __cplusplus
extern "C" {
#endif
#ifdef BLAS_API
#ifdef FORTRAN_BLAS_API
// BLAS 1
//
void saxpy_(int *n, float *alpha, float *x, int *incx, float *y, int *incy);
//
float sdot_(int *n, float *x, int *incx, float *y, int *incy);
// BLAS 3
//
void sgemm_(char *ta, char *tb, int *m, int *n, int *k, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc);
//
void strsm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *A, int *lda, float *B, int *ldb);
// LAPACK
//
void spotrf_(char *uplo, int *m, float *A, int *lda, int *info);
#ifdef CBLAS_API
// CBLAS 1
//
void cblas_saxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY);
// CBLAS 3
//
void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc);
//
void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb);
#endif // CBLAS_API
#else // BLASFEO_API
// BLAS 1
//
void blasfeo_blas_saxpy(int *n, float *alpha, float *x, int *incx, float *y, int *incy);
//
float blasfeo_blas_sdot(int *n, float *x, int *incx, float *y, int *incy);
// BLAS 3
//
void blasfeo_blas_sgemm(char *ta, char *tb, int *m, int *n, int *k, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc);
//
void blasfeo_blas_strsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *A, int *lda, float *B, int *ldb);
// LAPACK
//
void blasfeo_lapack_spotrf(char *uplo, int *m, float *A, int *lda, int *info);
#ifdef CBLAS_API
// CBLAS 1
//
void blasfeo_cblas_saxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY);
// CBLAS 3
//
void blasfeo_cblas_sgemm(const enum BLASFEO_CBLAS_ORDER Order, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const enum BLASFEO_CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc);
//
void blasfeo_cblas_strsm(const enum BLASFEO_CBLAS_ORDER Order, const enum BLASFEO_CBLAS_SIDE Side, const enum BLASFEO_CBLAS_UPLO Uplo, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const enum BLASFEO_CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb);
#endif // CBLAS_API
#endif // BLASFEO_API
#endif // BLAS_API
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_S_BLAS_API_H_

View File

@@ -1,284 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_BLASFEO_API_H_
#define BLASFEO_S_BLASFEO_API_H_
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
//
// level 1 BLAS
//
// z = y + alpha*x
void blasfeo_saxpy(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z = beta*y + alpha*x
void blasfeo_saxpby(int kmax, float alpha, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z = x .* y
void blasfeo_svecmul(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z += x .* y
void blasfeo_svecmulacc(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z = x .* y, return sum(z) = x^T * y
float blasfeo_svecmuldot(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// return x^T * y
float blasfeo_sdot(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi);
// construct givens plane rotation
void blasfeo_srotg(float a, float b, float *c, float *s);
// apply plane rotation [a b] [c -s; s; c] to the aj0 and aj1 columns of A at row index ai
void blasfeo_scolrot(int m, struct blasfeo_smat *sA, int ai, int aj0, int aj1, float c, float s);
// apply plane rotation [c s; -s c] [a; b] to the ai0 and ai1 rows of A at column index aj
void blasfeo_srowrot(int m, struct blasfeo_smat *sA, int ai0, int ai1, int aj, float c, float s);
//
// level 2 BLAS
//
// dense
// z <= beta * y + alpha * A * x
void blasfeo_sgemv_n(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z <= beta * y + alpha * A' * x
void blasfeo_sgemv_t(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z <= inv( A ) * x, A (m)x(n)
void blasfeo_strsv_lnn_mn(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(n)
void blasfeo_strsv_ltn_mn(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A ) * x, A (m)x(m) lower, not_transposed, not_unit
void blasfeo_strsv_lnn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A ) * x, A (m)x(m) lower, not_transposed, unit
void blasfeo_strsv_lnu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) lower, transposed, not_unit
void blasfeo_strsv_ltn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) lower, transposed, unit
void blasfeo_strsv_ltu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) upper, not_transposed, not_unit
void blasfeo_strsv_unn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) upper, transposed, not_unit
void blasfeo_strsv_utn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= A * x ; A lower triangular
void blasfeo_strmv_lnn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= A * x ; A lower triangular, unit diagonal
void blasfeo_strmv_lnu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= A' * x ; A lower triangular
void blasfeo_strmv_ltn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= A' * x ; A lower triangular, unit diagonal
void blasfeo_strmv_ltu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= beta * y + alpha * A * x ; A upper triangular
void blasfeo_strmv_unn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= A' * x ; A upper triangular
void blasfeo_strmv_utn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z_n <= beta_n * y_n + alpha_n * A * x_n
// z_t <= beta_t * y_t + alpha_t * A' * x_t
void blasfeo_sgemv_nt(int m, int n, float alpha_n, float alpha_t, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx_n, int xi_n, struct blasfeo_svec *sx_t, int xi_t, float beta_n, float beta_t, struct blasfeo_svec *sy_n, int yi_n, struct blasfeo_svec *sy_t, int yi_t, struct blasfeo_svec *sz_n, int zi_n, struct blasfeo_svec *sz_t, int zi_t);
// z <= beta * y + alpha * A * x, where A is symmetric and only the lower triangular patr of A is accessed
void blasfeo_ssymv_l(int m, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
void blasfeo_ssymv_l_mn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z <= beta * y + alpha * A * x, where A is symmetric and only the upper triangular patr of A is accessed
void blasfeo_ssymv_u(int m, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// D = C + alpha * x * y^T
void blasfeo_sger(int m, int n, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// diagonal
// z <= beta * y + alpha * A * x, A diagonal
void blasfeo_sgemv_d(int m, float alpha, struct blasfeo_svec *sA, int ai, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
//
// level 3 BLAS
//
// dense
// D <= beta * C + alpha * A * B
void blasfeo_sgemm_nn(int m, int n, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T
void blasfeo_sgemm_nt(int m, int n, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B
void blasfeo_sgemm_tn(int m, int n, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B
void blasfeo_sgemm_tt(int m, int n, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T ; C, D lower triangular
void blasfeo_ssyrk_ln(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_ssyrk_ln_mn(int m, int n, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B ; C, D lower triangular
void blasfeo_ssyrk_lt(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T ; C, D upper triangular
void blasfeo_ssyrk_un(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B ; C, D upper triangular
void blasfeo_ssyrk_ut(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^T ; B upper triangular
void blasfeo_strmm_rutn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A ; A lower triangular
void blasfeo_strmm_rlnn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A lower triangular employint explicit inverse of diagonal
void blasfeo_strsm_llnn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A lower triangular with unit diagonal
void blasfeo_strsm_llnu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A lower triangular employint explicit inverse of diagonal
void blasfeo_strsm_lltn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A lower triangular with unit diagonal
void blasfeo_strsm_lltu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A upper triangular employing explicit inverse of diagonal
void blasfeo_strsm_lunn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A upper triangular withunit diagonal
void blasfeo_strsm_lunu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A upper triangular employing explicit inverse of diagonal
void blasfeo_strsm_lutn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A upper triangular withunit diagonal
void blasfeo_strsm_lutu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A lower triangular employing explicit inverse of diagonal
void blasfeo_strsm_rlnn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A lower triangular with unit diagonal
void blasfeo_strsm_rlnu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A lower triangular employing explicit inverse of diagonal
void blasfeo_strsm_rltn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A lower triangular with unit diagonal
void blasfeo_strsm_rltu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A upper triangular employing explicit inverse of diagonal
void blasfeo_strsm_runn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A upper triangular with unit diagonal
void blasfeo_strsm_runu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A upper triangular employing explicit inverse of diagonal
void blasfeo_strsm_rutn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A upper triangular with unit diagonal
void blasfeo_strsm_rutu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T + alpha * B * A^T; C, D lower triangular
void blasfeo_ssyr2k_ln(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B + alpha * B^T * A; C, D lower triangular
void blasfeo_ssyr2k_lt(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T + alpha * B * A^T; C, D upper triangular
void blasfeo_ssyr2k_un(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B + alpha * B^T * A; C, D upper triangular
void blasfeo_ssyr2k_ut(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// diagonal
// D <= alpha * A * B + beta * C, with A diagonal (stored as strvec)
void sgemm_diag_left_ib(int m, int n, float alpha, float *dA, float *pB, int sdb, float beta, float *pC, int sdc, float *pD, int sdd);
void blasfeo_sgemm_dn(int m, int n, float alpha, struct blasfeo_svec *sA, int ai, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A * B + beta * C, with B diagonal (stored as strvec)
void blasfeo_sgemm_nd(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sB, int bi, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
//
// LAPACK
//
// D <= chol( C ) ; C, D lower triangular
void blasfeo_spotrf_l(int m, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_spotrf_l_mn(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= chol( C ) ; C, D upper triangular
void blasfeo_spotrf_u(int m, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= chol( C + A * B' ) ; C, D lower triangular
void blasfeo_ssyrk_spotrf_ln(int m, int k, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_ssyrk_spotrf_ln_mn(int m, int n, int k, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= lu( C ) ; no pivoting
void blasfeo_sgetrf_np(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= lu( C ) ; row pivoting
void blasfeo_sgetrf_rp(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj, int *ipiv);
// D <= qr( C )
void blasfeo_sgeqrf(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj, void *work);
int blasfeo_sgeqrf_worksize(int m, int n); // in bytes
// D <= Q factor, where C is the output of the LQ factorization
int blasfeo_sorglq_worksize(int m, int n, int k); // in bytes
void blasfeo_sorglq(int m, int n, int k, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj, void *work);
// D <= lq( C )
void blasfeo_sgelqf(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj, void *work);
int blasfeo_sgelqf_worksize(int m, int n); // in bytes
// D <= lq( C ), positive diagonal elements
void blasfeo_sgelqf_pd(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj, void *work);
// [L, A] <= lq( [L, A] ), positive diagonal elements, array of matrices, with
// L lower triangular, of size (m)x(m)
// A full, of size (m)x(n1)
void blasfeo_sgelqf_pd_la(int m, int n1, struct blasfeo_smat *sL, int li, int lj, struct blasfeo_smat *sA, int ai, int aj, void *work);
// [L, L, A] <= lq( [L, L, A] ), positive diagonal elements, array of matrices, with:
// L lower triangular, of size (m)x(m)
// A full, of size (m)x(n1)
void blasfeo_sgelqf_pd_lla(int m, int n1, struct blasfeo_smat *sL0, int l0i, int l0j, struct blasfeo_smat *sL1, int l1i, int l1j, struct blasfeo_smat *sA, int ai, int aj, void *work);
//
// BLAS API helper functions
//
#if ( defined(BLAS_API) & defined(MF_PANELMAJ) )
// BLAS 3
void blasfeo_cm_sgemm_nn(int m, int n, int k, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, float beta, struct blasfeo_cm_smat *sC, int ci, int cj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_sgemm_nt(int m, int n, int k, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, float beta, struct blasfeo_cm_smat *sC, int ci, int cj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_sgemm_tn(int m, int n, int k, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, float beta, struct blasfeo_cm_smat *sC, int ci, int cj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_sgemm_tt(int m, int n, int k, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, float beta, struct blasfeo_cm_smat *sC, int ci, int cj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_llnn(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_llnu(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_lltn(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_lltu(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_lunn(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_lunu(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_lutn(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_lutu(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_rlnn(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_rlnu(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_rltn(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_rltu(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_runn(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_runu(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_rutn(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_strsm_rutu(int m, int n, float alpha, struct blasfeo_cm_smat *sA, int ai, int aj, struct blasfeo_cm_smat *sB, int bi, int bj, struct blasfeo_cm_smat *sD, int di, int dj);
// LAPACK
void blasfeo_cm_spotrf_l(int m, struct blasfeo_cm_smat *sC, int ci, int cj, struct blasfeo_cm_smat *sD, int di, int dj);
void blasfeo_cm_spotrf_u(int m, struct blasfeo_cm_smat *sC, int ci, int cj, struct blasfeo_cm_smat *sD, int di, int dj);
#endif
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_S_BLASFEO_API_H_

View File

@@ -1,135 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_BLASFEO_API_REF_H_
#define BLASFEO_S_BLASFEO_API_REF_H_
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
// expose reference BLASFEO for testing
// --- level 1
void blasfeo_saxpy_ref(int kmax, float alpha, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sy, int yi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_saxpby_ref(int kmax, float alpha, struct blasfeo_svec_ref *sx, int xi, float beta, struct blasfeo_svec_ref *sy, int yi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_svecmul_ref(int m, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sy, int yi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_svecmulacc_ref(int m, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sy, int yi, struct blasfeo_svec_ref *sz, int zi);
float blasfeo_svecmuldot_ref(int m, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sy, int yi, struct blasfeo_svec_ref *sz, int zi);
float blasfeo_sdot_ref(int m, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sy, int yi);
void blasfeo_srotg_ref(float a, float b, float *c, float *s);
void blasfeo_scolrot_ref(int m, struct blasfeo_smat_ref *sA, int ai, int aj0, int aj1, float c, float s);
void blasfeo_srowrot_ref(int m, struct blasfeo_smat_ref *sA, int ai0, int ai1, int aj, float c, float s);
// --- level 2
// dense
void blasfeo_sgemv_n_ref(int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, float beta, struct blasfeo_svec_ref *sy, int yi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_sgemv_t_ref(int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, float beta, struct blasfeo_svec_ref *sy, int yi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strsv_lnn_mn_ref(int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strsv_ltn_mn_ref(int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strsv_lnn_ref(int m, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strsv_lnu_ref(int m, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strsv_ltn_ref(int m, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strsv_ltu_ref(int m, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strsv_unn_ref(int m, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strsv_utn_ref(int m, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strmv_unn_ref(int m, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strmv_utn_ref(int m, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strmv_lnn_ref(int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strmv_ltn_ref(int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strmv_lnu_ref(int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_strmv_ltu_ref(int m, int n, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, struct blasfeo_svec_ref *sz, int zi);
void blasfeo_sgemv_nt_ref(int m, int n, float alpha_n, float alpha_t, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx_n, int xi_n, struct blasfeo_svec_ref *sx_t, int xi_t, float beta_n, float beta_t, struct blasfeo_svec_ref *sy_n, int yi_n, struct blasfeo_svec_ref *sy_t, int yi_t, struct blasfeo_svec_ref *sz_n, int zi_n, struct blasfeo_svec_ref *sz_t, int zi_t);
void blasfeo_ssymv_l_ref(int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sx, int xi, float beta, struct blasfeo_svec_ref *sy, int yi, struct blasfeo_svec_ref *sz, int zi);
// diagonal
void blasfeo_sgemv_d_ref(int m, float alpha, struct blasfeo_svec_ref *sA, int ai, struct blasfeo_svec_ref *sx, int xi, float beta, struct blasfeo_svec_ref *sy, int yi, struct blasfeo_svec_ref *sz, int zi);
// --- level 3
// dense
void blasfeo_sgemm_nn_ref( int m, int n, int k, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_sgemm_nt_ref( int m, int n, int k, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_sgemm_tn_ref(int m, int n, int k, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_sgemm_tt_ref(int m, int n, int k, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_ssyrk_ln_mn_ref( int m, int n, int k, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_ssyrk_ln_ref( int m, int k, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_ssyrk_lt_ref( int m, int k, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_ssyrk_un_ref( int m, int k, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_ssyrk_ut_ref( int m, int k, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_strmm_rutn_ref( int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_strmm_rlnn_ref( int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_strsm_rltn_ref( int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_strsm_rltu_ref( int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_strsm_rutn_ref( int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_strsm_llnu_ref( int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_strsm_lunn_ref( int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, struct blasfeo_smat_ref *sD, int di, int dj);
// diagonal
void dgemm_diag_left_lib_ref(int m, int n, float alpha, float *dA, float *pB, int sdb, float beta, float *pC, int sdc, float *pD, int sdd);
void blasfeo_sgemm_dn_ref(int m, int n, float alpha, struct blasfeo_svec_ref *sA, int ai, struct blasfeo_smat_ref *sB, int bi, int bj, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_sgemm_nd_ref(int m, int n, float alpha, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_svec_ref *sB, int bi, float beta, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
// --- lapack
void blasfeo_sgetrf_nopivot_ref(int m, int n, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_sgetrf_rowpivot_ref(int m, int n, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj, int *ipiv);
void blasfeo_spotrf_l_ref(int m, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_spotrf_l_mn_ref(int m, int n, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_ssyrk_dpotrf_ln_ref(int m, int k, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_ssyrk_dpotrf_ln_mn_ref(int m, int n, int k, struct blasfeo_smat_ref *sA, int ai, int aj, struct blasfeo_smat_ref *sB, int bi, int bj, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_sgetrf_nopivot_ref(int m, int n, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj);
void blasfeo_sgetrf_rowpivot_ref(int m, int n, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj, int *ipiv);
void blasfeo_sgeqrf_ref(int m, int n, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj, void *work);
void blasfeo_sgelqf_ref(int m, int n, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj, void *work);
void blasfeo_sgelqf_pd_ref(int m, int n, struct blasfeo_smat_ref *sC, int ci, int cj, struct blasfeo_smat_ref *sD, int di, int dj, void *work);
void blasfeo_sgelqf_pd_la_ref(int m, int n1, struct blasfeo_smat_ref *sL, int li, int lj, struct blasfeo_smat_ref *sA, int ai, int aj, void *work);
void blasfeo_sgelqf_pd_lla_ref(int m, int n1, struct blasfeo_smat_ref *sL0, int l0i, int l0j, struct blasfeo_smat_ref *sL1, int l1i, int l1j, struct blasfeo_smat_ref *sA, int ai, int aj, void *work);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_S_BLASFEO_API_REF_H_

View File

@@ -1,252 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_BLASFEO_REF_API_H_
#define BLASFEO_S_BLASFEO_REF_API_H_
#include "blasfeo_common.h"
#ifdef __cplusplus
extern "C" {
#endif
//
// level 1 BLAS
//
// z = y + alpha*x
void blasfeo_ref_saxpy(int kmax, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z = beta*y + alpha*x
void blasfeo_ref_saxpby(int kmax, float alpha, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z = x .* y
void blasfeo_ref_svecmul(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z += x .* y
void blasfeo_ref_svecmulacc(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z = x .* y, return sum(z) = x^T * y
float blasfeo_ref_svecmuldot(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// return x^T * y
float blasfeo_ref_sdot(int m, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi);
// construct givens plane rotation
void blasfeo_ref_srotg(float a, float b, float *c, float *s);
// apply plane rotation [a b] [c -s; s; c] to the aj0 and aj1 columns of A at row index ai
void blasfeo_ref_scolrot(int m, struct blasfeo_smat *sA, int ai, int aj0, int aj1, float c, float s);
// apply plane rotation [c s; -s c] [a; b] to the ai0 and ai1 rows of A at column index aj
void blasfeo_ref_srowrot(int m, struct blasfeo_smat *sA, int ai0, int ai1, int aj, float c, float s);
//
// level 2 BLAS
//
// dense
// z <= beta * y + alpha * A * x
void blasfeo_ref_sgemv_n(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z <= beta * y + alpha * A' * x
void blasfeo_ref_sgemv_t(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z <= inv( A ) * x, A (m)x(n)
void blasfeo_ref_strsv_lnn_mn(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(n)
void blasfeo_ref_strsv_ltn_mn(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A ) * x, A (m)x(m) lower, not_transposed, not_unit
void blasfeo_ref_strsv_lnn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A ) * x, A (m)x(m) lower, not_transposed, unit
void blasfeo_ref_strsv_lnu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) lower, transposed, not_unit
void blasfeo_ref_strsv_ltn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) lower, transposed, unit
void blasfeo_ref_strsv_ltu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) upper, not_transposed, not_unit
void blasfeo_ref_strsv_unn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= inv( A' ) * x, A (m)x(m) upper, transposed, not_unit
void blasfeo_ref_strsv_utn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= A * x ; A lower triangular
void blasfeo_ref_strmv_lnn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= A * x ; A lower triangular, unit diagonal
void blasfeo_ref_strmv_lnu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= A' * x ; A lower triangular
void blasfeo_ref_strmv_ltn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= A' * x ; A lower triangular, unit diagonal
void blasfeo_ref_strmv_ltu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= beta * y + alpha * A * x ; A upper triangular
void blasfeo_ref_strmv_unn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z <= A' * x ; A upper triangular
void blasfeo_ref_strmv_utn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi);
// z_n <= beta_n * y_n + alpha_n * A * x_n
// z_t <= beta_t * y_t + alpha_t * A' * x_t
void blasfeo_ref_sgemv_nt(int m, int n, float alpha_n, float alpha_t, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx_n, int xi_n, struct blasfeo_svec *sx_t, int xi_t, float beta_n, float beta_t, struct blasfeo_svec *sy_n, int yi_n, struct blasfeo_svec *sy_t, int yi_t, struct blasfeo_svec *sz_n, int zi_n, struct blasfeo_svec *sz_t, int zi_t);
// z <= beta * y + alpha * A * x, where A is symmetric and only the lower triangular patr of A is accessed
void blasfeo_ref_ssymv_l(int m, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
void blasfeo_ref_ssymv_l_mn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// z <= beta * y + alpha * A * x, where A is symmetric and only the upper triangular patr of A is accessed
void blasfeo_ref_ssymv_u(int m, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
// D = C + alpha * x * y^T
void blasfeo_ref_sger(int m, int n, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// diagonal
// z <= beta * y + alpha * A * x, A diagonal
void blasfeo_ref_sgemv_d(int m, float alpha, struct blasfeo_svec *sA, int ai, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi);
//
// level 3 BLAS
//
// dense
// D <= beta * C + alpha * A * B
void blasfeo_ref_sgemm_nn(int m, int n, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T
void blasfeo_ref_sgemm_nt(int m, int n, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B
void blasfeo_ref_sgemm_tn(int m, int n, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B
void blasfeo_ref_sgemm_tt(int m, int n, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T ; C, D lower triangular
void blasfeo_ref_ssyrk_ln(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_ref_ssyrk_ln_mn(int m, int n, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B ; C, D lower triangular
void blasfeo_ref_ssyrk_lt(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T ; C, D upper triangular
void blasfeo_ref_ssyrk_un(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B ; C, D upper triangular
void blasfeo_ref_ssyrk_ut(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^T ; B upper triangular
void blasfeo_ref_strmm_rutn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A ; A lower triangular
void blasfeo_ref_strmm_rlnn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A lower triangular employint explicit inverse of diagonal
void blasfeo_ref_strsm_llnn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A lower triangular with unit diagonal
void blasfeo_ref_strsm_llnu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A lower triangular employint explicit inverse of diagonal
void blasfeo_ref_strsm_lltn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A lower triangular with unit diagonal
void blasfeo_ref_strsm_lltu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A upper triangular employing explicit inverse of diagonal
void blasfeo_ref_strsm_lunn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-1} * B , with A upper triangular withunit diagonal
void blasfeo_ref_strsm_lunu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A upper triangular employing explicit inverse of diagonal
void blasfeo_ref_strsm_lutn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A^{-T} * B , with A upper triangular withunit diagonal
void blasfeo_ref_strsm_lutu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A lower triangular employing explicit inverse of diagonal
void blasfeo_ref_strsm_rlnn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A lower triangular with unit diagonal
void blasfeo_ref_strsm_rlnu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A lower triangular employing explicit inverse of diagonal
void blasfeo_ref_strsm_rltn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A lower triangular with unit diagonal
void blasfeo_ref_strsm_rltu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A upper triangular employing explicit inverse of diagonal
void blasfeo_ref_strsm_runn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-1} , with A upper triangular with unit diagonal
void blasfeo_ref_strsm_runu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A upper triangular employing explicit inverse of diagonal
void blasfeo_ref_strsm_rutn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * B * A^{-T} , with A upper triangular with unit diagonal
void blasfeo_ref_strsm_rutu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T + alpha * B * A^T ; C, D lower triangular
void blasfeo_ref_ssyr2k_ln(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B + alpha * B^T * A ; C, D lower triangular
void blasfeo_ref_ssyr2k_lt(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A * B^T + alpha * B * A^T ; C, D upper triangular
void blasfeo_ref_ssyr2k_un(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= beta * C + alpha * A^T * B + alpha * B^T * A ; C, D upper triangular
void blasfeo_ref_ssyr2k_ut(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// diagonal
// D <= alpha * A * B + beta * C, with A diagonal (stored as strvec)
void sgemm_diag_left_ib(int m, int n, float alpha, float *dA, float *pB, int sdb, float beta, float *pC, int sdc, float *pD, int sdd);
void blasfeo_ref_sgemm_dn(int m, int n, float alpha, struct blasfeo_svec *sA, int ai, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= alpha * A * B + beta * C, with B diagonal (stored as strvec)
void blasfeo_ref_sgemm_nd(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sB, int bi, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
//
// LAPACK
//
// D <= chol( C ) ; C, D lower triangular
void blasfeo_ref_spotrf_l(int m, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_ref_spotrf_l_mn(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= chol( C ) ; C, D upper triangular
void blasfeo_ref_spotrf_u(int m, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= chol( C + A * B' ) ; C, D lower triangular
void blasfeo_ref_ssyrk_spotrf_ln(int m, int k, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
void blasfeo_ref_ssyrk_spotrf_ln_mn(int m, int n, int k, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= lu( C ) ; no pivoting
void blasfeo_ref_sgetrf_np(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj);
// D <= lu( C ) ; row pivoting
void blasfeo_ref_sgetrf_rp(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj, int *ipiv);
// D <= qr( C )
void blasfeo_ref_sgeqrf(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj, void *work);
int blasfeo_ref_sgeqrf_worksize(int m, int n); // in bytes
// D <= Q factor, where C is the output of the LQ factorization
int blasfeo_ref_sorglq_worksize(int m, int n, int k); // in bytes
void blasfeo_ref_sorglq(int m, int n, int k, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj, void *work);
// D <= lq( C )
void blasfeo_ref_sgelqf(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj, void *work);
int blasfeo_ref_sgelqf_worksize(int m, int n); // in bytes
// D <= lq( C ), positive diagonal elements
void blasfeo_ref_sgelqf_pd(int m, int n, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj, void *work);
// [L, A] <= lq( [L, A] ), positive diagonal elements, array of matrices, with
// L lower triangular, of size (m)x(m)
// A full, of size (m)x(n1)
void blasfeo_ref_sgelqf_pd_la(int m, int n1, struct blasfeo_smat *sL, int li, int lj, struct blasfeo_smat *sA, int ai, int aj, void *work);
// [L, L, A] <= lq( [L, L, A] ), positive diagonal elements, array of matrices, with:
// L lower triangular, of size (m)x(m)
// A full, of size (m)x(n1)
void blasfeo_ref_sgelqf_pd_lla(int m, int n1, struct blasfeo_smat *sL0, int l0i, int l0j, struct blasfeo_smat *sL1, int l1i, int l1j, struct blasfeo_smat *sA, int ai, int aj, void *work);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_S_BLASFEO_REF_API_H_

View File

@@ -1,692 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_S_KERNEL_H_
#define BLASFEO_S_KERNEL_H_
#ifdef __cplusplus
extern "C" {
#endif
// utils
void blasfeo_align_2MB(void *ptr, void **ptr_align);
void blasfeo_align_4096_byte(void *ptr, void **ptr_align);
void blasfeo_align_64_byte(void *ptr, void **ptr_align);
//
// lib8
//
// 24x4
void kernel_sgemm_nt_24x4_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_sgemm_nt_24x4_vs_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, int km, int kn);
void kernel_sgemm_nt_24x4_gen_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_sgemm_nn_24x4_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_sgemm_nn_24x4_vs_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *beta, float *C, int sdc, float *D, int sdd, int km, int kn);
void kernel_sgemm_nn_24x4_gen_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_ssyrk_nt_l_24x4_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_ssyrk_nt_l_24x4_vs_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, int km, int kn);
void kernel_ssyrk_nt_l_20x4_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_ssyrk_nt_l_20x4_vs_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, int km, int kn);
void kernel_spotrf_nt_l_24x4_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_spotrf_nt_l_24x4_vs_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int km, int kn);
void kernel_spotrf_nt_l_20x4_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_spotrf_nt_l_20x4_vs_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int km, int kn);
void kernel_strsm_nt_rl_inv_24x4_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E);
void kernel_strsm_nt_rl_inv_24x4_vs_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E, int km, int kn);
void kernel_sgemm_strsm_nt_rl_inv_24x4_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E);
void kernel_sgemm_strsm_nt_rl_inv_24x4_vs_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_20x4_vs_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_20x4_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_ssyrk_spotrf_nt_l_24x4_vs_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_24x4_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_strmm_nn_rl_24x4_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *D, int sdd);
void kernel_strmm_nn_rl_24x4_vs_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *D, int sdd, int km, int kn);
// 16x8
void kernel_sgemm_nt_16x8_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, float *spil);
// 16x4
void kernel_sgemm_nt_16x4_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_sgemm_nt_16x4_vs_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, int km, int kn);
void kernel_sgemm_nt_16x4_gen_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_sgemm_nn_16x4_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_sgemm_nn_16x4_vs_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *beta, float *C, int sdc, float *D, int sdd, int km, int kn);
void kernel_sgemm_nn_16x4_gen_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_ssyrk_nt_l_16x4_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_ssyrk_nt_l_16x4_vs_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, int km, int kn);
void kernel_ssyrk_nt_l_12x4_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_ssyrk_nt_l_12x4_vs_lib8(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, int km, int kn);
void kernel_spotrf_nt_l_16x4_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_spotrf_nt_l_16x4_vs_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int km, int kn);
void kernel_spotrf_nt_l_12x4_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_spotrf_nt_l_12x4_vs_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int km, int kn);
void kernel_strsm_nt_rl_inv_16x4_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E);
void kernel_strsm_nt_rl_inv_16x4_vs_lib8(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E, int km, int kn);
void kernel_sgemm_strsm_nt_rl_inv_16x4_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E);
void kernel_sgemm_strsm_nt_rl_inv_16x4_vs_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_12x4_vs_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_12x4_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_ssyrk_spotrf_nt_l_16x4_vs_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_16x4_lib8(int kp, float *Ap, int sdap, float *Bp, int km_, float *Am, int sdam, float *Bm, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_strmm_nn_rl_16x4_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *D, int sdd);
void kernel_strmm_nn_rl_16x4_vs_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *D, int sdd, int km, int kn);
void kernel_strmm_nn_rl_16x4_gen_lib8(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
// 8x8
void kernel_sgemm_nt_8x8_lib8(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D);
void kernel_sgemm_nt_8x8_vs_lib8(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D, int km, int kn);
void kernel_sgemm_nt_8x8_gen_lib8(int k, float *alpha, float *A, float *B, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_sgemm_nn_8x8_lib8(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *beta, float *C, float *D);
void kernel_sgemm_nn_8x8_vs_lib8(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *beta, float *C, float *D, int km, int kn);
void kernel_sgemm_nn_8x8_gen_lib8(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_ssyrk_nt_l_8x8_lib8(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D);
void kernel_ssyrk_nt_l_8x8_vs_lib8(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D, int km, int kn);
void kernel_spotrf_nt_l_8x8_lib8(int k, float *A, float *B, float *C, float *D, float *inv_diag_D);
void kernel_spotrf_nt_l_8x8_vs_lib8(int k, float *A, float *B, float *C, float *D, float *inv_diag_D, int km, int kn);
void kernel_strsm_nt_rl_inv_8x8_lib8(int k, float *A, float *B, float *C, float *D, float *E, float *inv_diag_E);
void kernel_strsm_nt_rl_inv_8x8_vs_lib8(int k, float *A, float *B, float *C, float *D, float *E, float *inv_diag_E, int km, int kn);
void kernel_sgemm_strsm_nt_rl_inv_8x8_lib8(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *E, float *inv_diag_E);
void kernel_sgemm_strsm_nt_rl_inv_8x8_vs_lib8(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *E, float *inv_diag_E, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_8x8_vs_lib8(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *inv_diag_D, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_8x8_lib8(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *inv_diag_D);
// 8x4
void kernel_sgemm_nt_8x4_lib8(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D);
void kernel_sgemm_nt_8x4_vs_lib8(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D, int km, int kn);
void kernel_sgemm_nt_8x4_gen_lib8(int k, float *alpha, float *A, float *B, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_sgemm_nn_8x4_lib8(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *beta, float *C, float *D);
void kernel_sgemm_nn_8x4_vs_lib8(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *beta, float *C, float *D, int km, int kn);
void kernel_sgemm_nn_8x4_gen_lib8(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
//void kernel_ssyrk_nt_l_8x4_lib8(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D);
void kernel_ssyrk_nt_l_8x4_vs_lib8(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D, int km, int kn);
void kernel_spotrf_nt_l_8x4_lib8(int k, float *A, float *B, float *C, float *D, float *inv_diag_D);
void kernel_spotrf_nt_l_8x4_vs_lib8(int k, float *A, float *B, float *C, float *D, float *inv_diag_D, int km, int kn);
void kernel_strsm_nt_rl_inv_8x4_lib8(int k, float *A, float *B, float *C, float *D, float *E, float *inv_diag_E);
void kernel_strsm_nt_rl_inv_8x4_vs_lib8(int k, float *A, float *B, float *C, float *D, float *E, float *inv_diag_E, int km, int kn);
void kernel_sgemm_strsm_nt_rl_inv_8x4_lib8(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *E, float *inv_diag_E);
void kernel_sgemm_strsm_nt_rl_inv_8x4_vs_lib8(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *E, float *inv_diag_E, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_8x4_vs_lib8(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *inv_diag_D, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_8x4_lib8(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *inv_diag_D);
void kernel_strmm_nn_rl_8x4_lib8(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *D);
void kernel_strmm_nn_rl_8x4_vs_lib8(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *D, int km, int kn);
void kernel_strmm_nn_rl_8x4_gen_lib8(int k, float *alpha, float *A, int offsetB, float *B, int sdb, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_strmm_nt_ru_8x4_lib8(int k, float *alpha, float *A, float *B, float *D);
void kernel_strmm_nt_ru_8x4_vs_lib8(int k, float *alpha, float *A, float *B, float *D, int km, int kn);
// 4x8
void kernel_sgemm_nt_4x8_lib8(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D);
void kernel_sgemm_nt_4x8_vs_lib8(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D, int km, int kn);
void kernel_sgemm_nt_4x8_gen_lib8(int k, float *alpha, float *A, float *B, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_strsm_nt_rl_inv_4x8_lib8(int k, float *A, float *B, float *C, float *D, float *E, float *inv_diag_E);
void kernel_strsm_nt_rl_inv_4x8_vs_lib8(int k, float *A, float *B, float *C, float *D, float *E, float *inv_diag_E, int km, int kn);
// 8
void kernel_sgemv_n_8_lib8(int k, float *alpha, float *A, float *x, float *beta, float *y, float *z);
void kernel_sgemv_n_8_vs_lib8(int k, float *alpha, float *A, float *x, float *beta, float *y, float *z, int k1);
void kernel_sgemv_n_8_gen_lib8(int kmax, float *alpha, float *A, float *x, float *beta, float *y, float *z, int k0, int k1);
void kernel_sgemv_t_8_lib8(int k, float *alpha, int offsetA, float *A, int sda, float *x, float *beta, float *y, float *z);
void kernel_sgemv_t_8_vs_lib8(int k, float *alpha, int offsetA, float *A, int sda, float *x, float *beta, float *y, float *z, int k1);
void kernel_sgemv_t_4_lib8(int k, float *alpha, int offsetA, float *A, int sda, float *x, float *beta, float *y, float *z);
void kernel_sgemv_t_4_vs_lib8(int k, float *alpha, int offsetA, float *A, int sda, float *x, float *beta, float *y, float *z, int k1);
void kernel_strsv_ln_inv_8_lib8(int k, float *A, float *inv_diag_A, float *x, float *y, float *z);
void kernel_strsv_ln_inv_8_vs_lib8(int k, float *A, float *inv_diag_A, float *x, float *y, float *z, int km, int kn);
void kernel_strsv_lt_inv_8_lib8(int k, float *A, int sda, float *inv_diag_A, float *x, float *y, float *z);
void kernel_strsv_lt_inv_8_vs_lib8(int k, float *A, int sda, float *inv_diag_A, float *x, float *y, float *z, int km, int kn);
void kernel_sgemv_nt_4_lib8(int kmax, float *alpha_n, float *alpha_t, float *A, int sda, float *x_n, float *x_t, float *beta_t, float *y_t, float *z_n, float *z_t);
void kernel_sgemv_nt_4_vs_lib8(int kmax, float *alpha_n, float *alpha_t, float *A, int sda, float *x_n, float *x_t, float *beta_t, float *y_t, float *z_n, float *z_t, int km);
void kernel_ssymv_l_4l_lib8(int kmax, float *alpha, float *A, int sda, float *x, float *z);
void kernel_ssymv_l_4r_lib8(int kmax, float *alpha, float *A, int sda, float *x, float *z);
void kernel_ssymv_l_4l_gen_lib8(int kmax, float *alpha, int offA, float *A, int sda, float *x, float *z, int km);
void kernel_ssymv_l_4r_gen_lib8(int kmax, float *alpha, int offA, float *A, int sda, float *x, float *z, int km);
// -------- aux
// ---- copy
// lib4
//
void kernel_sgecpsc_4_0_lib4(int kmax, float *alpha, float *A, float *B);
void kernel_sgecp_4_0_lib4(int kmax, float *A, float *B);
void kernel_sgecpsc_4_1_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgecp_4_1_lib4(int kmax, float *A0, int sda, float *B);
void kernel_sgecpsc_4_2_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgecp_4_2_lib4(int kmax, float *A0, int sda, float *B);
void kernel_sgecpsc_4_3_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgecp_4_3_lib4(int kmax, float *A0, int sda, float *B);
void kernel_sgecpsc_3_0_lib4(int kmax, float *alpha, float *A, float *B);
void kernel_sgecp_3_0_lib4(int kmax, float *A, float *B);
void kernel_sgecpsc_3_2_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgecp_3_2_lib4(int kmax, float *A0, int sda, float *B);
void kernel_sgecpsc_3_3_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgecp_3_3_lib4(int kmax, float *A0, int sda, float *B);
void kernel_sgecpsc_2_0_lib4(int kmax, float *alpha, float *A, float *B);
void kernel_sgecp_2_0_lib4(int kmax, float *A, float *B);
void kernel_sgecpsc_2_3_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgecp_2_3_lib4(int kmax, float *A0, int sda, float *B);
void kernel_sgecpsc_1_0_lib4(int kmax, float *alpha, float *A, float *B);
void kernel_sgecp_1_0_lib4(int kmax, float *A, float *B);
// lib8
//
void kernel_sgecp_8_0_lib8(int m, float *A, float *B);
void kernel_sgecp_8_0_gen_lib8(int m, float *A, float *B, int m1);
void kernel_sgecp_8_0_gen_u_lib8(int m, float *A, float *B, int m1);
void kernel_sgesc_8_0_lib8(int m, float *alpha, float *A);
void kernel_sgesc_8_0_gen_lib8(int m, float *alpha, float *A, int m1);
void kernel_sgesc_8_0_gen_u_lib8(int m, float *alpha, float *A, int m1);
void kernel_sgecpsc_8_0_lib8(int m, float *alpha, float *A, float *B);
void kernel_sgecpsc_8_0_gen_lib8(int m, float *alpha, float *A, float *B, int m1);
void kernel_sgecpsc_8_0_gen_u_lib8(int m, float *alpha, float *A, float *B, int m1);
void kernel_sgecp_8_1_lib8(int m, float *A, int sda, float *B);
void kernel_sgecp_8_1_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgecpsc_8_1_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgecpsc_8_1_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgecp_8_2_lib8(int m, float *A, int sda, float *B);
void kernel_sgecp_8_2_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgecpsc_8_2_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgecpsc_8_2_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgecp_8_3_lib8(int m, float *A, int sda, float *B);
void kernel_sgecp_8_3_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgecpsc_8_3_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgecpsc_8_3_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgecp_8_4_lib8(int m, float *A, int sda, float *B);
void kernel_sgecp_8_4_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgecpsc_8_4_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgecpsc_8_4_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgecp_8_5_lib8(int m, float *A, int sda, float *B);
void kernel_sgecp_8_5_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgecpsc_8_5_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgecpsc_8_5_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgecp_8_6_lib8(int m, float *A, int sda, float *B);
void kernel_sgecp_8_6_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgecpsc_8_6_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgecpsc_8_6_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgecp_8_7_lib8(int m, float *A, int sda, float *B);
void kernel_sgecp_8_7_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgecpsc_8_7_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgecpsc_8_7_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
// transpose
//
void kernel_sgetr_8_0_lib8(int m, float *A, int sda, float *B);
void kernel_sgetr_8_0_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgetr_8_1_lib8(int m, float *A, int sda, float *B);
void kernel_sgetr_8_1_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgetr_8_2_lib8(int m, float *A, int sda, float *B);
void kernel_sgetr_8_2_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgetr_8_3_lib8(int m, float *A, int sda, float *B);
void kernel_sgetr_8_3_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgetr_8_4_lib8(int m, float *A, int sda, float *B);
void kernel_sgetr_8_4_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgetr_8_5_lib8(int m, float *A, int sda, float *B);
void kernel_sgetr_8_5_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgetr_8_6_lib8(int m, float *A, int sda, float *B);
void kernel_sgetr_8_6_gen_lib8(int m, float *A, int sda, float *B, int m1);
void kernel_sgetr_8_7_lib8(int m, float *A, int sda, float *B);
void kernel_sgetr_8_7_gen_lib8(int m, float *A, int sda, float *B, int m1);
// add
//
void kernel_sgead_8_0_lib8(int m, float *alpha, float *A, float *B);
void kernel_sgead_8_0_gen_lib8(int m, float *alpha, float *A, float *B, int m1);
void kernel_sgead_8_1_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgead_8_1_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgead_8_2_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgead_8_2_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgead_8_3_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgead_8_3_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgead_8_4_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgead_8_4_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgead_8_5_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgead_8_5_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgead_8_6_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgead_8_6_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
void kernel_sgead_8_7_lib8(int m, float *alpha, float *A, int sda, float *B);
void kernel_sgead_8_7_gen_lib8(int m, float *alpha, float *A, int sda, float *B, int m1);
//
// lib4
//
// level 2 BLAS
// 4
void kernel_sgemv_n_4_lib4(int k, float *alpha, float *A, float *x, float *beta, float *y, float *z);
void kernel_sgemv_n_4_vs_lib4(int k, float *alpha, float *A, float *x, float *beta, float *y, float *z, int k1);
void kernel_sgemv_n_4_gen_lib4(int kmax, float *alpha, float *A, float *x, float *beta, float *y, float *z, int k0, int k1);
void kernel_sgemv_t_4_lib4(int k, float *alpha, int offsetA, float *A, int sda, float *x, float *beta, float *y, float *z);
void kernel_sgemv_t_4_vs_lib4(int k, float *alpha, int offsetA, float *A, int sda, float *x, float *beta, float *y, float *z, int k1);
void kernel_strsv_ln_inv_4_lib4(int k, float *A, float *inv_diag_A, float *x, float *y, float *z);
void kernel_strsv_ln_inv_4_vs_lib4(int k, float *A, float *inv_diag_A, float *x, float *y, float *z, int km, int kn);
void kernel_strsv_lt_inv_4_lib4(int k, float *A, int sda, float *inv_diag_A, float *x, float *y, float *z);
void kernel_strsv_lt_inv_3_lib4(int k, float *A, int sda, float *inv_diag_A, float *x, float *y, float *z);
void kernel_strsv_lt_inv_2_lib4(int k, float *A, int sda, float *inv_diag_A, float *x, float *y, float *z);
void kernel_strsv_lt_inv_1_lib4(int k, float *A, int sda, float *inv_diag_A, float *x, float *y, float *z);
void kernel_strmv_un_4_lib4(int k, float *A, float *x, float *z);
void kernel_strmv_ut_4_lib4(int k, float *A, int sda, float *x, float *z);
void kernel_strmv_ut_4_vs_lib4(int k, float *A, int sda, float *x, float *z, int km);
void kernel_sgemv_nt_6_lib4(int kmax, float *alpha_n, float *alpha_t, float *A, int sda, float *x_n, float *x_t, float *beta_t, float *y_t, float *z_n, float *z_t);
void kernel_sgemv_nt_4_lib4(int kmax, float *alpha_n, float *alpha_t, float *A, int sda, float *x_n, float *x_t, float *beta_t, float *y_t, float *z_n, float *z_t);
void kernel_sgemv_nt_4_vs_lib4(int kmax, float *alpha_n, float *alpha_t, float *A, int sda, float *x_n, float *x_t, float *beta_t, float *y_t, float *z_n, float *z_t, int km);
void kernel_ssymv_l_4_lib4(int kmax, float *alpha, float *A, int sda, float *x_n, float *z_n);
void kernel_ssymv_l_4_gen_lib4(int kmax, float *alpha, int offA, float *A, int sda, float *x_n, float *z_n, int km);
// level 3 BLAS
// 12x4
void kernel_sgemm_nt_16x4_lib4(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd); //
void kernel_sgemm_nt_16x4_vs_lib4(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, int m1, int n1); //
void kernel_strsm_nt_rl_inv_16x4_lib4(int k, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E);
void kernel_strsm_nt_rl_inv_16x4_vs_lib4(int k, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E, int m1, int n1);
// 12x4
void kernel_sgemm_nt_12x4_lib4(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd); //
void kernel_sgemm_nt_12x4_vs_lib4(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, int m1, int n1); //
void kernel_strsm_nt_rl_inv_12x4_lib4(int k, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E);
void kernel_strsm_nt_rl_inv_12x4_vs_lib4(int k, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E, int m1, int n1);
// 8x8
void kernel_sgemm_nt_8x8_lib4(int k, float *alpha, float *A, int sda, float *B, int sdb, float *beta, float *C, int sdc, float *D, int sdd); //
void kernel_sgemm_nt_8x8_vs_lib4(int k, float *alpha, float *A, int sda, float *B, int sdb, float *beta, float *C, int sdc, float *D, int sdd, int m1, int n1); //
void kernel_sgemm_nn_8x8_lib4(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *beta, float *C, int sdc, float *D, int sdd); //
void kernel_sgemm_nn_8x8_vs_lib4(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *beta, float *C, int sdc, float *D, int sdd, int m1, int n1); //
// 8x4
void kernel_sgemm_nt_8x4_lib4(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd); //
void kernel_sgemm_nt_8x4_vs_lib4(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, int m1, int n1); //
void kernel_sgemm_nn_8x4_lib4(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *beta, float *C, int sdc, float *D, int sdd); //
void kernel_sgemm_nn_8x4_vs_lib4(int k, float *alpha, float *A, int sda, int offsetB, float *B, int sdb, float *beta, float *C, int sdc, float *D, int sdd, int m1, int n1); //
void kernel_ssyrk_nt_l_8x4_lib4(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd); //
void kernel_ssyrk_nt_l_8x4_vs_lib4(int k, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, int km, int kn); //
void kernel_strsm_nt_rl_inv_8x4_lib4(int k, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E);
void kernel_strsm_nt_rl_inv_8x4_vs_lib4(int k, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd, float *E, float *inv_diag_E, int m1, int n1);
// 4x4
void kernel_sgemm_nt_4x4_lib4(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D); //
void kernel_sgemm_nt_4x4_vs_lib4(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D, int km, int kn); //
void kernel_sgemm_nt_4x4_gen_lib4(int k, float *alpha, float *A, float *B, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int k0, int k1);
void kernel_sgemm_nn_4x4_lib4(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *beta, float *C, float *D); //
void kernel_sgemm_nn_4x4_vs_lib4(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *beta, float *C, float *D, int km, int kn); //
void kernel_sgemm_nn_4x4_gen_lib4(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1); //
void kernel_ssyrk_nt_l_4x4_lib4(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D); //
void kernel_ssyrk_nt_l_4x4_vs_lib4(int k, float *alpha, float *A, float *B, float *beta, float *C, float *D, int km, int kn); //
void kernel_ssyrk_nt_l_4x4_gen_lib4(int k, float *alpha, float *A, float *B, float *beta, int offsetC, float *C, int sdc, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_strmm_nt_ru_4x4_lib4(int k, float *alpha, float *A, float *B, float *D); //
void kernel_strmm_nt_ru_4x4_vs_lib4(int k, float *alpha, float *A, float *B, float *D, int km, int kn); //
void kernel_strmm_nn_rl_4x4_lib4(int k, float *alpha, float *A, int offsetB, float *B, int sdb, float *D);
void kernel_strmm_nn_rl_4x4_gen_lib4(int k, float *alpha, float *A, int offsetB, float *B, int sdb, int offsetD, float *D, int sdd, int m0, int m1, int n0, int n1);
void kernel_strsm_nt_rl_inv_4x4_lib4(int k, float *A, float *B, float *beta, float *C, float *D, float *E, float *inv_diag_E);
void kernel_strsm_nt_rl_inv_4x4_vs_lib4(int k, float *A, float *B, float *beta, float *C, float *D, float *E, float *inv_diag_E, int km, int kn);
void kernel_strsm_nt_rl_one_4x4_lib4(int k, float *A, float *B, float *beta, float *C, float *D, float *E);
void kernel_strsm_nt_rl_one_4x4_vs_lib4(int k, float *A, float *B, float *beta, float *C, float *D, float *E, int km, int kn);
void kernel_strsm_nt_ru_inv_4x4_lib4(int k, float *A, float *B, float *beta, float *C, float *D, float *E, float *inv_diag_E);
void kernel_strsm_nt_ru_inv_4x4_vs_lib4(int k, float *A, float *B, float *beta, float *C, float *D, float *E, float *inv_diag_E, int km, int kn);
void kernel_strsm_nt_ru_one_4x4_lib4(int k, float *A, float *B, float *beta, float *C, float *D, float *E);
void kernel_strsm_nt_ru_one_4x4_vs_lib4(int k, float *A, float *B, float *beta, float *C, float *D, float *E, int km, int kn);
void kernel_strsm_nn_ru_inv_4x4_lib4(int k, float *A, float *B, int sdb, float *beta, float *C, float *D, float *E, float *inv_diag_E);
void kernel_strsm_nn_ru_inv_4x4_vs_lib4(int k, float *A, float *B, int sdb, float *beta, float *C, float *D, float *E, float *inv_diag_E, int km, int kn);
void kernel_strsm_nn_ll_one_4x4_lib4(int k, float *A, float *B, int sdb, float *C, float *D, float *E);
void kernel_strsm_nn_ll_one_4x4_vs_lib4(int k, float *A, float *B, int sdb, float *C, float *D, float *E, int km, int kn);
void kernel_strsm_nn_lu_inv_4x4_lib4(int kmax, float *A, float *B, int sdb, float *C, float *D, float *E, float *inv_diag_E);
void kernel_strsm_nn_lu_inv_4x4_vs_lib4(int kmax, float *A, float *B, int sdb, float *C, float *D, float *E, float *inv_diag_E, int km, int kn);
// diag
void kernel_sgemm_diag_right_4_a0_lib4(int kmax, float *alpha, float *A, int sda, float *B, float *D, int sdd);
void kernel_sgemm_diag_right_4_lib4(int kmax, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_sgemm_diag_right_3_lib4(int kmax, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_sgemm_diag_right_2_lib4(int kmax, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_sgemm_diag_right_1_lib4(int kmax, float *alpha, float *A, int sda, float *B, float *beta, float *C, int sdc, float *D, int sdd);
void kernel_sgemm_diag_left_4_a0_lib4(int kmax, float *alpha, float *A, float *B, float *D);
void kernel_sgemm_diag_left_4_lib4(int kmax, float *alpha, float *A, float *B, float *beta, float *C, float *D);
void kernel_sgemm_diag_left_3_lib4(int kmax, float *alpha, float *A, float *B, float *beta, float *C, float *D);
void kernel_sgemm_diag_left_2_lib4(int kmax, float *alpha, float *A, float *B, float *beta, float *C, float *D);
void kernel_sgemm_diag_left_1_lib4(int kmax, float *alpha, float *A, float *B, float *beta, float *C, float *D);
// LAPACK
// 16x4
void kernel_spotrf_nt_l_16x4_lib4(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_spotrf_nt_l_16x4_vs_lib4(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int m1, int n1);
// 12x4
void kernel_spotrf_nt_l_12x4_lib4(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_spotrf_nt_l_12x4_vs_lib4(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int m1, int n1);
// 8x4
void kernel_spotrf_nt_l_8x4_lib4(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D);
void kernel_spotrf_nt_l_8x4_vs_lib4(int k, float *A, int sda, float *B, float *C, int sdc, float *D, int sdd, float *inv_diag_D, int m1, int n1);
// 4x4
void kernel_spotrf_nt_l_4x4_lib4(int k, float *A, float *B, float *C, float *D, float *inv_diag_D);
void kernel_spotrf_nt_l_4x4_vs_lib4(int k, float *A, float *B, float *C, float *D, float *inv_diag_D, int km, int kn);
void kernel_sgetrf_nn_4x4_lib4(int k, float *A, float *B, int sdb, float *C, float *D, float *inv_diag_D);
void kernel_sgetrf_nn_4x4_vs_lib4(int k, float *A, float *B, int sdb, float *C, float *D, float *inv_diag_D, int km, int kn);
void kernel_sgetrf_pivot_4_lib4(int m, float *pA, int sda, float *inv_diag_A, int* ipiv);
void kernel_sgetrf_pivot_4_vs_lib4(int m, int n, float *pA, int sda, float *inv_diag_A, int* ipiv);
// merged routines
// 4x4
void kernel_sgemm_strsm_nt_rl_inv_4x4_lib4(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *E, float *inv_diag_E);
void kernel_sgemm_strsm_nt_rl_inv_4x4_vs_lib4(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *E, float *inv_diag_E, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_4x4_vs_lib4(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *inv_diag_D, int km, int kn);
void kernel_ssyrk_spotrf_nt_l_4x4_lib4(int kp, float *Ap, float *Bp, int km_, float *Am, float *Bm, float *C, float *D, float *inv_diag_D);
// auxiliary routines
void kernel_strcp_l_4_0_lib4(int kmax, float *A, float *B);
void kernel_strcp_l_4_1_lib4(int kmax, float *A0, int sda, float *B);
void kernel_strcp_l_4_2_lib4(int kmax, float *A0, int sda, float *B);
void kernel_strcp_l_4_3_lib4(int kmax, float *A0, int sda, float *B);
void kernel_strcp_l_3_0_lib4(int kmax, float *A, float *B);
void kernel_strcp_l_3_2_lib4(int kmax, float *A0, int sda, float *B);
void kernel_strcp_l_3_3_lib4(int kmax, float *A0, int sda, float *B);
void kernel_strcp_l_2_0_lib4(int kmax, float *A, float *B);
void kernel_strcp_l_2_3_lib4(int kmax, float *A0, int sda, float *B);
void kernel_strcp_l_1_0_lib4(int kmax, float *A, float *B);
void kernel_sgead_4_0_lib4(int kmax, float *alpha, float *A, float *B);
void kernel_sgead_4_1_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgead_4_2_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgead_4_3_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgead_3_0_lib4(int kmax, float *alpha, float *A, float *B);
void kernel_sgead_3_2_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgead_3_3_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgead_2_0_lib4(int kmax, float *alpha, float *A, float *B);
void kernel_sgead_2_3_lib4(int kmax, float *alpha, float *A0, int sda, float *B);
void kernel_sgead_1_0_lib4(int kmax, float *alpha, float *A, float *B);
// TODO
void kernel_sgeset_4_lib4(int kmax, float alpha, float *A);
void kernel_strset_4_lib4(int kmax, float alpha, float *A);
void kernel_sgetr_4_lib4(int tri, int kmax, int kna, float alpha, float *A, float *C, int sdc);
void kernel_sgetr_3_lib4(int tri, int kmax, int kna, float alpha, float *A, float *C, int sdc);
void kernel_sgetr_2_lib4(int tri, int kmax, int kna, float alpha, float *A, float *C, int sdc);
void kernel_sgetr_1_lib4(int tri, int kmax, int kna, float alpha, float *A, float *C, int sdc);
// pack
// 24 lib 8
void kernel_spack_nn_24_lib8(int kmax, float *A, int lda, float *B, int sdb);
void kernel_spack_nn_24_vs_lib8(int kmax, float *A, int lda, float *B, int sdb, int m1);
// 16 lib 8
void kernel_spack_nn_16_lib8(int kmax, float *A, int lda, float *B, int sdb);
void kernel_spack_nn_16_vs_lib8(int kmax, float *A, int lda, float *B, int sdb, int m1);
// 8 lib 8
void kernel_spack_nn_8_lib8(int kmax, float *A, int lda, float *B);
void kernel_spack_nn_8_vs_lib8(int kmax, float *A, int lda, float *B, int m1);
void kernel_spack_tn_8_lib8(int kmax, float *A, int lda, float *B);
void kernel_spack_tn_8_vs_lib8(int kmax, float *A, int lda, float *B, int m1);
void kernel_spack_tt_8_lib8(int kmax, float *A, int lda, float *B, int sdb);
void kernel_spack_tt_8_vs_lib8(int kmax, float *A, int lda, float *B, int sdb, int m1);
// 8 lib 4
void kernel_spack_nn_8_lib4(int kmax, float *A, int lda, float *B, int sdb);
void kernel_spack_nn_8_vs_lib4(int kmax, float *A, int lda, float *B, int sdb, int m1);
//void kernel_spack_tt_8_lib4(int kmax, float *A, int lda, float *B, int sdb);
// 4
void kernel_spack_nn_4_lib4(int kmax, float *A, int lda, float *B);
void kernel_spack_nn_4_vs_lib4(int kmax, float *A, int lda, float *B, int m1);
void kernel_spack_tn_4_lib4(int kmax, float *A, int lda, float *B);
void kernel_spack_tn_4_vs_lib4(int kmax, float *A, int lda, float *B, int m1);
void kernel_spack_tt_4_lib4(int kmax, float *A, int lda, float *B, int sdb);
void kernel_spack_tt_4_vs_lib4(int kmax, float *A, int lda, float *B, int sdb, int m1);
// unpack
// 8
void kernel_sunpack_nn_8_lib4(int kmax, float *A, int sda, float *B, int ldb);
void kernel_sunpack_nn_8_vs_lib4(int kmax, float *A, int sda, float *B, int ldb, int m1);
//void kernel_sunpack_tt_8_lib4(int kmax, float *A, int sda, float *B, int ldb);
// 4
void kernel_sunpack_nn_4_lib4(int kmax, float *A, float *B, int ldb);
void kernel_sunpack_nn_4_vs_lib4(int kmax, float *A, float *B, int ldb, int m1);
void kernel_sunpack_nt_4_lib4(int kmax, float *A, float *B, int ldb);
void kernel_sunpack_nt_4_vs_lib4(int kmax, float *A, float *B, int ldb, int m1);
void kernel_sunpack_tt_4_lib4(int kmax, float *A, int sda, float *B, int ldb);
// panel copy
// 4
void kernel_spacp_nt_4_lib4(int kmax, float *A, int offsetB, float *B, int sdb);
void kernel_spacp_tn_4_lib4(int kmax, int offsetA, float *A, int sda, float *B);
void kernel_spacp_nn_4_lib4(int kmax, int offsetA, float *A, int sda, float *B);
void kernel_spacp_nn_4_vs_lib4(int kmax, int offsetA, float *A, int sda, float *B, int m1);
/************************************************
* BLAS API kernels
************************************************/
//#if defined(BLAS_API)
// A, B panel-major bs=8; C, D column-major
// 24x4
void kernel_sgemm_nt_24x4_lib88cc(int kmax, float *alpha, float *A, int sda, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_24x4_vs_lib88cc(int kmax, float *alpha, float *A, int sda, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 16x4
void kernel_sgemm_nt_16x4_lib88cc(int kmax, float *alpha, float *A, int sda, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_16x4_vs_lib88cc(int kmax, float *alpha, float *A, int sda, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 8x8
void kernel_sgemm_nt_8x8_lib88cc(int kmax, float *alpha, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_8x8_vs_lib88cc(int kmax, float *alpha, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_spotrf_nt_l_8x8_lib88cc(int kmax, float *A, float *B, float *C, int ldc, float *D, int ldd, float *dD);
void kernel_spotrf_nt_l_8x8_vs_lib88cc(int kmax, float *A, float *B, float *C, int ldc, float *D, int ldd, float *dD, int m1, int n1);
void kernel_strsm_nt_rl_inv_8x8_lib88ccc(int kmax, float *A, float *B, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE);
void kernel_strsm_nt_rl_inv_8x8_vs_lib88ccc(int kmax, float *A, float *B, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE, int m1, int n1);
// 8x4
void kernel_sgemm_nt_8x4_lib88cc(int kmax, float *alpha, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_8x4_vs_lib88cc(int kmax, float *alpha, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// A, B panel-major bs=4; C, D column-major
// 8x8
void kernel_sgemm_nt_8x8_lib44cc(int kmax, float *alpha, float *A, int sda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd);
// 8x4
void kernel_sgemm_nt_8x4_lib44cc(int kmax, float *alpha, float *A, int sda, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_8x4_vs_lib44cc(int kmax, float *alpha, float *A, int sda, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_spotrf_nt_l_8x4_lib44cc(int kmax, float *A, int sda, float *B, float *C, int ldc, float *D, int ldd, float *dD);
void kernel_strsm_nt_rl_inv_8x4_lib44ccc(int kmax, float *A, int sda, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE);
void kernel_strsm_nt_rl_inv_8x4_vs_lib44ccc(int kmax, float *A, int sda, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE, int m1, int n1);
// 4x4
void kernel_sgemm_nt_4x4_lib44cc(int kmax, float *alpha, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_4x4_vs_lib44cc(int kmax, float *alpha, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_strsm_nt_rl_inv_4x4_lib44cc4(int kmax, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E, float *dE);
void kernel_strsm_nt_rl_inv_4x4_vs_lib44cc4(int kmax, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E, float *dE, int m1, int n1);
void kernel_strsm_nt_rl_inv_4x4_lib44ccc(int kmax, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE);
void kernel_strsm_nt_rl_inv_4x4_vs_lib44ccc(int kmax, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE, int m1, int n1);
void kernel_strsm_nt_rl_one_4x4_lib44cc4(int kmax, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E);
void kernel_strsm_nt_rl_one_4x4_vs_lib44cc4(int kmax, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E, int m1, int n1);
void kernel_strsm_nt_ru_inv_4x4_lib44cc4(int kmax, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E, float *dE);
void kernel_strsm_nt_ru_inv_4x4_vs_lib44cc4(int kmax, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E, float *dE, int m1, int n1);
void kernel_strsm_nt_ru_one_4x4_lib44cc4(int kmax, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E);
void kernel_strsm_nt_ru_one_4x4_vs_lib44cc4(int kmax, float *A, float *B, float *beta, float *C, int ldc, float *D, int ldd, float *E, int m1, int n1);
void kernel_spotrf_nt_l_4x4_lib44cc(int kmax, float *A, float *B, float *C, int ldc, float *D, int ldd, float *dD);
void kernel_spotrf_nt_l_4x4_vs_lib44cc(int kmax, float *A, float *B, float *C, int ldc, float *D, int ldd, float *dD, int m1, int n1);
// B panel-major bs=8; A, C, D column-major
// 4x24
void kernel_sgemm_nt_4x24_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_4x24_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_tt_4x24_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_tt_4x24_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 4x16
void kernel_sgemm_nt_4x16_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_4x16_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_tt_4x16_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_tt_4x16_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 8x8
void kernel_sgemm_nt_8x8_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_8x8_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_tt_8x8_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_tt_8x8_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 4x8
void kernel_sgemm_nt_4x8_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_4x8_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_tt_4x8_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_tt_4x8_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// B panel-major bs=4; A, C, D column-major
// 8x8
void kernel_sgemm_nt_8x8_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_8x8_vs_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_tt_8x8_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_tt_8x8_vs_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 4x8
void kernel_sgemm_nt_4x8_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_4x8_vs_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_tt_4x8_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_tt_4x8_vs_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 4x4
void kernel_sgemm_nt_4x4_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_4x4_vs_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_tt_4x4_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_tt_4x4_vs_libc4cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// A panel-major bs=8; B, C, D column-major
// 24x4
void kernel_sgemm_nn_24x4_lib8ccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nn_24x4_vs_lib8ccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_nt_24x4_lib8ccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_24x4_vs_lib8ccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 16x4
void kernel_sgemm_nn_16x4_lib8ccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nn_16x4_vs_lib8ccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_nt_16x4_lib8ccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_16x4_vs_lib8ccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 8x8
void kernel_sgemm_nn_8x8_lib8ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nn_8x8_vs_lib8ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_nt_8x8_lib8ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_8x8_vs_lib8ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 8x4
void kernel_sgemm_nn_8x4_lib8ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nn_8x4_vs_lib8ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_nt_8x4_lib8ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_8x4_vs_lib8ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// A panel-major bs=4; B, C, D column-major
// 8x8
void kernel_sgemm_nn_8x8_lib4ccc(int kmax, float *alpha, float *A, int sda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_8x8_lib4ccc(int kmax, float *alpha, float *A, int sda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
// 8x4
void kernel_sgemm_nn_8x4_lib4ccc(int kmax, float *alpha, float *A, int sda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nn_8x4_vs_lib4ccc(int kmax, float *alpha, float *A, int sda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_nt_8x4_lib4ccc(int kmax, float *alpha, float *A, int sda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_8x4_vs_lib4ccc(int kmax, float *alpha, float *A, int sda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// 4x4
void kernel_sgemm_nn_4x4_lib4ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nn_4x4_vs_lib4ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_nt_4x4_lib4ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_4x4_vs_lib4ccc(int kmax, float *alpha, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_strsm_nn_rl_inv_4x4_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE);
void kernel_strsm_nn_rl_inv_4x4_vs_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE, int m1, int n1);
void kernel_strsm_nn_rl_one_4x4_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde);
void kernel_strsm_nn_rl_one_4x4_vs_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, int m1, int n1);
void kernel_strsm_nt_rl_inv_4x4_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE);
void kernel_strsm_nt_rl_inv_4x4_vs_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE, int m1, int n1);
void kernel_strsm_nt_rl_one_4x4_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde);
void kernel_strsm_nt_rl_one_4x4_vs_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, int m1, int n1);
void kernel_strsm_nn_ru_inv_4x4_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE);
void kernel_strsm_nn_ru_inv_4x4_vs_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE, int m1, int n1);
void kernel_strsm_nn_ru_one_4x4_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde);
void kernel_strsm_nn_ru_one_4x4_vs_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, int m1, int n1);
void kernel_strsm_nt_ru_inv_4x4_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE);
void kernel_strsm_nt_ru_inv_4x4_vs_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, float *dE, int m1, int n1);
void kernel_strsm_nt_ru_one_4x4_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde);
void kernel_strsm_nt_ru_one_4x4_vs_lib4cccc(int kmax, float *A, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, float *E, int lde, int m1, int n1);
// A, C, D panel-major; B, E column-major
// TODO merge with above
// 4x4
void kernel_strsm_nn_rl_inv_4x4_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, float *dE);
void kernel_strsm_nn_rl_inv_4x4_vs_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, float *dE, int m1, int n1);
void kernel_strsm_nn_rl_one_4x4_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde);
void kernel_strsm_nn_rl_one_4x4_vs_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, int m1, int n1);
void kernel_strsm_nn_ru_inv_4x4_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, float *dE);
void kernel_strsm_nn_ru_inv_4x4_vs_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, float *dE, int m1, int n1);
void kernel_strsm_nn_ru_one_4x4_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde);
void kernel_strsm_nn_ru_one_4x4_vs_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, int m1, int n1);
void kernel_strsm_nt_rl_inv_4x4_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, float *dE);
void kernel_strsm_nt_rl_inv_4x4_vs_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, float *dE, int m1, int n1);
void kernel_strsm_nt_rl_one_4x4_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde);
void kernel_strsm_nt_rl_one_4x4_vs_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, int m1, int n1);
void kernel_strsm_nt_ru_inv_4x4_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, float *dE);
void kernel_strsm_nt_ru_inv_4x4_vs_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, float *dE, int m1, int n1);
void kernel_strsm_nt_ru_one_4x4_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde);
void kernel_strsm_nt_ru_one_4x4_vs_lib4c44c(int kmax, float *A, float *B, int ldb, float *beta, float *C, float *D, float *E, int lde, int m1, int n1);
// A, B, C, D column-major
void kernel_sgemm_nn_4x4_libcccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nn_4x4_vs_libcccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_nt_4x4_libcccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_nt_4x4_vs_libcccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
void kernel_sgemm_tt_4x4_libcccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd);
void kernel_sgemm_tt_4x4_vs_libcccc(int kmax, float *alpha, float *A, int lda, float *B, int ldb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1);
// vector
void kernel_sdot_11_lib(int n, float *x, float *y, float *res);
void kernel_saxpy_11_lib(int n, float *alpha, float *x, float *y);
//#endif // BLAS_API
// larger kernels
// 24
void kernel_sgemm_nt_24xn_p0_lib88cc(int n, int k, float *alpha, float *A, int sda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, float *A_p, float *B_p);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_S_KERNEL_H_

View File

@@ -1,62 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_STDLIB_H_
#define BLASFEO_STDLIB_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdlib.h>
//
void blasfeo_malloc(void **ptr, size_t size);
//
void blasfeo_malloc_align(void **ptr, size_t size);
//
void blasfeo_free(void *ptr);
//
void blasfeo_free_align(void *ptr);
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_STDLIB_H_

View File

@@ -1,73 +0,0 @@
#ifndef TARGET_X64_INTEL_HASWELL
#define TARGET_X64_INTEL_HASWELL
#endif
#ifndef TARGET_NEED_FEATURE_AVX2
#define TARGET_NEED_FEATURE_AVX2 1
#endif
#ifndef TARGET_NEED_FEATURE_FMA
#define TARGET_NEED_FEATURE_FMA 1
#endif
#ifndef TARGET_NEED_FEATURE_SSE3
/* #undef TARGET_NEED_FEATURE_SSE3 */
#endif
#ifndef TARGET_NEED_FEATURE_AVX
/* #undef TARGET_NEED_FEATURE_AVX */
#endif
#ifndef TARGET_NEED_FEATURE_VFPv3
/* #undef TARGET_NEED_FEATURE_VFPv3 */
#endif
#ifndef TARGET_NEED_FEATURE_NEON
/* #undef TARGET_NEED_FEATURE_NEON */
#endif
#ifndef TARGET_NEED_FEATURE_VFPv4
/* #undef TARGET_NEED_FEATURE_VFPv4 */
#endif
#ifndef TARGET_NEED_FEATURE_NEONv2
/* #undef TARGET_NEED_FEATURE_NEONv2 */
#endif
#ifndef LA_HIGH_PERFORMANCE
#define LA_HIGH_PERFORMANCE
#endif
#ifndef MF_PANELMAJ
#define MF_PANELMAJ
#endif
#ifndef EXT_DEP
#define ON 1
#define OFF 0
#if ON==ON
#define EXT_DEP
#endif
#undef ON
#undef OFF
#endif
#ifndef BLAS_API
#define ON 1
#define OFF 0
#if OFF==ON
#define BLAS_API
#endif
#undef ON
#undef OFF
#endif
#ifndef FORTRAN_BLAS_API
#define ON 1
#define OFF 0
#if OFF==ON
#define FORTRAN_BLAS_API
#endif
#undef ON
#undef OFF
#endif

View File

@@ -1,114 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_TIMING_H_
#define BLASFEO_TIMING_H_
//#include <stdbool.h>
#if (defined _WIN32 || defined _WIN64) && !(defined __MINGW32__ || defined __MINGW64__)
/* Use Windows QueryPerformanceCounter for timing. */
#include <Windows.h>
/** A structure for keeping internal timer data. */
typedef struct blasfeo_timer_ {
LARGE_INTEGER tic;
LARGE_INTEGER toc;
LARGE_INTEGER freq;
} blasfeo_timer;
#elif(defined __APPLE__)
#include <mach/mach_time.h>
/** A structure for keeping internal timer data. */
typedef struct blasfeo_timer_ {
uint64_t tic;
uint64_t toc;
mach_timebase_info_data_t tinfo;
} blasfeo_timer;
#elif(defined __DSPACE__)
#include <brtenv.h>
typedef struct blasfeo_timer_ {
double time;
} blasfeo_timer;
#elif(defined __XILINX_NONE_ELF__ || defined __XILINX_ULTRASCALE_NONE_ELF_JAILHOUSE__)
#include "xtime_l.h"
typedef struct blasfeo_timer_ {
uint64_t tic;
uint64_t toc;
} blasfeo_timer;
#else
/* Use POSIX clock_gettime() for timing on non-Windows machines. */
#include <time.h>
#if __STDC_VERSION__ >= 199901L // C99 Mode
#include <sys/stat.h>
#include <sys/time.h>
typedef struct blasfeo_timer_ {
struct timeval tic;
struct timeval toc;
} blasfeo_timer;
#else // ANSI C Mode
/** A structure for keeping internal timer data. */
typedef struct blasfeo_timer_ {
struct timespec tic;
struct timespec toc;
} blasfeo_timer;
#endif // __STDC_VERSION__ >= 199901L
#endif // (defined _WIN32 || defined _WIN64)
/** A function for measurement of the current time. */
void blasfeo_tic(blasfeo_timer* t);
/** A function which returns the elapsed time. */
double blasfeo_toc(blasfeo_timer* t);
#endif // BLASFEO_TIMING_H_

View File

@@ -1,83 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_V_AUX_EXT_DEP_H_
#define BLASFEO_V_AUX_EXT_DEP_H_
#include "blasfeo_target.h"
#ifdef __cplusplus
extern "C" {
#endif
/************************************************
* d_aux_extern_depend_lib.c
************************************************/
#ifdef EXT_DEP
void v_zeros(void **ptrA, int size);
// dynamically allocate size bytes of memory aligned to 64-byte boundaries and set accordingly a pointer to void; set allocated memory to zero
void v_zeros_align(void **ptrA, int size);
// free the memory allocated by v_zeros
void v_free(void *ptrA);
// free the memory allocated by v_zeros_aligned
void v_free_align(void *ptrA);
// dynamically allocate size bytes of memory and set accordingly a pointer to char; set allocated memory to zero
void c_zeros(char **ptrA, int size);
// dynamically allocate size bytes of memory aligned to 64-byte boundaries and set accordingly a pointer to char; set allocated memory to zero
void c_zeros_align(char **ptrA, int size);
// free the memory allocated by c_zeros
void c_free(char *ptrA);
// free the memory allocated by c_zeros_aligned
void c_free_align(char *ptrA);
#endif // EXT_DEP
#ifdef __cplusplus
}
#endif
#endif // BLASFEO_V_AUX_EXT_DEP_H_

View File

@@ -1,78 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
// headers to reference BLAS and LAPACK routines employed in BLASFEO WR
// level 1
double ddot_(int *m, double *x, int *incx, double *y, int *incy);
void dcopy_(int *m, double *x, int *incx, double *y, int *incy);
void daxpy_(int *m, double *alpha, double *x, int *incx, double *y, int *incy);
void dscal_(int *m, double *alpha, double *x, int *incx);
void drot_(int *m, double *x, int *incx, double *y, int *incy, double *c, double *s);
void drotg_(double *a, double *b, double *c, double *s);
// level 2
void dgemv_(char *ta, int *m, int *n, double *alpha, double *A, int *lda, double *x, int *incx, double *beta, double *y, int *incy);
void dsymv_(char *uplo, int *m, double *alpha, double *A, int *lda, double *x, int *incx, double *beta, double *y, int *incy);
void dtrmv_(char *uplo, char *trans, char *diag, int *n, double *A, int *lda, double *x, int *incx);
void dtrsv_(char *uplo, char *trans, char *diag, int *n, double *A, int *lda, double *x, int *incx);
void dger_(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *A, int *lda);
// level 3
void dgemm_(char *ta, char *tb, int *m, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc);
void dsyrk_(char *uplo, char *trans, int *n, int *k, double *alpha, double *A, int *lda, double *beta, double *C, int *ldc);
void dtrmm_(char *side, char *uplo, char *trans, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb);
void dtrsm_(char *side, char *uplo, char *trans, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb);
void dsyr2k_(char *uplo, char *trans, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc);
// lapack
void dpotrf_(char *uplo, int *m, double *A, int *lda, int *info);
void dgetrf_(int *m, int *n, double *A, int *lda, int *ipiv, int *info);
void dgeqrf_(int *m, int *n, double *A, int *lda, double *tau, double *work, int *lwork, int *info);
void dgeqr2_(int *m, int *n, double *A, int *lda, double *tau, double *work, int *info);
void dgelqf_(int *m, int *n, double *A, int *lda, double *tau, double *work, int *lwork, int *info);
void dorglq_(int *m, int *n, int *k, double *A, int *lda, double *tau, double *work, int *lwork, int *info);
#ifdef __cplusplus
}
#endif

View File

@@ -1,73 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
// headers to reference BLAS and LAPACK routines employed in BLASFEO WR
// level 1
double ddot_(long long *m, double *x, long long *incx, double *y, long long *incy);
void dcopy_(long long *m, double *x, long long *incx, double *y, long long *incy);
void daxpy_(long long *m, double *alpha, double *x, long long *incx, double *y, long long *incy);
void dscal_(long long *m, double *alpha, double *x, long long *incx);
// level 2
void dgemv_(char *ta, long long *m, long long *n, double *alpha, double *A, long long *lda, double *x, long long *incx, double *beta, double *y, long long *incy);
void dsymv_(char *uplo, long long *m, double *alpha, double *A, long long *lda, double *x, long long *incx, double *beta, double *y, long long *incy);
void dtrmv_(char *uplo, char *trans, char *diag, long long *n, double *A, long long *lda, double *x, long long *incx);
void dtrsv_(char *uplo, char *trans, char *diag, long long *n, double *A, long long *lda, double *x, long long *incx);
void dger_(long long *m, long long *n, double *alpha, double *x, long long *incx, double *y, long long *incy, double *A, long long *lda);
// level 3
void dgemm_(char *ta, char *tb, long long *m, long long *n, long long *k, double *alpha, double *A, long long *lda, double *B, long long *ldb, double *beta, double *C, long long *ldc);
void dsyrk_(char *uplo, char *trans, long long *n, long long *k, double *alpha, double *A, long long *lda, double *beta, double *C, long long *ldc);
void dtrmm_(char *side, char *uplo, char *trans, char *diag, long long *m, long long *n, double *alpha, double *A, long long *lda, double *B, long long *ldb);
void dtrsm_(char *side, char *uplo, char *trans, char *diag, long long *m, long long *n, double *alpha, double *A, long long *lda, double *B, long long *ldb);
// lapack
void dpotrf_(char *uplo, long long *m, double *A, long long *lda, long long *info);
void dgetrf_(long long *m, long long *n, double *A, long long *lda, long long *ipiv, long long *info);
void dgeqrf_(long long *m, long long *n, double *A, long long *lda, double *tau, double *work, long long *lwork, long long *info);
void dgeqr2_(long long *m, long long *n, double *A, long long *lda, double *tau, double *work, long long *info);
#ifdef __cplusplus
}
#endif

View File

@@ -1,78 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
// headers to reference BLAS and LAPACK routines employed in BLASFEO WR
// level 1
float sdot_(int *m, float *x, int *incx, float *y, int *incy);
void scopy_(int *m, float *x, int *incx, float *y, int *incy);
void saxpy_(int *m, float *alpha, float *x, int *incx, float *y, int *incy);
void sscal_(int *m, float *alpha, float *x, int *incx);
void srot_(int *m, float *x, int *incx, float *y, int *incy, float *c, float *s);
void srotg_(float *a, float *b, float *c, float *s);
// level 2
void sgemv_(char *ta, int *m, int *n, float *alpha, float *A, int *lda, float *x, int *incx, float *beta, float *y, int *incy);
void ssymv_(char *uplo, int *m, float *alpha, float *A, int *lda, float *x, int *incx, float *beta, float *y, int *incy);
void strmv_(char *uplo, char *trans, char *diag, int *n, float *A, int *lda, float *x, int *incx);
void strsv_(char *uplo, char *trans, char *diag, int *n, float *A, int *lda, float *x, int *incx);
void sger_(int *m, int *n, float *alpha, float *x, int *incx, float *y, int *incy, float *A, int *lda);
// level 3
void sgemm_(char *ta, char *tb, int *m, int *n, int *k, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc);
void ssyrk_(char *uplo, char *trans, int *n, int *k, float *alpha, float *A, int *lda, float *beta, float *C, int *ldc);
void strmm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *A, int *lda, float *B, int *ldb);
void strsm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *A, int *lda, float *B, int *ldb);
void ssyr2k_(char *uplo, char *trans, int *n, int *k, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc);
// lapack
void spotrf_(char *uplo, int *m, float *A, int *lda, int *info);
void sgetrf_(int *m, int *n, float *A, int *lda, int *ipiv, int *info);
void sgeqrf_(int *m, int *n, float *A, int *lda, float *tau, float *work, int *lwork, int *info);
void sgeqr2_(int *m, int *n, float *A, int *lda, float *tau, float *work, int *info);
void sgelqf_(int *m, int *n, float *A, int *lda, float *tau, float *work, int *lwork, int *info);
void sorglq_(int *m, int *n, int *k, float *A, int *lda, float *tau, float *work, int *lwork, int *info);
#ifdef __cplusplus
}
#endif

View File

@@ -1,73 +0,0 @@
/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
// headers to reference BLAS and LAPACK routines employed in BLASFEO WR
// level 1
float sdot_(long long *m, float *x, long long *incx, float *y, long long *incy);
void scopy_(long long *m, float *x, long long *incx, float *y, long long *incy);
void saxpy_(long long *m, float *alpha, float *x, long long *incx, float *y, long long *incy);
void sscal_(long long *m, float *alpha, float *x, long long *incx);
// level 2
void sgemv_(char *ta, long long *m, long long *n, float *alpha, float *A, long long *lda, float *x, long long *incx, float *beta, float *y, long long *incy);
void ssymv_(char *uplo, long long *m, float *alpha, float *A, long long *lda, float *x, long long *incx, float *beta, float *y, long long *incy);
void strmv_(char *uplo, char *trans, char *diag, long long *n, float *A, long long *lda, float *x, long long *incx);
void strsv_(char *uplo, char *trans, char *diag, long long *n, float *A, long long *lda, float *x, long long *incx);
void sger_(long long *m, long long *n, float *alpha, float *x, long long *incx, float *y, long long *incy, float *A, long long *lda);
// level 3
void sgemm_(char *ta, char *tb, long long *m, long long *n, long long *k, float *alpha, float *A, long long *lda, float *B, long long *ldb, float *beta, float *C, long long *ldc);
void ssyrk_(char *uplo, char *trans, long long *n, long long *k, float *alpha, float *A, long long *lda, float *beta, float *C, long long *ldc);
void strmm_(char *side, char *uplo, char *transa, char *diag, long long *m, long long *n, float *alpha, float *A, long long *lda, float *B, long long *ldb);
void strsm_(char *side, char *uplo, char *transa, char *diag, long long *m, long long *n, float *alpha, float *A, long long *lda, float *B, long long *ldb);
// lapack
void spotrf_(char *uplo, long long *m, float *A, long long *lda, long long *info);
void sgetrf_(long long *m, long long *n, float *A, long long *lda, long long *ipiv, long long *info);
void sgeqrf_(long long *m, long long *n, float *A, long long *lda, float *tau, float *work, long long *lwork, long long *info);
void sgeqr2_(long long *m, long long *n, float *A, long long *lda, float *tau, float *work, long long *info);
#ifdef __cplusplus
}
#endif