Menu

[09bd5b]: / src / linalg.c  Maximize  Restore  History

Download this file

353 lines (304 with data), 10.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
/***************************************************************************
* Copyright (C) 2010 by Matthias Ihrke *
* ihrke@nld.ds.mpg.de
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#include "linalg.h"
#include "helper.h"
#include "mathadd.h"
#include <string.h>
#include <math.h>
#include <gsl/gsl_linalg.h>
/** \brief convert 2D- Array struct to gsl_matrix.
\ingroup grplinalg
This function works also for non-double Array's,
but the input array MUST be of DType DOUBLE if alloc
is FALSE. If alloc is TRUE, the type is casted
to double during the copy-process.
\note the overhead for alloc=FALSE is very small,
for alloc=TRUE MxN copy processes are necessary.
\warning use alloc=FALSE only if you have a double-Array
\note if you free with gsl_matrix_free, the memory is
*NOT* free'd but still belongs to the Array!
Example:
\code
gsl_matrix *g;
Array *a = array_new_dummy( DOUBLE, 2, 10, 11 );
g = matrix_to_gsl( a, FALSES );
gsl_matrix_free( g ); //does not free a->data (!)
array_free( a ); // does free a->data
\endcode
\param in the input 2D-array
\param alloc if true, copy the memory pointed to by in;
if false, simply put the pointer into the
gsl_matrix struct (bad stuff happens if
it is not DType DOUBLE);
\return the gsl_matrix
*/
gsl_matrix* matrix_to_gsl( Array *in, bool alloc ){
gsl_matrix *out;
if( in->ndim != 2 ){
errprintf("in is not a matrix, ndim=%i\n", in->ndim );
}
if( alloc ){
out = gsl_matrix_alloc( (size_t)in->size[0], (size_t)in->size[1] );
if( in->dtype==DOUBLE ){ /* faster */
memcpy( out->data, in->data, in->nbytes );
} else { /* slow */
int i,j;
double el;
for( i=0; i<in->size[0]; i++ ){
for( j=0; j<in->size[1]; j++ ){
array_dtype_to_double( &el, array_index2( in, i, j ), in->dtype );
gsl_matrix_set( out, i, j, el );
}
}
}
} else { /* no mallocing */
MALLOC( out, 1, gsl_matrix );
out->size1=(size_t)in->size[0];
out->size2=(size_t)in->size[1];
out->tda=(size_t)in->size[1];
out->data=(double*)in->data;
out->owner=0; /* GSL is not the owner of the data */
out->block=NULL;
}
return out;
}
/** \brief calculate the mean of matrix a along dimension dim.
\ingroup grplinalg
\param a the matrix
\param dim along which dimension?
\return vector of size a->size[1-dim]
*/
Array* matrix_mean( Array *a, int dim ){
Array *v;
int i,j;
bool ismatrix;
matrix_CHECK( ismatrix, a );
if( !ismatrix ) return NULL;
if( dim!=0 && dim!=1 ){
errprintf("wrong dimension dim=%i, must be in {0,1}\n", dim );
return NULL;
}
double *p;
v = array_new2( DOUBLE, 1, a->size[1-dim] );
for( i=0; i<a->size[1-dim]; i++ ){
p=&array_INDEX1( v, double, i );
*p= 0.0;
for( j=0; j<a->size[dim]; j++ ){
array_INDEX1( v, double, i ) +=
array_INDEX2( a, double, (dim==1)?i:j,(dim==1)?j:i );
}
array_INDEX1( v, double, i ) /= (double)a->size[dim];
}
return v;
}
/** \brief get a row of a matrix as vector.
\ingroup grplinalg
\note if alloc is TRUE, the returned vector is
independant. If alloc is FALSE, the memory
is shared between m and the row vector but
is owned by the matrix (an array_free(vec) call
does not free the memory, but array_free(m) does).
\param m the matrix
\param row index to the row
\param alloc allocate memory for the row-data, or
only pass the pointer to the new Array
*/
Array* matrix_get_row( Array *m, int row, bool alloc ){
Array *rowvec;
bool ismatrix;
matrix_CHECK( ismatrix, m );
if( !ismatrix ) return NULL;
if( row<0 || row>=m->size[0] ){
errprintf("Index out of bounds: %i/%i\n", row, m->size[0] );
return NULL;
}
if( !alloc ){
rowvec = array_fromptr2( DOUBLE, 1, array_INDEXMEM2( m, row, 0 ),
m->size[1] );
} else {
rowvec = array_new2( DOUBLE, 1, m->size[1] );
memcpy( rowvec->data, array_INDEXMEM2( m, row, 0 ),
m->size[1]*m->dtype_size );
}
return rowvec;
}
/** \brief get a column of a matrix as vector.
\ingroup grplinalg
\note memory copy is necessary, because the
memory of a column is not aligned.
It is therefore slower than calling
matrix_get_row() with alloc=FALSE.
If you need a lot of columns, you might
want consider calling matrix_transpose() first
and matrix_get_row() afterwards.
\param m the matrix
\param col index to the column
\return a column vector
*/
Array* matrix_get_col( Array *m, int col ){
Array *colvec;
int i;
bool ismatrix;
matrix_CHECK( ismatrix, m );
if( !ismatrix ) return NULL;
if( col<0 || col>=m->size[1] ){
errprintf("Index out of bounds: %i/%i\n", col, m->size[1] );
return NULL;
}
colvec = array_new2( DOUBLE, 1, m->size[0] );
for( i=0; i<m->size[0]; i++ ){
array_INDEX1( colvec, double, i )=array_INDEX2(m, double, i, col );
}
return colvec;
}
/** \brief Matrix Multiplication.
\ingroup grplinalg
m1 and m2 must be matrices such that m1->size[1]==m2->size[0].
\param m1,m2 the matrices
\return new matrix of size m1->size[1], m2->size[0]
*/
Array* matrix_mult( const Array *m1, const Array *m2 ){
Array *out=NULL;
int i,j,k;
bool ismatrix;
matrix_CHECK( ismatrix, m1 ); if( !ismatrix ) return NULL;
matrix_CHECK( ismatrix, m2 ); if( !ismatrix ) return NULL;
if( m1->size[1] != m2->size[0] ){
errprintf("Mismatch in size, cannot calculate matrix product: %i!=%i\n",
m1->size[1], m2->size[0] );
return NULL;
}
out = array_new2( DOUBLE, 2, m1->size[0], m2->size[1] );
double *field;
for( i=0; i<out->size[0]; i++ ){
for( j=0; j<out->size[1]; j++ ){
field=(double*)array_INDEXMEM2( out, i, j );
*field=0.0;
for( k=0; k<m1->size[1]; k++ ){
*field += array_INDEX2( m1, double, i, k )*
array_INDEX2( m2, double, k, j );
}
}
}
return out;
}
/** \brief Matrix transpose.
\ingroup grplinalg
\todo implement a faster way
\param m matrix
\param alloc if TRUE, return a new matrix that is the transpose of m;
else do in-place transpose
*/
Array* matrix_transpose( Array *m, bool alloc ){
int i,j;
bool ismatrix;
matrix_CHECK( ismatrix, m );
if( !ismatrix ) return NULL;
Array *out;
out = array_new2( DOUBLE, 2, m->size[1], m->size[0] );
for( i=0; i<m->size[0]; i++ ){
for( j=0; j<m->size[1]; j++ ){
array_INDEX2( out, double, j,i) = array_INDEX2( m, double, i,j);
}
}
if( !alloc ){
memcpy( m->data, out->data, out->nbytes );
array_free( out );
out = m;
double tmp=out->size[0];
out->size[0]=out->size[1];
out->size[1]=tmp;
}
return out;
}
/** \brief Principal Components analysis.
\ingroup grplinalg
This implementation uses SVD to calculate the PCA.
Example:
\code
Array *X = get_data_from_somewhere();
Array *var, *X_pca;
matrix_pca( X, &var, FALSE ); // overwrite X
X_pca = matrix_pca( X, &var, TRUE ); // do not touch X
\endcode
\param X a 2D array observations x variables containing the data
\param var output: vector of eigenvalues of XX^T in decreasing order; if you
pass NULL, it is ignored;
\param alloc if true, new memory is allocated and returned. Else
X is overwritten.
\return pointer to the PCA'd matrix
*/
Array* matrix_pca( Array *X, Array **var, bool alloc ){
Array *out=NULL;
int i,j;
bool ismatrix;
matrix_CHECK( ismatrix, X );
if( !ismatrix ) return NULL;
int N,K; /* N observations, K variables */
N = X->size[0];
K = X->size[1];
Array *tmp = array_copy( X, TRUE );
/* subtract mean from observations */
Array *mean=matrix_mean( X, 0 );
for( i=0; i<N; i++ ){
for( j=0; j<K; j++ ){
array_INDEX2( tmp, double, i, j ) -=
array_INDEX1( mean, double, j );
}
}
array_scale( tmp, 1.0/sqrt((double) N-1 ) );
gsl_matrix *A=matrix_to_gsl( tmp, TRUE ); /* copy */
gsl_matrix *V=gsl_matrix_alloc( K, K );
gsl_vector *S=gsl_vector_alloc( K );
gsl_vector *workspace=gsl_vector_alloc( K );
/* A->U, V->V, S->S */
gsl_linalg_SV_decomp( A, V, S, workspace);
gsl_matrix_transpose( V );
if( var ){
(*var)=array_fromptr2( DOUBLE, 1, S->data, S->size );
S->owner=0; /* transfer ownership to array */
(*var)->free_data=1;
for( i=0; i<array_NUMEL( *var ); i++ ){
array_INDEX1( *var, double, i ) = SQR( array_INDEX1( *var, double, i ) );
}
}
Array *Vp=array_fromptr2( DOUBLE, 2, V->data, V->size1, V->size2 );
matrix_transpose( tmp, FALSE );
out=matrix_mult( Vp, tmp ); /* PCA'd data */
matrix_transpose( out, FALSE );
if( out->size[0]!=X->size[0] || out->size[1]!=X->size[1] ){
errprintf("Input/Output dimension mismatch: (%i,%i) vs. (%i,%i)\n",
X->size[0], X->size[1], out->size[0], out->size[1] );
}
if( !alloc ){ /* write back out->X */
memcpy( X->data, out->data, out->nbytes );
array_free( out );
out = X;
}
/* clean up */
gsl_matrix_free( A );
gsl_matrix_free( V );
gsl_vector_free( S );
gsl_vector_free( workspace );
array_free( mean );
array_free( Vp );
array_free( tmp );
return out;
}
Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.