The gauss.c snippet solves a system of equations by the method of Gauss-Jordan. It can be used in your fits to obtain the coefficients of the fit by the method of least squares.

It uses the program gaussj.c from the Numerical recipes (http://www.nr.com) collection. You need to include both in your code:

#include "nrutil.c"
#include "gaussj.c"

If you have a group of N pairs of data (xi, yi), whose behaviour can be described by the equation:

y = A0 + A1x + A1x2 + ... + An-1xn-1

the problem is to determine the values of the Ai parameters.

The code

The program reads a file with data in two columns (the x and y data you have measured) and asks the number of parameters for the fit (more on this in Appendix D of the lab-guide about statistical distributions).

/***************************************************
 * Data input
 ***************************************************/
printf("\nName of data file: ");
scanf("%s", name);
N = numOfLines(name);
printf("N = %d\n", N);
	
printf(     "\nNumber of parameters of the funcion: ");
scanf("%d", &n);
fprintf(fp, "\nNumber of parameters of the funcion: %d\n", n);

The vectors and matrices are created according to the nrutil.c definitions. Other relevant vectors and matrices are calculated too. Then, a call to the gaussj() function is made to invert the matrix of the fit, and finally the corrected values for the y coordinate are calculated. The standard deviation, the errors and the regression coefficients are calculated as well.

Finally, everything is written to an output file.

Method of simple least squares

In this method we want to determine the parameters that make the discrepancies between the measured values yi and the calculated ones yi,cal minimal:

χ2 = ∑ ( yi - yi,calc )2 = ∑ ( yi - A0 + A1x + A1x2 + ... + An-1xn-1 )2

For this, we equal the partial differential of χ2 with respect to each of the parameters Aj to cero:

( ∂χ2/∂Aj ) = 2 ∑ ( yi - A0 + A1x + A1x2 + ... + An-1xn-1 )( -xi )j = 0

This leads to a system of equations a x = b, where a is the matrix of curvature, and:

x = ( A0, A1, A1, ... An-1 )
b = ( ∑yi, ∑yixi, ∑yixi2, ... ∑yixin-1 )

printf(    "\n\nMatrix a before inversion:\n\n");
fprintf(fp,"\n\nMatrix a before inversion:\n\n");
for (i=1; i<=n; i++) {	   
	for (j=1; j<=n; j++) {
		sumk = 0.0;
		for (k=1; k<=N; k++)
			sumk += pow(x[k], i+j-2);			
		a[i][j] = sumk;
		printf(     " %10.3lf", a[i][j]);
		fprintf(fp, " %10.3lf", a[i][j]);
	}
	printf(     "\n");
	fprintf(fp, "\n");
}

/***************************************************/
printf(    "\nVector b:\n\n");
fprintf(fp,"\nVector b:\n\n");
for (j=1; j<=n; j++) {
	sumj = 0.0;
	for (i=1; i<=N; i++)
		sumj += y[i]*pow(x[i], j-1);
	b[j] = sumj;
	printf(     " %10.3lf", b[j]);
	fprintf(fp, " %10.3lf", b[j]);
}

To solve the system, we write the previous equation in the form x = a-1 b. The inverse of matrix a is called error matrix or covariance matrix, because its elements are the variance and covariance of the fit parameters Aj.

gaussj(a,n,B,n);
printf(     "\n\nMatrix a after inversion:\n\n");
fprintf(fp, "\n\nMatrix a after inversion:\n\n");
for (i=1; i<=n; i++) {
	for (j=1; j<=n; j++) {
		printf(     " %10.3lf", a[i][j]);
		fprintf(fp, " %10.3lf", a[i][j]);
	}
	printf(    "\n");
	fprintf(fp,"\n");
}

The standard deviation of the fit is:

s = √ ∑ ( yi - yi,calc )2/N -n

/***************************************************
 * Standard deviation
 ***************************************************/
sumi    = 0.0;
for (i=1; i<=N; i++) 
	sumi += pow(y[i] - yc[i], 2);
s = sqrt( sumi / ((double)(N-n)) );
printf(     "\n\nStandard deviation of the fit: \n s = %3.3e", s);
fprintf(fp, "\n\nStandard deviation of the fit: \n s = %3.3e", s);

and the uncertainties in the Aj parameters are calculated according to:

s( Aj ) = s √ εjj

where εjj represent the elements of the diagonal of the error matrix.

/***************************************************
 * Error calculation
 ***************************************************/
printf(     "\n\nUncertainty:\n\n");
fprintf(fp, "\n\nUncertainty:\n\n");
for(i=1; i<=n; i++) {
	S[i] = s*sqrt(a[i][i]);
	printf(     " S[A%d] = %3.3e\n", i, S[i]);
	fprintf(fp, " S[A%d] = %3.3e\n", i, S[i]);
}

The coefficient of multiple regression R is defined as:

R2 = ∑ ( Aj Sjy2 / Sy2 )

where

xj, av = ∑ xji / N

y av = ∑ yi / N

Sjy2 = ∑ ( xji - xj, av ) ( yj - yav ) / N-1

/***************************************************
 * Regression coefficient
 ***************************************************/
for (j=1; j<=n; j++) {
	sumi = 0.0;
	for (i=1; i<=N; i++) 
		sumi += pow(x[i], j-1);
	xj[j] = sumi / (double)N;
	printf(     "\n Average of x to the power of %d = %3.3lf", j-1, xj[j]);
	fprintf(fp, "\n Average of x to the power of %d = %3.3lf", j-1, xj[j]);
}
	
sumi = 0.0;
for (i=1; i<=N; i++)
	sumi += y[i];
yj = sumi / (double)N;
printf(     "\n\n Average of experimental y = %3.3lf\n", yj);
fprintf(fp, "\n\n Average of experimental y = %3.3lf\n", yj);

for (j=1; j<=n; j++) {
	sumi = 0.0;
	for (i=1; i<=N; i++)
		sumi += (pow(x[i], j-1) - xj[j])*(y[i] - yj);
	Sjy2[j] = sumi / ( (double)(N-1) );
	printf(     "\n Dispersion of x%dy: S[x,y]2 = %3.3lf", j-1, Sjy2[j]);
	fprintf(fp, "\n Dispersion of x%dy: S[x,y]2 = %3.3lf", j-1, Sjy2[j]);
}

and

Sy2 = ∑ ( yi - yav )2 / N-1

sumi = 0.0;
for (i=1; i<=N; i++) 
	sumi += pow(y[i] - yj, 2);
Sy2 = sumi / ( (double)(N-1) );
printf(     "\n\n Dispersion of experimental y: S[y]2 = %3.3lf\n", Sy2);
fprintf(fp, "\n\n Dispersion of experimental y: S[y]2 = %3.3lf\n", Sy2);

R2 = 0.0;
for (j=1; j<=n; j++) 
	R2 += A[j]*Sjy2[j] / Sy2;
printf(     "\n Regression coefficient: R2 = %lf\n", R2);
fprintf(fp, "\n Regression coefficient: R2 = %lf\n", R2);