[ODE] cg solver

Fri May 4 11:33:27 MST 2007

the cg solver i posted was leaking lots of memory. here is a fixed  
version

static int CG_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody  
* const *body,
	dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc,  
dRealMutablePtr b,
	dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex,
	dxQuickStepParameters *qs)
{
	int i,j,errorcode=0;
	const int num_iterations = qs->num_iterations;

#ifdef WARM_STARTING
	// for warm starting, this seems to be necessary to prevent
	// jerkiness in motor-driven joints. i have no idea why this works.
	for (i=0; i<m; i++) lambda[i] *= (dReal) 0.9;
#else
	dSetZero (lambda,m);
#endif

	// a copy of the 'hi' vector in case findex[] is being used
	dRealAllocaArray (hicopy,m);
	memcpy (hicopy,hi,m*sizeof(dReal));

	// precompute iMJ = inv(M)*J'
	dRealAllocaArray (iMJ,m*12);
	compute_invM_JT (m,J,iMJ,jb,body,invI);

	dReal last_rho = 0;
	dRealAllocaArray (r,m);
	dRealAllocaArray (z,m);
	dRealAllocaArray (p,m);
	dRealAllocaArray (q,m);

	// precompute 1 / diagonals of A
	dRealAllocaArray (Ad,m);
	dRealPtr iMJ_ptr = iMJ;
	dRealPtr J_ptr = J;
	for (i=0; i<m; i++) {
		dReal sum = 0;
		for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j];
		if (jb[i*2+1] >= 0) {
			for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j];
		}
		iMJ_ptr += 12;
		J_ptr += 12;
		Ad[i] = REAL(1.0) / (sum + cfm[i]);
	}

#ifdef WARM_STARTING
	// compute residual r = b - A*lambda
	multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r);
	for (i=0; i<m; i++) r[i] = b[i] - r[i];
#else
	dSetZero (lambda,m);
	memcpy (r,b,m*sizeof(dReal));		// residual r = b - A*lambda
#endif

	dReal stepsize=1.0;
	dReal lasterror=1e20;
	int badsteps=0;

	dRealAllocaArray (bestsol,m);
	for (i=0; i<m; i++) bestsol[i] = lambda[i];

	for (int iteration=0; iteration < num_iterations; iteration++) {
		for (i=0; i<m; i++) z[i] = r[i]*Ad[i];	// z = inv(M)*r
		dReal rho = dot (m,r,z);		// rho = r'*z

		// @@@
		// we must check for convergence, otherwise rho will go to 0 if
		// we get an exact solution, which will introduce NaNs into the  
equations.
		if (rho < 1e-10) {
			//dMessage (0,"CG returned at iteration %d",iteration);
			break;
		}

		if (iteration==0) {
			memcpy (p,z,m*sizeof(dReal));	// p = z
		}
		else {
			add (m,p,z,p,rho/last_rho);	// p = z + (rho/last_rho)*p
		}

		// compute q = (J*inv(M)*J')*p
		multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,p,q);

		dReal alpha = rho/dot (m,p,q);		// alpha = rho/(p'*q)

	#if !ALWAYS_PROJECT
		add (m,lambda,lambda,p,alpha);		// lambda = lambda + alpha*p

		add (m,r,r,q,-alpha);				// r = r - alpha*q
	#else
		for (int i=0; i<m; i++) {
			int index = i;

			// set the limits for this constraint. note that 'hicopy' is used.
			// this is the place where the QuickStep method differs from the
			// direct LCP solving method, since that method only performs this
			// limit adjustment once per time step, whereas this method performs
			// once per iteration per constraint row.
			// the constraints are ordered so that all lambda[] values needed  
have
			// already been computed.
			if (findex && findex[index] >= 0) {
				hi[index] = dFabs (hicopy[index] * lambda[findex[index]]);
				lo[index] = -hi[index];
			}

			// compute lambda and clamp it to [lo,hi].
			// @@@ potential optimization: does SSE have clamping instructions
			//     to save test+jump penalties here?
			dReal new_lambda = lambda[index] + alpha*p[i];
			if (new_lambda < lo[index]) {
				lambda[index] = lo[index];
			}
			else if (new_lambda > hi[index]) {
				lambda[index] = hi[index];
			}
			else
			{
				lambda[index] = new_lambda;
			}
		}

		// compute residual r = b - A*lambda
		multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r);
		for (i=0; i<m; i++) r[i] = b[i] - r[i];

		// print actual error
		dReal error = 0;
		for (i=0; i<m; i++) error += dFabs(r[i]);
		//dMessage (0, "lambda error = %10.6e",error);
		if (error>lasterror)
		{
			badsteps++;
			if (badsteps>4)
			{
				errorcode=-1;
				iteration += num_iterations;
			}
			//dMessage(0,"solution getting worse...");
			stepsize *= 0.75;
		}	
		else
		{
			badsteps=0;
			for (i=0; i<m; i++) bestsol[i] = lambda[i];
		}
		lasterror = error;
	#endif

		last_rho = rho;
	}

#if !ALWAYS_PROJECT
	for (int i=0; i<m; i++) {
		int index = i;

		// set the limits for this constraint. note that 'hicopy' is used.
		// this is the place where the QuickStep method differs from the
		// direct LCP solving method, since that method only performs this
		// limit adjustment once per time step, whereas this method performs
		// once per iteration per constraint row.
		// the constraints are ordered so that all lambda[] values needed have
		// already been computed.
		if (findex && findex[index] >= 0) {
			hi[index] = dFabs (hicopy[index] * lambda[findex[index]]);
			lo[index] = -hi[index];
		}

		// compute lambda and clamp it to [lo,hi].
		// @@@ potential optimization: does SSE have clamping instructions
		//     to save test+jump penalties here?
		dReal new_lambda = lambda[index];
		if (new_lambda < lo[index]) {
			lambda[index] = lo[index];
		}
		else if (new_lambda > hi[index]) {
			lambda[index] = hi[index];
		}
	}
#endif

	// there was an error
	if (errorcode!=0)
	{
		for (i=0; i<m; i++) lambda[i] = bestsol[i];
		memcpy (hi,hicopy,m*sizeof(dReal));
	}

	// compute fc = inv(M)*J'*lambda
	multiply_invM_JT (m,nb,iMJ,jb,lambda,fc);

#if 0
	// measure solution error
	multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r);
	dReal error = 0;
	for (i=0; i<m; i++) error += dFabs(r[i] - b[i]);
	dMessage (0, "CG lambda error = %10.6e",error);
#endif

	dRealFreeArray (hicopy);
	dRealFreeArray (iMJ);
	dRealFreeArray (r);
	dRealFreeArray (z);
	dRealFreeArray (p);
	dRealFreeArray (q);
	dRealFreeArray (Ad);
	dRealFreeArray (bestsol);

	return 0;//errorcode;
}