[ODE] cg solver
Patrick Enoch
Hendrix_ at gmx.net
Fri May 4 11:33:27 MST 2007
the cg solver i posted was leaking lots of memory. here is a fixed
version
static int CG_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody
* const *body,
dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc,
dRealMutablePtr b,
dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex,
dxQuickStepParameters *qs)
{
int i,j,errorcode=0;
const int num_iterations = qs->num_iterations;
#ifdef WARM_STARTING
// for warm starting, this seems to be necessary to prevent
// jerkiness in motor-driven joints. i have no idea why this works.
for (i=0; i<m; i++) lambda[i] *= (dReal) 0.9;
#else
dSetZero (lambda,m);
#endif
// a copy of the 'hi' vector in case findex[] is being used
dRealAllocaArray (hicopy,m);
memcpy (hicopy,hi,m*sizeof(dReal));
// precompute iMJ = inv(M)*J'
dRealAllocaArray (iMJ,m*12);
compute_invM_JT (m,J,iMJ,jb,body,invI);
dReal last_rho = 0;
dRealAllocaArray (r,m);
dRealAllocaArray (z,m);
dRealAllocaArray (p,m);
dRealAllocaArray (q,m);
// precompute 1 / diagonals of A
dRealAllocaArray (Ad,m);
dRealPtr iMJ_ptr = iMJ;
dRealPtr J_ptr = J;
for (i=0; i<m; i++) {
dReal sum = 0;
for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j];
if (jb[i*2+1] >= 0) {
for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j];
}
iMJ_ptr += 12;
J_ptr += 12;
Ad[i] = REAL(1.0) / (sum + cfm[i]);
}
#ifdef WARM_STARTING
// compute residual r = b - A*lambda
multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r);
for (i=0; i<m; i++) r[i] = b[i] - r[i];
#else
dSetZero (lambda,m);
memcpy (r,b,m*sizeof(dReal)); // residual r = b - A*lambda
#endif
dReal stepsize=1.0;
dReal lasterror=1e20;
int badsteps=0;
dRealAllocaArray (bestsol,m);
for (i=0; i<m; i++) bestsol[i] = lambda[i];
for (int iteration=0; iteration < num_iterations; iteration++) {
for (i=0; i<m; i++) z[i] = r[i]*Ad[i]; // z = inv(M)*r
dReal rho = dot (m,r,z); // rho = r'*z
// @@@
// we must check for convergence, otherwise rho will go to 0 if
// we get an exact solution, which will introduce NaNs into the
equations.
if (rho < 1e-10) {
//dMessage (0,"CG returned at iteration %d",iteration);
break;
}
if (iteration==0) {
memcpy (p,z,m*sizeof(dReal)); // p = z
}
else {
add (m,p,z,p,rho/last_rho); // p = z + (rho/last_rho)*p
}
// compute q = (J*inv(M)*J')*p
multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,p,q);
dReal alpha = rho/dot (m,p,q); // alpha = rho/(p'*q)
#if !ALWAYS_PROJECT
add (m,lambda,lambda,p,alpha); // lambda = lambda + alpha*p
add (m,r,r,q,-alpha); // r = r - alpha*q
#else
for (int i=0; i<m; i++) {
int index = i;
// set the limits for this constraint. note that 'hicopy' is used.
// this is the place where the QuickStep method differs from the
// direct LCP solving method, since that method only performs this
// limit adjustment once per time step, whereas this method performs
// once per iteration per constraint row.
// the constraints are ordered so that all lambda[] values needed
have
// already been computed.
if (findex && findex[index] >= 0) {
hi[index] = dFabs (hicopy[index] * lambda[findex[index]]);
lo[index] = -hi[index];
}
// compute lambda and clamp it to [lo,hi].
// @@@ potential optimization: does SSE have clamping instructions
// to save test+jump penalties here?
dReal new_lambda = lambda[index] + alpha*p[i];
if (new_lambda < lo[index]) {
lambda[index] = lo[index];
}
else if (new_lambda > hi[index]) {
lambda[index] = hi[index];
}
else
{
lambda[index] = new_lambda;
}
}
// compute residual r = b - A*lambda
multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r);
for (i=0; i<m; i++) r[i] = b[i] - r[i];
// print actual error
dReal error = 0;
for (i=0; i<m; i++) error += dFabs(r[i]);
//dMessage (0, "lambda error = %10.6e",error);
if (error>lasterror)
{
badsteps++;
if (badsteps>4)
{
errorcode=-1;
iteration += num_iterations;
}
//dMessage(0,"solution getting worse...");
stepsize *= 0.75;
}
else
{
badsteps=0;
for (i=0; i<m; i++) bestsol[i] = lambda[i];
}
lasterror = error;
#endif
last_rho = rho;
}
#if !ALWAYS_PROJECT
for (int i=0; i<m; i++) {
int index = i;
// set the limits for this constraint. note that 'hicopy' is used.
// this is the place where the QuickStep method differs from the
// direct LCP solving method, since that method only performs this
// limit adjustment once per time step, whereas this method performs
// once per iteration per constraint row.
// the constraints are ordered so that all lambda[] values needed have
// already been computed.
if (findex && findex[index] >= 0) {
hi[index] = dFabs (hicopy[index] * lambda[findex[index]]);
lo[index] = -hi[index];
}
// compute lambda and clamp it to [lo,hi].
// @@@ potential optimization: does SSE have clamping instructions
// to save test+jump penalties here?
dReal new_lambda = lambda[index];
if (new_lambda < lo[index]) {
lambda[index] = lo[index];
}
else if (new_lambda > hi[index]) {
lambda[index] = hi[index];
}
}
#endif
// there was an error
if (errorcode!=0)
{
for (i=0; i<m; i++) lambda[i] = bestsol[i];
memcpy (hi,hicopy,m*sizeof(dReal));
}
// compute fc = inv(M)*J'*lambda
multiply_invM_JT (m,nb,iMJ,jb,lambda,fc);
#if 0
// measure solution error
multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r);
dReal error = 0;
for (i=0; i<m; i++) error += dFabs(r[i] - b[i]);
dMessage (0, "CG lambda error = %10.6e",error);
#endif
dRealFreeArray (hicopy);
dRealFreeArray (iMJ);
dRealFreeArray (r);
dRealFreeArray (z);
dRealFreeArray (p);
dRealFreeArray (q);
dRealFreeArray (Ad);
dRealFreeArray (bestsol);
return 0;//errorcode;
}
More information about the ODE
mailing list