XML. (was: Re: [ODE] outputting objects)
Nate W
coding at natew.com
Tue Feb 4 13:42:01 2003
On Tue, 4 Feb 2003, Martin C. Martin wrote:
> Yeah, what you really need is a parser generator, like yacc, but that
> generates top down parsers. Recursive descent parsers would be ideal.
I sorta built a recursive descent parser atop the SAX API. The
start-element, data, and end-element callbacks are included below. I'm
really not happy with the speed though. It could be that I have a
throw/catch for every tag, it could be that there are typically several
string compares per tag.
I think a big speed increase would be realized by loading the XML data
into memory, hashing each tag to a 32-bit int, and then using int compares
rather than string compares.
Then use some recursive descent through the in-memory version of the data,
but have the object deserialization methods look at the hash-and-data
pairs at the current "descent level" and do reflection there. More later,
when I have time....
/****************************************************************************/
/** XmlStreamReader::vStartElement
**
** When a tag opens, the reader asks the object at the top of the stack how
** to handle the data within the tag. The object at the top of the stack
** pay provide a new XmlStreamable object, or may provide a pointer to a
** primitive data type whose value is to be set by the tag's data.
****************************************************************************/
void StreamReader::vStartElement (const wchar_t *pwchLocalName)
{
Serializable *pCurrentObject = 0;
// If the new tag matches the object at the top of the stack, push
if (!wcscmp (pwchLocalName, m_wszTopTag))
{
m_ObjectStack.push (m_pTopObject);
return;
}
// The new data will be taken by the object at the top of the stack
if (m_ObjectStack.size () > 0)
pCurrentObject = m_ObjectStack.top ();
// If there is no current object, something has gone horribly wrong
if (!pCurrentObject)
return;
// Set the state of the stream
m_Stream.m_eReadState = Stream::rsStartElement;
m_Stream.m_wszCurrentTag = pwchLocalName;
// This will be used to indicate whether the current tag was processed
bool fCaught = false;
try
{
// Allow the object at the top of the stack to determine what to do next.
// The object will throw something to indicate what is to be done.
pCurrentObject->vSerialize (m_Stream);
}
catch (Serializable *pObject)
{
// If a new XmlStreamable is thrown, put the new object atop the stack
m_ObjectStack.push (pObject);
fCaught = true;
}
// If a pointer to a primitive data type is thrown, the pointer and
// data type are stored for use in the vCharacters callback (see below).
catch (int *pi)
{
m_PrimitivePointer.pInteger = pi;
m_eCurrentPrimitiveDataType = pdtInteger;
fCaught = true;
}
catch (unsigned int *pu)
{
// this 'unsigned' stuff is a bit of a hack, being cast to 'signed' for the moment
m_PrimitivePointer.pInteger = (int*) pu;
m_eCurrentPrimitiveDataType = pdtInteger;
fCaught = true;
}
catch (void **pp)
{
m_PrimitivePointer.pPointer = pp;
m_eCurrentPrimitiveDataType = pdtPointer;
fCaught = true;
}
catch (std::string *pstr)
{
m_PrimitivePointer.pString = pstr;
m_eCurrentPrimitiveDataType = pdtString;
fCaught = true;
}
catch (real *pr)
{
m_PrimitivePointer.pReal = pr;
m_eCurrentPrimitiveDataType = pdtReal;
fCaught = true;
}
catch (bool *pf)
{
m_PrimitivePointer.pBoolean = pf;
m_eCurrentPrimitiveDataType = pdtBoolean;
fCaught = true;
}
// if nothing was caught, the tag wasn't recognized
if (!fCaught)
{
// Not sure what's best course of action at this point. Walk the
// stack of XmlStreamable objects and try to find a match?
}
else
{
// Uncomment this stuff to aid debugging
// OutputDebugString ("Caught ");
// OutputDebugStringW (pwchLocalName);
// OutputDebugString ("\n");
}
}
/****************************************************************************/
/** StreamReader::vCharacters
**
** The characters will be parsed according to the type of primitive that
** was thrown during the previous call to vStartElement, and the result is
** stored through the pointer that was thrown.  The expected primitive type
** is reset to 'invalid' afterwards, so only one call is consumed per
** primitive.
**
** _pwchData   - character data; it is copied and null-terminated here, so
**               it need not be terminated by the caller.
** iCharacters - number of wide characters at _pwchData.  A null pointer or
**               a count <= 0 is treated as empty data.
****************************************************************************/
void StreamReader::vCharacters (const wchar_t *_pwchData, int iCharacters)
{
    // Nothing is waiting for character data: bail out before copying it
    if (pdtInvalid == m_eCurrentPrimitiveDataType)
        return;

    // Copy the data into a local, null-terminated buffer.  A std::wstring
    // is used rather than alloca so that the size does not depend on
    // sizeof(wchar_t) being 2 and a long text run cannot exhaust the stack.
    std::wstring strData;
    if (_pwchData && iCharacters > 0)
        strData.assign (_pwchData, (size_t) iCharacters);
    const wchar_t *pwchData = strData.c_str ();

    // Switch on the type of primitive currently expected
    switch (m_eCurrentPrimitiveDataType)
    {
    case pdtInteger:
        *m_PrimitivePointer.pInteger = _wtoi (pwchData);
        break;

    case pdtUnsigned:
        // TODO: this should be handled with something like _wtou (pwchData)...
        // But, there is no "_wtou" as yet.  Unsigned ints are currently
        // treated as signed ints.  So far so good, but this really oughtta
        // be fixed the right way.
        *m_PrimitivePointer.pInteger = 0;
        break;

    case pdtPointer:
        // NOTE(review): only an int's worth of the void* target is written;
        // where pointers are wider than int the remaining bytes keep their
        // previous contents - confirm this is what callers expect.
        *((int*)(m_PrimitivePointer.pPointer)) = _wtoi (pwchData);
        break;

    case pdtString:
    {
        // Convert the string to 8-bit format.  The buffer is sized for the
        // worst case of MB_CUR_MAX bytes per wide character so multibyte
        // output is not cut short.
        std::string strNarrow (strData.size () * MB_CUR_MAX + 1, '\0');
        const size_t cbWritten = wcstombs (&strNarrow[0], pwchData, strNarrow.size ());

        // wcstombs reports an unconvertible character with (size_t)-1 and
        // leaves the buffer contents unspecified; do not hand that to the
        // target.  The target string is left untouched in that case.
        if (cbWritten == (size_t) -1)
            break;
        strNarrow.resize (cbWritten);

        // String copy can be done via an external function, if the caller
        // wishes to manage the buffer that way.
        if (m_pfnStringAssign)
            m_pfnStringAssign (m_PrimitivePointer.pString, &strNarrow[0]);
        else
            *m_PrimitivePointer.pString = strNarrow;
        break;
    }

    case pdtReal:
        *m_PrimitivePointer.pReal = (real) _wtof (pwchData);
        break;

    case pdtBoolean:
    {
        // Only the first character decides the value.  It is compared as a
        // wide character directly, so no narrow conversion (which can fail
        // and leave its output uninitialized) is needed.  Any other first
        // character, including empty data, leaves the target unchanged.
        const wchar_t wchFirst = pwchData[0];
        if ((wchFirst == L't') || (wchFirst == L'T') || (wchFirst == L'y') || (wchFirst == L'Y') || (wchFirst == L'1'))
        {
            *m_PrimitivePointer.pBoolean = true;
        }
        else if ((wchFirst == L'f') || (wchFirst == L'F') || (wchFirst == L'n') || (wchFirst == L'N') || (wchFirst == L'0'))
        {
            *m_PrimitivePointer.pBoolean = false;
        }
        break;
    }

    case pdtInvalid:
    default:
        break;
    }

    // Reset the expected primitive data type to 'invalid'
    m_eCurrentPrimitiveDataType = pdtInvalid;
}
/****************************************************************************/
/** XmlStreamReader::vEndElement
****************************************************************************/
void StreamReader::vEndElement (const wchar_t *pwchLocalName)
{
Serializable *pCurrentObject = 0;
// Get the top object from the stack
if (m_ObjectStack.size () > 0)
pCurrentObject = m_ObjectStack.top ();
// If it matches the element that just ended, pop it from the stack
if (pCurrentObject && pCurrentObject->wszGetActualTagName () && !wcscmp (pwchLocalName, pCurrentObject->wszGetActualTagName ()))
m_ObjectStack.pop ();
}