1
0
forked from mirrors/0ad
Files
0ad/source/ps/XML/Xeromyces.cpp
T
janwas c0ed950657 had to remove uint and ulong from lib/types.h due to conflict with other library.
this snowballed into a massive search+destroy of the hodgepodge of
mostly equivalent types we had in use (int, uint, unsigned, unsigned
int, i32, u32, ulong, uintN).

it is more efficient to use 64-bit types in 64-bit mode, so the
preferred default is size_t (for anything remotely resembling a size or
index). tile coordinates are ssize_t to allow more efficient conversion
to/from floating point. flags are int because we almost never need more
than 15 distinct bits, bit test/set is not slower and int is fastest to
type. finally, some data that is pretty much directly passed to OpenGL
is now typed accordingly.

after several hours, the code now requires fewer casts and less
guesswork.

other changes:
- unit and player IDs now have an "invalid id" constant in the
respective class to avoid casting and -1
- fix some endian/64-bit bugs in the map (un)packing. added a
convenience function to write/read a size_t.
- ia32: change CPUID interface to allow passing in ecx (required for
cache topology detection, which I need at work). remove some unneeded
functions from asm, replace with intrinsics where possible.

This was SVN commit r5942.
2008-05-11 18:48:32 +00:00

499 lines
13 KiB
C++

#include "precompiled.h"
#include <vector>
#include <set>
#include <map>
#include <stack>
#include <algorithm>
#include "ps/CLogger.h"
#include "ps/Filesystem.h"
#include "Xeromyces.h"
#define LOG_CATEGORY "xml"
#include "XML.h"
// Non-zero once Load() has called XMLPlatformUtils::Initialize(); reset to
// zero by Terminate() after it has shut Xerces down again.
int CXeromyces::XercesLoaded = 0; // for once-only initialisation
// Convenient storage for the internal tree

// One attribute of a parsed element: its name (already reduced to ASCII by
// the SAX2 handler) together with its value, kept as UTF-16.
struct XMLAttribute
{
	std::string name;
	utf16string value;
};
// One element of the intermediate tree that the SAX2 handlers build.
// The child and attribute pointers are heap allocations made in
// XeroHandler::startElement; they are released either by
// XeroHandler::OutputElement (while writing the XMB) or by
// XeroHandler::DeallocateElement.
struct XMLElement
{
	std::string name;                 // element name (ASCII)
	int linenum;                      // line number in the XML source
	utf16string text;                 // accumulated character data
	std::vector<XMLElement*> childs;  // child elements, in document order
	std::vector<XMLAttribute*> attrs; // attributes, in document order
};
class XeroHandler : public DefaultHandler
{
public:
XeroHandler() : m_locator(NULL), Root(NULL) {}
~XeroHandler()
{
if (Root)
DeallocateElement(Root);
}
// SAX2 event handlers:
virtual void startDocument();
virtual void endDocument();
virtual void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs);
virtual void endElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname);
virtual void characters(const XMLCh* const chars, const unsigned int length);
const Locator* m_locator;
virtual void setDocumentLocator(const Locator* const locator)
{
m_locator = locator;
}
// Non-SAX2 stuff, used for storing the
// parsed data and constructing the XMB:
void CreateXMB();
WriteBuffer writeBuffer;
private:
std::set<std::string> ElementNames;
std::set<std::string> AttributeNames;
XMLElement* Root;
XMLElement* CurrentElement;
std::stack<XMLElement*> ElementStack;
std::map<std::string, int> ElementID;
std::map<std::string, int> AttributeID;
void OutputElement(XMLElement* el);
// Recursively frees memory
void DeallocateElement(XMLElement* el);
};
// Default constructor. Performs no work here; the XMB data is only set up
// by Load() / ReadXMBFile().
CXeromyces::CXeromyces()
{
}
// Destructor. Has no explicit cleanup of its own; in particular it does not
// shut down Xerces - that is the job of Terminate().
CXeromyces::~CXeromyces()
{
}
// Shuts Xerces down again, but only if an earlier Load() actually had to
// initialise it. Clears the flag afterwards so a later Load() would
// re-initialise the library.
void CXeromyces::Terminate()
{
	// Nothing to do when Xerces was never started
	if (! XercesLoaded)
		return;

	XMLPlatformUtils::Terminate();
	XercesLoaded = 0;
}
// Find out write location of the XMB file corresponding to xmlFilename
//
// vfs            - virtual file system used to resolve the XML's real path
// xmlFilename    - VFS path of the XML source file
// xmbFilename    - VFS path (including the unique suffix) chosen for the XMB
// xmbActualPath  - [out] receives "cache/mods/<mod name>/xmb/<xmbFilename>"
void CXeromyces::GetXMBPath(PIVFS vfs, const VfsPath& xmlFilename, const VfsPath& xmbFilename, VfsPath& xmbActualPath)
{
	// rationale:
	// - it is necessary to write out XMB files into a subdirectory
	// corresponding to the mod from which the XML file is taken.
	// this avoids confusion when multiple mods are active -
	// their XMB files' VFS filename would otherwise be indistinguishable.
	// - we group files in the cache/ mount point first by mod, and only
	// then XMB. this is so that all output files for a given mod can
	// easily be deleted. the operation of deleting all old/unused
	// XMB files requires a program anyway (to find out which are no
	// longer needed), so it's not a problem that XMB files reside in
	// a subdirectory (which would make manually deleting all harder).

	// get real path of XML file (e.g. mods/official/entities/...)
	// NOTE(review): the result of GetRealPath is not checked; a failure
	// shows up below as a missing "mods/" prefix.
	Path P_XMBRealPath;
	vfs->GetRealPath(xmlFilename, P_XMBRealPath);
	const std::string realPath(P_XMBRealPath.string().c_str());

	// extract mod name from that: everything between the leading "mods/"
	// and the next '/' (or the end of the string).
	// (done with std::string rather than sscanf into a fixed-size buffer,
	// so an over-long path cannot overflow anything and the name is never
	// left uninitialised when the prefix is missing)
	static const char modsPrefix[] = "mods/";
	const size_t prefixLength = sizeof(modsPrefix) - 1;
	std::string modName;
	if (realPath.compare(0, prefixLength, modsPrefix) == 0)
	{
		const size_t nameEnd = realPath.find('/', prefixLength);
		if (nameEnd == std::string::npos)
			modName = realPath.substr(prefixLength);
		else
			modName = realPath.substr(prefixLength, nameEnd - prefixLength);
	}
	debug_assert(!modName.empty());

	// build full name: cache, then mod name, XMB subdir, original XMB path
	xmbActualPath = VfsPath("cache/mods") / modName.c_str() / "xmb" / xmbFilename;
}
// Loads the given XML file, going through a cached binary (XMB) version of it.
//
// If an XMB matching the XML's modification time and size already exists in
// the cache it is read directly. Otherwise the XML is parsed with Xerces,
// converted to XMB, written to the cache and then used from memory.
//
// Returns PSRETURN_OK on success, PSRETURN_Xeromyces_XMLOpenFailed if the
// XML (or an existing XMB) could not be opened/read, and
// PSRETURN_Xeromyces_XMLParseError if the parser reported errors.
PSRETURN CXeromyces::Load(const VfsPath& filename)
{
	// Make sure the .xml actually exists
	if (! FileExists(filename))
	{
		// (a path object must not be passed to a printf-style "%s";
		// hand over its character data instead)
		LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to find XML file %s", filename.string().c_str());
		return PSRETURN_Xeromyces_XMLOpenFailed;
	}

	// Get some data about the .xml file
	FileInfo fileInfo;
	if (g_VFS->GetFileInfo(filename, &fileInfo) < 0)
	{
		LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to stat XML file %s", filename.string().c_str());
		return PSRETURN_Xeromyces_XMLOpenFailed;
	}

	/*
	XMBs are stored with a unique name, where the name is generated from
	characteristics of the XML file. If a file already exists with the
	generated name, it is assumed that that file is a valid conversion of
	the XML, and so it's loaded. Otherwise, the XMB is created with that
	filename.
	This means it's never necessary to overwrite existing XMB files; since
	the XMBs are often in archives, it's not easy to rewrite those files,
	and it's not possible to switch to using a loose file because the VFS
	has already decided that file is inside an archive. So each XMB is given
	a unique name, and old ones are somehow purged.
	*/

	// Generate the filename for the xmb:
	// <xml filename>_<mtime><size><format version>.xmb
	// with mtime/size as 8-digit hex, where mtime's lowest bit is
	// zeroed because zip files only have 2 second resolution.
	const int suffixLength = 22;
	char suffix[suffixLength+1];
	int ret = sprintf(suffix, "_%08x%08xB.xmb", (int)(fileInfo.MTime() & ~1), (int)fileInfo.Size());
	debug_assert(ret == suffixLength);
	VfsPath xmbFilename = change_extension(filename, suffix);

	VfsPath xmbPath;
	GetXMBPath(g_VFS, filename, xmbFilename, xmbPath);

	// If the file exists, use it
	if (FileExists(xmbPath))
	{
		if (ReadXMBFile(xmbPath))
			return PSRETURN_OK;
		else
			return PSRETURN_Xeromyces_XMLOpenFailed;
	}

	// XMB isn't up to date with the XML, so rebuild it:

	// Load Xerces if necessary
	if (! XercesLoaded)
	{
		XMLPlatformUtils::Initialize();
		XercesLoaded = 1;
	}

	// Open the .xml file
	CVFSInputSource source;
	if (source.OpenFile(filename) < 0)
	{
		LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Failed to open XML file %s", filename.string().c_str());
		return PSRETURN_Xeromyces_XMLOpenFailed;
	}

	// Set up the Xerces parser
	SAX2XMLReader* Parser = XMLReaderFactory::createXMLReader();

	// Enable validation
	Parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
	Parser->setFeature(XMLUni::fgXercesDynamic, true);

	XeroHandler handler;
	Parser->setContentHandler(&handler);

	CXercesErrorHandler errorHandler;
	Parser->setErrorHandler(&errorHandler);

	CVFSEntityResolver entityResolver(filename.string().c_str());
	Parser->setEntityResolver(&entityResolver);

	// Build a tree inside handler
	Parser->parse(source);

	// (It's horribly inefficient doing SAX2->tree then tree->XMB,
	// but the XML->XMB conversion should be done very rarely
	// anyway. If it's ever needed, the XMB writing can be done
	// directly from inside the SAX2 event handlers, although that's
	// a little more complex)

	delete Parser;

	if (errorHandler.GetSawErrors())
	{
		LOG(CLogger::Error, LOG_CATEGORY, "CXeromyces: Errors in XML file '%s'", filename.string().c_str());
		return PSRETURN_Xeromyces_XMLParseError;
		// The internal tree of the XeroHandler will be cleaned up automatically
	}

	// Convert the data structures into the XMB format
	handler.CreateXMB();

	// Save the file to disk, so it can be loaded quickly next time
	WriteBuffer& writeBuffer = handler.writeBuffer;
	g_VFS->CreateFile(xmbPath, writeBuffer.Data(), writeBuffer.Size());

	XMBBuffer = writeBuffer.Data(); // add a reference

	// Set up the XMBFile
	Initialise((const char*)XMBBuffer.get());

	return PSRETURN_OK;
}
// Reads an existing XMB file into XMBBuffer and initialises the XMB reader
// on top of it. Returns false if the file could not be loaded, true otherwise.
bool CXeromyces::ReadXMBFile(const VfsPath& filename)
{
	size_t xmbSize;
	const bool loaded = !(g_VFS->LoadFile(filename, XMBBuffer, xmbSize) < 0);
	if (loaded)
	{
		// else: invalid XMB file size. (42 bytes is the smallest possible XMB.
		// (Well, maybe not quite, but it's a nice number.))
		debug_assert(xmbSize >= 42);

		// Set up the XMBFile
		Initialise((const char*)XMBBuffer.get());
	}
	return loaded;
}
void XeroHandler::startDocument()
{
Root = new XMLElement;
ElementStack.push(Root);
}
// SAX2 event: end of the document. Intentionally does nothing; the tree is
// turned into XMB later by an explicit call to CreateXMB().
void XeroHandler::endDocument()
{
}
/*
// Silently clobbers non-ASCII characters
std::string lowercase_ascii(const XMLCh *a)
{
std::string b;
size_t len=XMLString::stringLen(a);
b.resize(len);
for (size_t i = 0; i < len; ++i)
b[i] = (char)towlower(a[i]);
return b;
}
*/
/**
* Return an ASCII version of the given 16-bit string, ignoring
* any non-ASCII characters.
*
* @param const XMLCh * a Input string.
* @return std::string 8-bit ASCII version of <code>a</code>.
**/
std::string toAscii( const XMLCh* a )
{
std::string b;
size_t len=XMLString::stringLen(a);
b.reserve(len);
for (size_t i = 0; i < len; ++i)
{
if(a[i] < 0x80)
b += (char) a[i];
}
return b;
}
// SAX2 event: an element has been opened.
// Creates a tree node for it (name, source line, attributes), attaches the
// node to the currently open element and makes it the new current element.
// The element and attribute names are also recorded for the XMB name tables.
void XeroHandler::startElement(const XMLCh* const UNUSED(uri), const XMLCh* const localname, const XMLCh* const UNUSED(qname), const Attributes& attrs)
{
	std::string elementName = toAscii(localname);
	ElementNames.insert(elementName);

	// Create a new element
	XMLElement* e = new XMLElement;
	e->name = elementName;
	// The locator is only available if the parser supplied one via
	// setDocumentLocator; fall back to line 0 instead of dereferencing NULL.
	e->linenum = m_locator ? (int)m_locator->getLineNumber() : 0;

	// Store all the attributes in the new element
	for (unsigned int i = 0; i < attrs.getLength(); ++i)
	{
		std::string attrName = toAscii(attrs.getLocalName(i));
		AttributeNames.insert(attrName);

		XMLAttribute* a = new XMLAttribute;
		a->name = attrName;
		const XMLCh *tmp = attrs.getValue(i);
		a->value = utf16string(tmp, tmp+XMLString::stringLen(tmp));
		e->attrs.push_back(a);
	}

	// Add the element to its parent
	ElementStack.top()->childs.push_back(e);

	// Set as parent of following elements
	ElementStack.push(e);
}
// SAX2 event: an element has been closed. Removes it from the stack of open
// elements, so that following elements and text are attached to its parent.
void XeroHandler::endElement(const XMLCh* const UNUSED(uri), const XMLCh* const UNUSED(localname), const XMLCh* const UNUSED(qname))
{
ElementStack.pop();
}
// SAX2 event: character data inside the currently open element.
// Appends the data to that element's text; the parser may deliver the text
// of one element in several pieces, hence the "+=".
//
// chars  - the characters from the XML document
// length - number of characters in chars that belong to this event
void XeroHandler::characters(const XMLCh* const chars, const unsigned int length)
{
	// Use the length supplied by the parser instead of searching for a NUL
	// terminator: the SAX interface delimits the data by 'length' only.
	ElementStack.top()->text += utf16string(chars, chars+length);
}
void XeroHandler::CreateXMB()
{
// Header
writeBuffer.Append((void*)HeaderMagicStr, 4);
std::set<std::string>::iterator it;
int i;
// Element names
i = 0;
int ElementCount = (int)ElementNames.size();
writeBuffer.Append(&ElementCount, 4);
for (it = ElementNames.begin(); it != ElementNames.end(); ++it)
{
int TextLen = (int)it->length()+1;
writeBuffer.Append(&TextLen, 4);
writeBuffer.Append((void*)it->c_str(), TextLen);
ElementID[*it] = i++;
}
// Attribute names
i = 0;
int AttributeCount = (int)AttributeNames.size();
writeBuffer.Append(&AttributeCount, 4);
for (it = AttributeNames.begin(); it != AttributeNames.end(); ++it)
{
int TextLen = (int)it->length()+1;
writeBuffer.Append(&TextLen, 4);
writeBuffer.Append((void*)it->c_str(), TextLen);
AttributeID[*it] = i++;
}
// All the XML contents must be surrounded by a single element
debug_assert(Root->childs.size() == 1);
OutputElement(Root->childs[0]);
delete Root;
Root = NULL;
}
// Serialises one element - and, recursively, all of its children - into
// writeBuffer, releasing the element's memory (attributes included) on the way.
//
// Layout written per element:
//   total length, name ID, attribute count, child count, offset to children,
//   text (length [+ line number + UTF-16 text]), attributes, children.
void XeroHandler::OutputElement(XMLElement* el)
{
	// Placeholder for the element's total length; patched at the very end
	const int lengthSlot = (int)writeBuffer.Size();
	writeBuffer.Append("????", 4);

	int nameID = ElementID[el->name];
	writeBuffer.Append(&nameID, 4);

	int attrTotal = (int)el->attrs.size();
	writeBuffer.Append(&attrTotal, 4);

	int childTotal = (int)el->childs.size();
	writeBuffer.Append(&childTotal, 4);

	// Placeholder for the offset to the child list; patched once the
	// text and attributes have been written
	const int childOffsetSlot = (int)writeBuffer.Size();
	writeBuffer.Append("????", 4);

	// Strip leading/trailing whitespace from the element's text. Newlines
	// removed from the front are added to the line number, so that JS
	// error reporting can still give the correct line.
	const std::string blanksNarrow = " \t\r\n";
	const utf16string blanks (blanksNarrow.begin(), blanksNarrow.end());

	const size_t textBegin = el->text.find_first_not_of(blanks);
	if (textBegin != el->text.npos)
	{
		// Newlines in the part that is about to be cut off at the front
		el->linenum += (int)std::count(el->text.begin(), el->text.begin()+textBegin, (utf16_t)'\n');

		// Keep only the range from the first to the last non-blank character
		const size_t textEnd = el->text.find_last_not_of(blanks);
		el->text = el->text.substr(textBegin, 1+textEnd-textBegin);
	}
	else
	{
		// Nothing but whitespace
		el->text = utf16string();
	}

	// Text, prefixed by its length in bytes
	if (el->text.length() != 0)
	{
		// length, then line number, then the null-terminated text
		int nodeBytes = 4 + 2*((int)el->text.length()+1);
		writeBuffer.Append(&nodeBytes, 4);
		writeBuffer.Append(&el->linenum, 4);
		writeBuffer.Append((void*)el->text.c_str(), nodeBytes-4);
	}
	else
	{
		// No text: just a zero length
		writeBuffer.Append("\0\0\0\0", 4);
	}

	// Attributes: name ID, value length in bytes, null-terminated value
	for (int a = 0; a < attrTotal; ++a)
	{
		XMLAttribute* attr = el->attrs[a];

		int attrNameID = AttributeID[attr->name];
		writeBuffer.Append(&attrNameID, 4);

		int valueBytes = 2*((int)attr->value.length()+1);
		writeBuffer.Append(&valueBytes, 4);
		writeBuffer.Append((void*)attr->value.c_str(), valueBytes);

		// The attribute is no longer needed once written
		delete attr;
	}

	// Patch the child offset (relative to the end of its own slot)
	int childOffset = (int)writeBuffer.Size() - (childOffsetSlot+4);
	writeBuffer.Overwrite(&childOffset, 4, childOffsetSlot);

	// Children, in document order
	for (int c = 0; c < childTotal; ++c)
		OutputElement(el->childs[c]);

	// Patch the total length now that everything has been written
	int totalLength = (int)writeBuffer.Size() - lengthSlot;
	writeBuffer.Overwrite(&totalLength, 4, lengthSlot);

	// Finally release the element itself
	delete el;
}
// Frees an element together with all of its attributes and, recursively,
// all of its child elements.
void XeroHandler::DeallocateElement(XMLElement* el)
{
	for (std::vector<XMLAttribute*>::iterator attr = el->attrs.begin(); attr != el->attrs.end(); ++attr)
		delete *attr;

	for (std::vector<XMLElement*>::iterator child = el->childs.begin(); child != el->childs.end(); ++child)
		DeallocateElement(*child);

	delete el;
}