X-Git-Url: http://www.project-moonshot.org/gitweb/?a=blobdiff_plain;f=xmltooling%2Futil%2FParserPool.cpp;h=ee64178e1803f4f748e71aa79289cb76cedc2498;hb=c9a063e554871e212edc46a1fd9ae865cbc9da9e;hp=8a22b6e4f76fe85da18621587014f6222add096a;hpb=dd11b998f20c8d80c9e7ecda0d8983610c7c9589;p=shibboleth%2Fcpp-xmltooling.git diff --git a/xmltooling/util/ParserPool.cpp b/xmltooling/util/ParserPool.cpp index 8a22b6e..ee64178 100644 --- a/xmltooling/util/ParserPool.cpp +++ b/xmltooling/util/ParserPool.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2001-2006 Internet2 + * Copyright 2001-2008 Internet2 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,11 +17,13 @@ /** * ParserPool.cpp * - * XML parsing + * A thread-safe pool of parsers that share characteristics. */ #include "internal.h" #include "exceptions.h" +#include "logging.h" +#include "util/CurlURLInputStream.h" #include "util/NDC.h" #include "util/ParserPool.h" #include "util/XMLHelper.h" @@ -30,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -38,12 +39,63 @@ #include #include +using namespace xmltooling::logging; using namespace xmltooling; +using namespace xercesc; using namespace std; -using namespace log4cpp; + + +namespace { + class MyErrorHandler : public DOMErrorHandler { + public: + unsigned int errors; + + MyErrorHandler() : errors(0) {} + + bool handleError(const DOMError& e) + { +#ifdef _DEBUG + xmltooling::NDC ndc("handleError"); +#endif + Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool"); + + DOMLocator* locator=e.getLocation(); + auto_ptr_char temp(e.getMessage()); + + switch (e.getSeverity()) { + case DOMError::DOM_SEVERITY_WARNING: + log.warnStream() << "warning on line " << locator->getLineNumber() + << ", column " << locator->getColumnNumber() + << ", message: " << temp.get() << logging::eol; + return true; + + case DOMError::DOM_SEVERITY_ERROR: + ++errors; + log.errorStream() << "error on line " << locator->getLineNumber() + << ", column " << locator->getColumnNumber() + << ", message: " << temp.get() << logging::eol; + return true; + + case DOMError::DOM_SEVERITY_FATAL_ERROR: + ++errors; + log.errorStream() << "fatal error on line " << locator->getLineNumber() + << ", column " << locator->getColumnNumber() + << ", message: " << temp.get() << logging::eol; + return true; + } + + ++errors; + log.errorStream() << "undefined error type on line " << locator->getLineNumber() + << ", column " << locator->getColumnNumber() + << ", message: " << temp.get() << logging::eol; + return false; + } + }; +} + ParserPool::ParserPool(bool namespaceAware, bool schemaAware) - : m_namespaceAware(namespaceAware), m_schemaAware(schemaAware), m_lock(Mutex::create()) {} + : m_namespaceAware(namespaceAware), m_schemaAware(schemaAware), m_lock(Mutex::create()), m_security(new SecurityManager()) {} ParserPool::~ParserPool() { @@ -52,6 +104,7 @@ ParserPool::~ParserPool() m_pool.pop(); } delete m_lock; + delete m_security; } DOMDocument* ParserPool::newDocument() @@ -59,26 +112,78 @@ DOMDocument* ParserPool::newDocument() return DOMImplementationRegistry::getDOMImplementation(NULL)->createDocument(); } +#ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS + +DOMDocument* ParserPool::parse(DOMLSInput& domsrc) +{ + DOMLSParser* parser=checkoutBuilder(); + XercesJanitor janitor(parser); + try { + MyErrorHandler deh; + parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, dynamic_cast(&deh)); + DOMDocument* doc=parser->parse(&domsrc); + if (deh.errors) { + if (doc) + doc->release(); + throw XMLParserException("XML error(s) during parsing, check log for specifics"); + } + parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, (void*)NULL); + parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true); + checkinBuilder(janitor.release()); + return doc; + } + catch (XMLException& ex) { + parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, (void*)NULL); + parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true); + checkinBuilder(janitor.release()); + auto_ptr_char temp(ex.getMessage()); + throw XMLParserException(string("Xerces error during parsing: ") + (temp.get() ? temp.get() : "no message")); + } + catch (XMLToolingException&) { + parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, (void*)NULL); + parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true); + checkinBuilder(janitor.release()); + throw; + } +} + +#else + DOMDocument* ParserPool::parse(DOMInputSource& domsrc) { DOMBuilder* parser=checkoutBuilder(); XercesJanitor janitor(parser); try { + MyErrorHandler deh; + parser->setErrorHandler(&deh); DOMDocument* doc=parser->parse(domsrc); - parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true); + if (deh.errors) { + if (doc) + doc->release(); + throw XMLParserException("XML error(s) during parsing, check log for specifics"); + } + parser->setErrorHandler(NULL); + parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true); checkinBuilder(janitor.release()); return doc; } - catch (XMLException&) { + catch (XMLException& ex) { + parser->setErrorHandler(NULL); + parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true); checkinBuilder(janitor.release()); - throw; + auto_ptr_char temp(ex.getMessage()); + throw XMLParserException(string("Xerces error during parsing: ") + (temp.get() ? temp.get() : "no message")); } catch (XMLToolingException&) { + parser->setErrorHandler(NULL); + parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true); checkinBuilder(janitor.release()); throw; } } +#endif + DOMDocument* ParserPool::parse(istream& is) { StreamInputSource src(is); @@ -91,7 +196,7 @@ template class doubleit { public: doubleit(T& t, const typename T::value_type& s) : temp(t), sep(s) {} - void operator() (const pair& s) { temp += s.first + sep + s.first + sep; } + void operator() (const pair& s) { temp += s.first + sep + s.first + sep; } T& temp; const typename T::value_type& sep; }; @@ -141,9 +246,10 @@ bool ParserPool::loadCatalog(const XMLCh* pathname) Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool"); // XML constants - static const XMLCh catalog[] = { chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull }; - static const XMLCh uri[] = { chLatin_u, chLatin_r, chLatin_i, chNull }; - static const XMLCh name[] = { chLatin_n, chLatin_a, chLatin_m, chLatin_e, chNull }; + static const XMLCh catalog[] = UNICODE_LITERAL_7(c,a,t,a,l,o,g); + static const XMLCh system[] = UNICODE_LITERAL_6(s,y,s,t,e,m); + static const XMLCh systemId[] = UNICODE_LITERAL_8(s,y,s,t,e,m,I,d); + static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i); static const XMLCh CATALOG_NS[] = { chLatin_u, chLatin_r, chLatin_n, chColon, chLatin_o, chLatin_a, chLatin_s, chLatin_i, chLatin_s, chColon, @@ -176,12 +282,12 @@ bool ParserPool::loadCatalog(const XMLCh* pathname) return false; } - // Fetch all the elements. - DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,uri); + // Fetch all the elements. + DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,system); Lock lock(m_lock); for (XMLSize_t i=0; igetLength(); i++) { root=static_cast(mappings->item(i)); - const XMLCh* from=root->getAttributeNS(NULL,name); + const XMLCh* from=root->getAttributeNS(NULL,systemId); const XMLCh* to=root->getAttributeNS(NULL,uri); #ifdef HAVE_GOOD_STL m_schemaLocMap[from]=to; @@ -198,15 +304,27 @@ bool ParserPool::loadCatalog(const XMLCh* pathname) for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit(m_schemaLocations,' ')); #endif } - catch (XMLParserException& e) { - log.error("catalog loader caught XMLParserException: %s", e.what()); + catch (exception& e) { + log.error("catalog loader caught exception: %s", e.what()); return false; } return true; } -DOMInputSource* ParserPool::resolveEntity(const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI) +#ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS +DOMLSInput* ParserPool::resolveResource( + const XMLCh *const resourceType, + const XMLCh *const namespaceUri, + const XMLCh *const publicId, + const XMLCh *const systemId, + const XMLCh *const baseURI + ) +#else +DOMInputSource* ParserPool::resolveEntity( + const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI + ) +#endif { #if _DEBUG xmltooling::NDC ndc("resolveEntity"); @@ -221,69 +339,122 @@ DOMInputSource* ParserPool::resolveEntity(const XMLCh* const publicId, const XML log.debug("asked to resolve %s with baseURI %s",sysId.get(),base.get() ? base.get() : "(null)"); } - // Find well-known schemas in the specified location. #ifdef HAVE_GOOD_STL + // Find well-known schemas in the specified location. map::const_iterator i=m_schemaLocMap.find(systemId); if (i!=m_schemaLocMap.end()) - return new Wrapper4InputSource(new LocalFileInputSource(NULL,i->second.c_str())); + return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str())); + + // Check for entity as a value in the map. + for (i=m_schemaLocMap.begin(); i!=m_schemaLocMap.end(); ++i) { + if (XMLString::endsWith(i->second.c_str(), systemId)) + return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str())); + } + + // We'll allow anything without embedded slashes. + if (XMLString::indexOf(systemId, chForwardSlash)==-1) + return new Wrapper4InputSource(new LocalFileInputSource(baseURI,systemId)); #else + // Find well-known schemas in the specified location. auto_ptr_char temp(systemId); map::const_iterator i=m_schemaLocMap.find(temp.get()); - auto_ptr_XMLCh temp2(i->second.c_str()); - if (i!=m_schemaLocMap.end()) - return new Wrapper4InputSource(new LocalFileInputSource(NULL,temp2.get())); + if (i!=m_schemaLocMap.end()) { + auto_ptr_XMLCh temp2(i->second.c_str()); + return new Wrapper4InputSource(new LocalFileInputSource(baseURI,temp2.get())); + } + + // Check for entity as a value in the map. + for (i=m_schemaLocMap.begin(); i!=m_schemaLocMap.end(); ++i) { + auto_ptr_XMLCh temp2(i->second.c_str()); + if (XMLString::endsWith(temp2.get(), systemId)) + return new Wrapper4InputSource(new LocalFileInputSource(baseURI,temp2.get())); + } + + // We'll allow anything without embedded slashes. + if (XMLString::indexOf(systemId, chForwardSlash)==-1) + return new Wrapper4InputSource(new LocalFileInputSource(baseURI,systemId)); #endif // Shortcircuit the request. - auto_ptr_char sysId(systemId); - log.warn("unauthorized entity request (%s), blocking it", sysId.get() ? sysId.get() : "no systemId"); +#ifdef HAVE_GOOD_STL + auto_ptr_char temp(systemId); +#endif + log.debug("unauthorized entity request (%s), blocking it", temp.get()); static const XMLByte nullbuf[] = {0}; return new Wrapper4InputSource(new MemBufInputSource(nullbuf,0,systemId)); } -bool ParserPool::handleError(const DOMError& e) +#ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS + +DOMLSParser* ParserPool::createBuilder() { -#ifdef _DEBUG - xmltooling::NDC ndc("handleError"); + static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull }; + DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype); + DOMLSParser* parser=static_cast(impl)->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS,NULL); + parser->getDomConfig()->setParameter(XMLUni::fgDOMNamespaces, m_namespaceAware); + if (m_schemaAware) { + parser->getDomConfig()->setParameter(XMLUni::fgDOMNamespaces, true); + parser->getDomConfig()->setParameter(XMLUni::fgXercesSchema, true); + parser->getDomConfig()->setParameter(XMLUni::fgDOMValidate, true); + parser->getDomConfig()->setParameter(XMLUni::fgXercesCacheGrammarFromParse, true); + + // We build a "fake" schema location hint that binds each namespace to itself. + // This ensures the entity resolver will be given the namespace as a systemId it can check. +#ifdef HAVE_GOOD_STL + parser->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast(m_schemaLocations.c_str())); +#else + auto_ptr_XMLCh temp(m_schemaLocations.c_str()); + parser->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast(temp.get())); #endif - Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool"); - DOMLocator* locator=e.getLocation(); - auto_ptr_char temp(e.getMessage()); - - switch (e.getSeverity()) { - case DOMError::DOM_SEVERITY_WARNING: - log.warnStream() << "warning on line " << locator->getLineNumber() - << ", column " << locator->getColumnNumber() - << ", message: " << temp.get() << CategoryStream::ENDLINE; - return true; + } + parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true); + parser->getDomConfig()->setParameter(XMLUni::fgXercesDisableDefaultEntityResolution, true); + parser->getDomConfig()->setParameter(XMLUni::fgDOMResourceResolver, dynamic_cast(this)); + parser->getDomConfig()->setParameter(XMLUni::fgXercesSecurityManager, m_security); + return parser; +} - case DOMError::DOM_SEVERITY_ERROR: - log.errorStream() << "error on line " << locator->getLineNumber() - << ", column " << locator->getColumnNumber() - << ", message: " << temp.get() << CategoryStream::ENDLINE; - throw XMLParserException(string("error during XML parsing: ") + (temp.get() ? temp.get() : "no message")); +DOMLSParser* ParserPool::checkoutBuilder() +{ + Lock lock(m_lock); + if (m_pool.empty()) { + DOMLSParser* builder=createBuilder(); + return builder; + } + DOMLSParser* p=m_pool.top(); + m_pool.pop(); + if (m_schemaAware) { +#ifdef HAVE_GOOD_STL + p->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast(m_schemaLocations.c_str())); +#else + auto_ptr_XMLCh temp2(m_schemaLocations.c_str()); + p->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast(temp2.get())); +#endif + } + return p; +} - case DOMError::DOM_SEVERITY_FATAL_ERROR: - log.critStream() << "fatal error on line " << locator->getLineNumber() - << ", column " << locator->getColumnNumber() - << ", message: " << temp.get() << CategoryStream::ENDLINE; - throw XMLParserException(string("fatal error during XML parsing: ") + (temp.get() ? temp.get() : "no message")); +void ParserPool::checkinBuilder(DOMLSParser* builder) +{ + if (builder) { + Lock lock(m_lock); + m_pool.push(builder); } - throw XMLParserException(string("unclassified error during XML parsing: ") + (temp.get() ? temp.get() : "no message")); } +#else + DOMBuilder* ParserPool::createBuilder() { static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull }; DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype); DOMBuilder* parser=static_cast(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0); - if (m_namespaceAware) - parser->setFeature(XMLUni::fgDOMNamespaces,true); + parser->setFeature(XMLUni::fgDOMNamespaces, m_namespaceAware); if (m_schemaAware) { - parser->setFeature(XMLUni::fgXercesSchema,true); - parser->setFeature(XMLUni::fgDOMValidation,true); - parser->setFeature(XMLUni::fgXercesCacheGrammarFromParse,true); - parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal,true); + parser->setFeature(XMLUni::fgDOMNamespaces, true); + parser->setFeature(XMLUni::fgXercesSchema, true); + parser->setFeature(XMLUni::fgDOMValidation, true); + parser->setFeature(XMLUni::fgXercesCacheGrammarFromParse, true); // We build a "fake" schema location hint that binds each namespace to itself. // This ensures the entity resolver will be given the namespace as a systemId it can check. @@ -294,9 +465,10 @@ DOMBuilder* ParserPool::createBuilder() parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast(temp.get())); #endif } - parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true); + parser->setProperty(XMLUni::fgXercesSecurityManager, m_security); + parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true); + parser->setFeature(XMLUni::fgXercesDisableDefaultEntityResolution, true); parser->setEntityResolver(this); - parser->setErrorHandler(this); return parser; } @@ -328,10 +500,12 @@ void ParserPool::checkinBuilder(DOMBuilder* builder) } } -unsigned int StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toFill, const unsigned int maxToRead) +#endif + +xsecsize_t StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead) { XMLByte* target=toFill; - unsigned int bytes_read=0,request=maxToRead; + xsecsize_t bytes_read=0,request=maxToRead; // Fulfill the rest by reading from the stream. if (request && !m_is.eof() && !m_is.fail()) { @@ -343,10 +517,56 @@ unsigned int StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const t catch(ios_base::failure& e) { Category::getInstance(XMLTOOLING_LOGCAT".StreamInputSource").critStream() << "XML::StreamInputSource::StreamBinInputStream::readBytes caught an exception: " << e.what() - << CategoryStream::ENDLINE; + << logging::eol; *toFill=0; return 0; } } return bytes_read; } + +#ifdef XMLTOOLING_LITE + +URLInputSource::URLInputSource(const XMLCh* url, const char* systemId) : InputSource(systemId), m_url(url) +{ +} + +URLInputSource::URLInputSource(const DOMElement* e, const char* systemId) : InputSource(systemId) +{ + static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i); + static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l); + + const XMLCh* attr = e->getAttributeNS(NULL, url); + if (!attr || !*attr) { + attr = e->getAttributeNS(NULL, uri); + if (!attr || !*attr) + throw IOException("No URL supplied via DOM to URLInputSource constructor."); + } + + m_url.setURL(attr); +} + +BinInputStream* URLInputSource::makeStream() const +{ + // Ask the URL to create us an appropriate input stream + return m_url.makeNewStream(); +} + +#else + +URLInputSource::URLInputSource(const XMLCh* url, const char* systemId) + : InputSource(systemId), m_url(url), m_root(NULL) +{ +} + +URLInputSource::URLInputSource(const DOMElement* e, const char* systemId) + : InputSource(systemId), m_root(e) +{ +} + +BinInputStream* URLInputSource::makeStream() const +{ + return m_root ? new CurlURLInputStream(m_root) : new CurlURLInputStream(m_url.get()); +} + +#endif