--- /dev/null
+/*\r
+ * Copyright 2001-2006 Internet2\r
+ * \r
+ * Licensed under the Apache License, Version 2.0 (the "License");\r
+ * you may not use this file except in compliance with the License.\r
+ * You may obtain a copy of the License at\r
+ *\r
+ * http://www.apache.org/licenses/LICENSE-2.0\r
+ *\r
+ * Unless required by applicable law or agreed to in writing, software\r
+ * distributed under the License is distributed on an "AS IS" BASIS,\r
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ * See the License for the specific language governing permissions and\r
+ * limitations under the License.\r
+ */\r
+\r
+/**\r
+ * ParserPool.cpp\r
+ * \r
+ * XML parsing\r
+ */\r
+\r
+#include "internal.h"\r
+#include "exceptions.h"\r
+#include "util/NDC.h"\r
+#include "util/ParserPool.h"\r
+#include "util/XMLHelper.h"\r
+\r
+#include <algorithm>\r
+#include <functional>\r
+#include <sys/types.h>\r
+#include <sys/stat.h>\r
+#include <log4cpp/Category.hh>\r
+#include <xercesc/util/PlatformUtils.hpp>\r
+#include <xercesc/util/XMLUniDefs.hpp>\r
+#include <xercesc/sax/SAXException.hpp>\r
+#include <xercesc/framework/MemBufInputSource.hpp>\r
+#include <xercesc/framework/LocalFileInputSource.hpp>\r
+#include <xercesc/framework/Wrapper4InputSource.hpp>\r
+\r
+using namespace xmltooling;\r
+using namespace std;\r
+using namespace log4cpp;\r
+\r
+// Builds an empty pool: records the two parser settings and creates the\r
+// mutex that guards the pool.\r
+ParserPool::ParserPool(bool namespaceAware, bool schemaAware)\r
+    : m_namespaceAware(namespaceAware),\r
+      m_schemaAware(schemaAware),\r
+      m_lock(XMLPlatformUtils::makeMutex())\r
+{\r
+}\r
+\r
+// Releases every parser still sitting in the pool, then closes the mutex.\r
+ParserPool::~ParserPool()\r
+{\r
+    for (; !m_pool.empty(); m_pool.pop())\r
+        m_pool.top()->release();\r
+\r
+    XMLPlatformUtils::closeMutex(m_lock);\r
+}\r
+\r
+// Creates an empty document from the DOM implementation that the registry\r
+// returns for a NULL feature string. No pooled parser is involved.\r
+DOMDocument* ParserPool::newDocument()\r
+{\r
+    DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(NULL);\r
+    return impl->createDocument();\r
+}\r
+\r
+// Parses a document with a parser borrowed from the pool.\r
+//\r
+// The parser goes back to the pool exactly once on every path: inside the\r
+// handler when parsing throws, otherwise after the try block. Keeping the\r
+// normal check-in outside the try block means a failure inside\r
+// checkinBuilder() can no longer cause the same parser to be checked in twice.\r
+//\r
+// @param domsrc  source of the XML to parse\r
+// @return the document produced by the parser\r
+DOMDocument* ParserPool::parse(DOMInputSource& domsrc)\r
+{\r
+    DOMBuilder* parser=checkoutBuilder();\r
+    DOMDocument* doc=NULL;\r
+    try {\r
+        doc=parser->parse(domsrc);\r
+        // Re-assert document adoption before the parser is pooled again.\r
+        parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true);\r
+    }\r
+    catch (...) {\r
+        checkinBuilder(parser);\r
+        throw;\r
+    }\r
+    checkinBuilder(parser);\r
+    return doc;\r
+}\r
+\r
+// Convenience overload: wraps the stream in an input source and delegates\r
+// to the DOMInputSource overload.\r
+DOMDocument* ParserPool::parse(istream& is)\r
+{\r
+    StreamInputSource streamSource(is);\r
+    // The wrapper is built with "false" because streamSource lives on the stack.\r
+    Wrapper4InputSource wrapped(&streamSource,false);\r
+    return parse(wrapped);\r
+}\r
+\r
+// Functor that appends each pair's key twice to a shared buffer, writing the\r
+// separator after each copy: "key<sep>key<sep>".\r
+//\r
+// The separator is stored by value. Holding it by reference left the functor\r
+// pointing at a dead temporary whenever it was built from a literal and then\r
+// used after the constructing expression had ended.\r
+template <class T> class doubleit\r
+{\r
+public:\r
+    // @param t  buffer that receives the appended text; must outlive the functor\r
+    // @param s  separator written after each copy of the key\r
+    doubleit(T& t, const typename T::value_type& s) : temp(t), sep(s) {}\r
+\r
+    // Accepts any pair whose first member can be appended to T, so map entries\r
+    // (pair<const T,T>) are used in place instead of being copied to pair<T,T>.\r
+    template <class K, class V> void operator() (const std::pair<K,V>& s) {\r
+        temp += s.first;\r
+        temp += sep;\r
+        temp += s.first;\r
+        temp += sep;\r
+    }\r
+\r
+    T& temp;\r
+    typename T::value_type sep;\r
+};\r
+\r
+// Registers a namespace-to-file mapping after checking that the file exists.\r
+//\r
+// Nothing is parsed here: the pair is stored in the location map and the\r
+// schema location hint string is rebuilt from the whole map. The update runs\r
+// under m_lock, which is released on every path, including when the map or\r
+// string update throws.\r
+//\r
+// @param nsURI     XML namespace the schema belongs to\r
+// @param pathname  path of the schema file, checked with stat()\r
+// @return true if the mapping was registered; false if an argument is NULL\r
+//         or stat() fails for the path\r
+bool ParserPool::loadSchema(const XMLCh* nsURI, const XMLCh* pathname)\r
+{\r
+    // Neither the map key nor the stat() argument may be built from NULL.\r
+    if (!nsURI || !pathname) {\r
+        Category::getInstance(XMLTOOLING_LOGCAT".ParserPool").error("failed to load schema, namespace or pathname was NULL");\r
+        return false;\r
+    }\r
+\r
+    // Just check the pathname and then directly register the pair into the map.\r
+    auto_ptr_char p(pathname);\r
+#ifdef WIN32\r
+    struct _stat stat_buf;\r
+    const bool missing=(_stat(p.get(), &stat_buf) != 0);\r
+#else\r
+    struct stat stat_buf;\r
+    const bool missing=(stat(p.get(), &stat_buf) != 0);\r
+#endif\r
+    if (missing) {\r
+#if _DEBUG\r
+        xmltooling::NDC ndc("loadSchema");\r
+#endif\r
+        Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");\r
+        auto_ptr_char n(nsURI);\r
+        log.error("failed to load schema for (%s), file not found (%s)",n.get(),p.get());\r
+        return false;\r
+    }\r
+\r
+    XMLPlatformUtils::lockMutex(m_lock);\r
+    try {\r
+#ifdef HAVE_GOOD_STL\r
+        m_schemaLocMap[nsURI]=pathname;\r
+        m_schemaLocations.erase();\r
+        for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));\r
+#else\r
+        auto_ptr_char n(nsURI);\r
+        m_schemaLocMap[n.get()]=p.get();\r
+        m_schemaLocations.erase();\r
+        for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));\r
+#endif\r
+    }\r
+    catch (...) {\r
+        // Never leave the pool mutex held when the update fails.\r
+        XMLPlatformUtils::unlockMutex(m_lock);\r
+        throw;\r
+    }\r
+    XMLPlatformUtils::unlockMutex(m_lock);\r
+\r
+    return true;\r
+}\r
+\r
+// Loads an OASIS XML catalog and registers every <uri> element it contains,\r
+// mapping the "name" attribute to the "uri" attribute in the schema location\r
+// map, then rebuilds the schema location hint string.\r
+//\r
+// The map update runs under m_lock. The lock is released on every path; in\r
+// particular a DOMException raised while the lock is held is rethrown only\r
+// after unlocking, so the handlers below can return false safely.\r
+//\r
+// @param pathname  path to the catalog file\r
+// @return true if the catalog was processed; false if the root element is not\r
+//         the expected catalog element or a DOM/SAX/XML exception was caught\r
+bool ParserPool::loadCatalog(const XMLCh* pathname)\r
+{\r
+#if _DEBUG\r
+    xmltooling::NDC ndc("loadCatalog");\r
+#endif\r
+    Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");\r
+\r
+    // XML constants\r
+    static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };\r
+    static const XMLCh catalog[] = { chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull };\r
+    static const XMLCh uri[] = { chLatin_u, chLatin_r, chLatin_i, chNull };\r
+    static const XMLCh name[] = { chLatin_n, chLatin_a, chLatin_m, chLatin_e, chNull };\r
+    static const XMLCh CATALOG_NS[] = {\r
+        chLatin_u, chLatin_r, chLatin_n, chColon,\r
+        chLatin_o, chLatin_a, chLatin_s, chLatin_i, chLatin_s, chColon,\r
+        chLatin_n, chLatin_a, chLatin_m, chLatin_e, chLatin_s, chColon,\r
+        chLatin_t, chLatin_c, chColon,\r
+        chLatin_e, chLatin_n, chLatin_t, chLatin_i, chLatin_t, chLatin_y, chColon,\r
+        chLatin_x, chLatin_m, chLatin_l, chLatin_n, chLatin_s, chColon,\r
+        chLatin_x, chLatin_m, chLatin_l, chColon,\r
+        chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull\r
+    };\r
+\r
+    // Get a local parser to use. When it pops, the document will go with it.\r
+    DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);\r
+    auto_ptr<DOMBuilder> parser(static_cast<DOMImplementationLS*>(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0));\r
+    parser->setFeature(XMLUni::fgDOMNamespaces,true);\r
+\r
+    if (log.isDebugEnabled()) {\r
+        auto_ptr_char temp(pathname);\r
+        log.debug("loading XML catalog from %s", temp.get());\r
+    }\r
+\r
+    LocalFileInputSource fsrc(NULL,pathname);\r
+    Wrapper4InputSource domsrc(&fsrc,false);\r
+    try {\r
+        DOMDocument* doc=parser->parse(domsrc);\r
+\r
+        // Check root element.\r
+        const DOMElement* root=doc->getDocumentElement();\r
+        if (!XMLHelper::isElementNamed(root,CATALOG_NS,catalog)) {\r
+            auto_ptr_char temp(pathname);\r
+            log.error("unknown root element, failed to load XML catalog from %s", temp.get());\r
+            return false;\r
+        }\r
+\r
+        // Fetch all the <uri> elements.\r
+        DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,uri);\r
+        XMLPlatformUtils::lockMutex(m_lock);\r
+        try {\r
+            for (XMLSize_t i=0; i<mappings->getLength(); i++) {\r
+                const DOMElement* mapping=static_cast<DOMElement*>(mappings->item(i));\r
+                const XMLCh* from=mapping->getAttributeNS(NULL,name);\r
+                const XMLCh* to=mapping->getAttributeNS(NULL,uri);\r
+                // A mapping without both attributes cannot be resolved later, and a\r
+                // NULL pointer must never be turned into a map key or value.\r
+                if (!from || !*from || !to || !*to)\r
+                    continue;\r
+#ifdef HAVE_GOOD_STL\r
+                m_schemaLocMap[from]=to;\r
+#else\r
+                auto_ptr_char f(from);\r
+                auto_ptr_char t(to);\r
+                m_schemaLocMap[f.get()]=t.get();\r
+#endif\r
+            }\r
+            m_schemaLocations.erase();\r
+#ifdef HAVE_GOOD_STL\r
+            for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));\r
+#else\r
+            for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));\r
+#endif\r
+        }\r
+        catch (...) {\r
+            // Unlock before the exception reaches the handlers below (or the caller).\r
+            XMLPlatformUtils::unlockMutex(m_lock);\r
+            throw;\r
+        }\r
+        XMLPlatformUtils::unlockMutex(m_lock);\r
+    }\r
+    catch (DOMException& e) {\r
+        auto_ptr_char p(pathname);\r
+        auto_ptr_char m(e.getMessage());\r
+        log.error("catalog loader caught DOMException (%s) from file (%s)", m.get(), p.get());\r
+        return false;\r
+    }\r
+    catch (SAXException& e) {\r
+        auto_ptr_char p(pathname);\r
+        auto_ptr_char m(e.getMessage());\r
+        log.error("catalog loader caught SAXException (%s) from file (%s)", m.get(), p.get());\r
+        return false;\r
+    }\r
+    catch (XMLException& e) {\r
+        auto_ptr_char p(pathname);\r
+        auto_ptr_char m(e.getMessage());\r
+        log.error("catalog loader caught XMLException (%s) from file (%s)", m.get(), p.get());\r
+        return false;\r
+    }\r
+\r
+    return true;\r
+}\r
+\r
+// Supplies external entities to the parser from the registered schema map.\r
+//\r
+// @param publicId  not consulted\r
+// @param systemId  identifier looked up in the schema location map\r
+// @param baseURI   used for debug logging only\r
+// @return NULL when systemId is NULL; a file-backed source when systemId is\r
+//         registered; otherwise an empty in-memory source, which blocks the\r
+//         request\r
+//\r
+// NOTE(review): the map is read here without taking m_lock, while\r
+// loadSchema()/loadCatalog() modify it under the lock - confirm that schemas\r
+// are never registered while parses are in flight.\r
+DOMInputSource* ParserPool::resolveEntity(const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI)\r
+{\r
+#if _DEBUG\r
+    xmltooling::NDC ndc("resolveEntity");\r
+#endif\r
+    if (!systemId)\r
+        return NULL;\r
+\r
+    Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");\r
+    if (log.isDebugEnabled()) {\r
+        auto_ptr_char sysId(systemId);\r
+        auto_ptr_char base(baseURI);\r
+        log.debug("asked to resolve %s with baseURI %s",sysId.get(),base.get() ? base.get() : "(null)");\r
+    }\r
+\r
+    // Find well-known schemas in the specified location.\r
+#ifdef HAVE_GOOD_STL\r
+    map<xstring,xstring>::const_iterator i=m_schemaLocMap.find(systemId);\r
+    if (i!=m_schemaLocMap.end())\r
+        return new Wrapper4InputSource(new LocalFileInputSource(NULL,i->second.c_str()));\r
+#else\r
+    auto_ptr_char temp(systemId);\r
+    map<string,string>::const_iterator i=m_schemaLocMap.find(temp.get());\r
+    if (i!=m_schemaLocMap.end()) {\r
+        // Only touch i->second once the iterator is known to be valid.\r
+        auto_ptr_XMLCh temp2(i->second.c_str());\r
+        return new Wrapper4InputSource(new LocalFileInputSource(NULL,temp2.get()));\r
+    }\r
+#endif\r
+\r
+    // Shortcircuit the request.\r
+    log.warn("unauthorized entity request, blocking it");\r
+    static const XMLByte nullbuf[] = {0};\r
+    return new Wrapper4InputSource(new MemBufInputSource(nullbuf,0,systemId));\r
+}\r
+\r
+// Logs a parser error and decides whether parsing may continue.\r
+//\r
+// Warnings are logged and parsing continues. Every other severity is logged\r
+// (when it is a known one) and turned into an XMLParserException.\r
+//\r
+// @param e  error reported by the parser\r
+// @return true for warnings; other severities never return\r
+// @throws XMLParserException for errors, fatal errors and unknown severities\r
+//\r
+// NOTE(review): the locator returned by getLocation() is used without a NULL\r
+// check - confirm that the parser always supplies one.\r
+bool ParserPool::handleError(const DOMError& e)\r
+{\r
+#ifdef _DEBUG\r
+    xmltooling::NDC ndc("handleError");\r
+#endif\r
+    Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");\r
+    DOMLocator* locator=e.getLocation();\r
+    auto_ptr_char temp(e.getMessage());\r
+\r
+    // Streaming a NULL char* is undefined, so pick the fallback text once and\r
+    // use it for both the log entry and the exception message.\r
+    const char* msg=temp.get() ? temp.get() : "no message";\r
+\r
+    switch (e.getSeverity()) {\r
+        case DOMError::DOM_SEVERITY_WARNING:\r
+            log.warnStream() << "warning on line " << locator->getLineNumber()\r
+                << ", column " << locator->getColumnNumber()\r
+                << ", message: " << msg << CategoryStream::ENDLINE;\r
+            return true;\r
+\r
+        case DOMError::DOM_SEVERITY_ERROR:\r
+            log.errorStream() << "error on line " << locator->getLineNumber()\r
+                << ", column " << locator->getColumnNumber()\r
+                << ", message: " << msg << CategoryStream::ENDLINE;\r
+            throw XMLParserException(string("error during XML parsing: ") + msg);\r
+\r
+        case DOMError::DOM_SEVERITY_FATAL_ERROR:\r
+            log.critStream() << "fatal error on line " << locator->getLineNumber()\r
+                << ", column " << locator->getColumnNumber()\r
+                << ", message: " << msg << CategoryStream::ENDLINE;\r
+            throw XMLParserException(string("fatal error during XML parsing: ") + msg);\r
+    }\r
+    throw XMLParserException(string("unclassified error during XML parsing: ") + msg);\r
+}\r
+\r
+// Creates a new parser configured with this pool's settings and with the pool\r
+// registered as its entity resolver and error handler.\r
+//\r
+// If any configuration call throws, the half-configured parser is released\r
+// before the exception propagates, so it is not leaked.\r
+//\r
+// @return the configured parser\r
+DOMBuilder* ParserPool::createBuilder()\r
+{\r
+    static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };\r
+    DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);\r
+    DOMBuilder* parser=static_cast<DOMImplementationLS*>(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0);\r
+    try {\r
+        if (m_namespaceAware)\r
+            parser->setFeature(XMLUni::fgDOMNamespaces,true);\r
+        if (m_schemaAware) {\r
+            parser->setFeature(XMLUni::fgXercesSchema,true);\r
+            parser->setFeature(XMLUni::fgDOMValidation,true);\r
+            parser->setFeature(XMLUni::fgXercesCacheGrammarFromParse,true);\r
+            parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal,true);\r
+\r
+            // We build a "fake" schema location hint that binds each namespace to itself.\r
+            // This ensures the entity resolver will be given the namespace as a systemId it can check.\r
+#ifdef HAVE_GOOD_STL\r
+            parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));\r
+#else\r
+            auto_ptr_XMLCh temp(m_schemaLocations.c_str());\r
+            parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(temp.get()));\r
+#endif\r
+        }\r
+        parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true);\r
+        parser->setEntityResolver(this);\r
+        parser->setErrorHandler(this);\r
+    }\r
+    catch (...) {\r
+        // Nobody else knows about this parser yet, so it must be freed here.\r
+        parser->release();\r
+        throw;\r
+    }\r
+    return parser;\r
+}\r
+\r
+// Hands out a parser: a pooled one if available, otherwise a new one.\r
+//\r
+// A pooled parser has its schema location hint refreshed when the pool is\r
+// schema-aware. All of this happens under m_lock, which is released exactly\r
+// once on every path.\r
+//\r
+// @return a parser the caller must return with checkinBuilder()\r
+DOMBuilder* ParserPool::checkoutBuilder()\r
+{\r
+    DOMBuilder* builder=NULL;\r
+\r
+    XMLPlatformUtils::lockMutex(m_lock);\r
+    try {\r
+        if (m_pool.empty()) {\r
+            builder=createBuilder();\r
+        }\r
+        else {\r
+            builder=m_pool.top();\r
+            m_pool.pop();\r
+            if (m_schemaAware) {\r
+#ifdef HAVE_GOOD_STL\r
+                builder->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));\r
+#else\r
+                auto_ptr_XMLCh temp2(m_schemaLocations.c_str());\r
+                builder->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(temp2.get()));\r
+#endif\r
+            }\r
+        }\r
+    }\r
+    catch (...) {\r
+        // builder is only non-NULL here if it was already popped from the pool;\r
+        // release it so it is not lost. createBuilder() failing leaves it NULL.\r
+        if (builder)\r
+            builder->release();\r
+        XMLPlatformUtils::unlockMutex(m_lock);\r
+        throw;\r
+    }\r
+    XMLPlatformUtils::unlockMutex(m_lock);\r
+    return builder;\r
+}\r
+\r
+// Returns a parser to the pool. A NULL pointer is ignored.\r
+//\r
+// The push runs under m_lock. If it throws, the lock is released before the\r
+// exception propagates; ownership of the parser then stays with the caller.\r
+//\r
+// @param builder  parser obtained from checkoutBuilder(), or NULL\r
+void ParserPool::checkinBuilder(DOMBuilder* builder)\r
+{\r
+    if (!builder)\r
+        return;\r
+\r
+    XMLPlatformUtils::lockMutex(m_lock);\r
+    try {\r
+        m_pool.push(builder);\r
+    }\r
+    catch (...) {\r
+        XMLPlatformUtils::unlockMutex(m_lock);\r
+        throw;\r
+    }\r
+    XMLPlatformUtils::unlockMutex(m_lock);\r
+}\r
+\r
+// Reads up to maxToRead bytes from the wrapped stream into toFill and advances\r
+// the position counter by the number of bytes actually obtained.\r
+//\r
+// @param toFill     destination buffer\r
+// @param maxToRead  maximum number of bytes to read\r
+// @return number of bytes read; 0 when nothing was requested, the stream is\r
+//         already at EOF, or the read throws (the failure is logged)\r
+unsigned int StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toFill, const unsigned int maxToRead)\r
+{\r
+    // Nothing to do for an empty request or an exhausted stream.\r
+    if (maxToRead==0 || m_is.eof())\r
+        return 0;\r
+\r
+    try {\r
+        m_is.read(reinterpret_cast<char*>(toFill),maxToRead);\r
+        const unsigned int got=static_cast<unsigned int>(m_is.gcount());\r
+        m_pos+=got;\r
+        return got;\r
+    }\r
+    catch(...) {\r
+        Category::getInstance(XMLTOOLING_LOGCAT".StreamInputSource").critStream() <<\r
+            "XML::StreamInputSource::StreamBinInputStream::readBytes caught an exception" << CategoryStream::ENDLINE;\r
+        *toFill=0;\r
+        return 0;\r
+    }\r
+}\r
--- /dev/null
+/*\r
+ * Copyright 2001-2006 Internet2\r
+ * \r
+ * Licensed under the Apache License, Version 2.0 (the "License");\r
+ * you may not use this file except in compliance with the License.\r
+ * You may obtain a copy of the License at\r
+ *\r
+ * http://www.apache.org/licenses/LICENSE-2.0\r
+ *\r
+ * Unless required by applicable law or agreed to in writing, software\r
+ * distributed under the License is distributed on an "AS IS" BASIS,\r
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ * See the License for the specific language governing permissions and\r
+ * limitations under the License.\r
+ */\r
+\r
+/**\r
+ * @file ParserPool.h\r
+ * \r
+ * XML parsing\r
+ */\r
+\r
+#if !defined(__xmltooling_pool_h__)\r
+#define __xmltooling_pool_h__\r
+\r
+#include <xmltooling/unicode.h>\r
+\r
+#include <map>\r
+#include <stack>\r
+#include <istream>\r
+#include <xercesc/dom/DOM.hpp>\r
+#include <xercesc/sax/InputSource.hpp>\r
+#include <xercesc/util/BinInputStream.hpp>\r
+\r
+using namespace xercesc;\r
+\r
+namespace xmltooling {\r
+\r
+ /**\r
+ * A thread-safe pool of DOMBuilders that share characteristics\r
+ * \r
+ * The pool registers itself as the entity resolver and error handler of\r
+ * every parser it creates. Note that DOMErrorHandler is inherited privately,\r
+ * because no access specifier is given for the second base class.\r
+ */\r
+ class XMLTOOL_API ParserPool : public DOMEntityResolver, DOMErrorHandler\r
+ {\r
+ MAKE_NONCOPYABLE(ParserPool);\r
+ public:\r
+ /**\r
+ * Constructs a new pool\r
+ * \r
+ * @param namespaceAware indicates whether parsers should be namespace-aware or not\r
+ * @param schemaAware indicates whether parsers should be schema-validating or not\r
+ */\r
+ ParserPool(bool namespaceAware=true, bool schemaAware=false);\r
+ ~ParserPool(); /**< Releases all pooled parsers and closes the pool mutex. */\r
+\r
+ /**\r
+ * Creates a new, empty document from the DOM implementation registry.\r
+ * No pooled parser is used.\r
+ * \r
+ * @return new XML document\r
+ * \r
+ */\r
+ DOMDocument* newDocument();\r
+\r
+ /**\r
+ * Parses a document using a pooled parser with the proper settings\r
+ * \r
+ * @param domsrc A DOM source containing the content to be parsed\r
+ * @return The DOM document resulting from the parse\r
+ * @throws XMLParserException thrown if there was a problem reading, parsing, or validating the XML\r
+ */\r
+ DOMDocument* parse(DOMInputSource& domsrc);\r
+\r
+ /**\r
+ * Parses a document using a pooled parser with the proper settings\r
+ * \r
+ * @param is An input stream containing the content to be parsed\r
+ * @return The DOM document resulting from the parse\r
+ * @throws XMLParserException thrown if there was a problem reading, parsing, or validating the XML\r
+ */\r
+ DOMDocument* parse(std::istream& is);\r
+\r
+ /**\r
+ * Load an OASIS catalog file to map schema namespace URIs to filenames.\r
+ * \r
+ * This does not provide real catalog support; only the <uri> element\r
+ * is supported to map from a namespace URI to a relative path or file:// URI.\r
+ * \r
+ * @param pathname path to a catalog file\r
+ * @return true iff the catalog was successfully processed\r
+ */\r
+ bool loadCatalog(const XMLCh* pathname);\r
+ \r
+ /**\r
+ * Load a schema explicitly from a local file.\r
+ * \r
+ * Note that "successful processing" does not imply that the schema is valid,\r
+ * only that a reference to it was successfully registered with the pool.\r
+ * \r
+ * @param nsURI XML namespace to load\r
+ * @param pathname path to schema file\r
+ * @return true iff the schema was successfully processed\r
+ */\r
+ bool loadSchema(const XMLCh* nsURI, const XMLCh* pathname);\r
+\r
+ /**\r
+ * Supplies all external entities (primarily schemas) to the parser\r
+ * \r
+ * @param publicId not consulted\r
+ * @param systemId identifier looked up among the registered schema locations\r
+ * @param baseURI used for debug logging only\r
+ * @return NULL if systemId is NULL, a file-backed source if systemId is registered,\r
+ * otherwise an empty in-memory source that blocks the request\r
+ */\r
+ DOMInputSource* resolveEntity(const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI);\r
+\r
+ /**\r
+ * Handles parsing errors\r
+ * \r
+ * @param e error reported by the parser\r
+ * @return true for warnings; other severities never return\r
+ * @throws XMLParserException for any severity other than a warning\r
+ */\r
+ bool handleError(const DOMError& e);\r
+\r
+ private:\r
+ DOMBuilder* createBuilder(); /**< Builds and configures a new parser; called by checkoutBuilder() with m_lock held. */\r
+ DOMBuilder* checkoutBuilder(); /**< Takes a parser from the pool, or creates one if the pool is empty. */\r
+ void checkinBuilder(DOMBuilder* builder); /**< Puts a parser back into the pool; NULL is ignored. */\r
+\r
+#ifdef HAVE_GOOD_STL\r
+ xstring m_schemaLocations; /**< Schema location hint: each registered namespace twice, space-separated. */\r
+ std::map<xstring,xstring> m_schemaLocMap; /**< Registered namespace -> schema file location. */\r
+#else\r
+ std::string m_schemaLocations; /**< Schema location hint: each registered namespace twice, space-separated. */\r
+ std::map<std::string,std::string> m_schemaLocMap; /**< Registered namespace -> schema file location. */\r
+#endif\r
+ bool m_namespaceAware,m_schemaAware; /**< Settings applied to every parser the pool creates. */\r
+ std::stack<DOMBuilder*> m_pool; /**< Idle parsers available for checkout. */\r
+ void* m_lock; /**< Mutex from XMLPlatformUtils::makeMutex() guarding the pool and the schema data. */\r
+ };\r
+\r
+ /**\r
+ * A parser source that wraps a C++ input stream\r
+ * \r
+ * Only a reference to the stream is kept, so the stream must outlive this\r
+ * object and every binary stream obtained from makeStream().\r
+ */\r
+ class XMLTOOL_API StreamInputSource : public InputSource\r
+ {\r
+ MAKE_NONCOPYABLE(StreamInputSource);\r
+ public:\r
+ /**\r
+ * Constructs an input source around an input stream reference.\r
+ * \r
+ * @param is reference to an input stream\r
+ * @param systemId optional system identifier to attach to the stream\r
+ */\r
+ StreamInputSource(std::istream& is, const char* systemId=NULL) : InputSource(systemId), m_is(is) {}\r
+ virtual BinInputStream* makeStream() const { return new StreamBinInputStream(m_is); } /**< Returns a newly allocated binary stream that reads from the wrapped stream. */\r
+\r
+ private:\r
+ std::istream& m_is; /**< Wrapped stream; not owned. */\r
+\r
+ class XMLTOOL_API StreamBinInputStream : public BinInputStream /* adapter that reads bytes from a std::istream */\r
+ {\r
+ public:\r
+ StreamBinInputStream(std::istream& is) : m_is(is), m_pos(0) {}\r
+ virtual unsigned int curPos() const { return m_pos; } /**< Number of bytes handed out so far by readBytes(). */\r
+ virtual unsigned int readBytes(XMLByte* const toFill, const unsigned int maxToRead); /**< Reads up to maxToRead bytes into toFill; returns the count read, 0 at EOF or on failure. */\r
+ private:\r
+ std::istream& m_is; /**< Wrapped stream; not owned. */\r
+ unsigned int m_pos; /**< Running total of bytes read. */\r
+ };\r
+ };\r
+};\r
+\r
+#endif /* __xmltooling_pool_h__ */\r