+/*\r
+ * Copyright 2001-2006 Internet2\r
+ * \r
+ * Licensed under the Apache License, Version 2.0 (the "License");\r
+ * you may not use this file except in compliance with the License.\r
+ * You may obtain a copy of the License at\r
+ *\r
+ * http://www.apache.org/licenses/LICENSE-2.0\r
+ *\r
+ * Unless required by applicable law or agreed to in writing, software\r
+ * distributed under the License is distributed on an "AS IS" BASIS,\r
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ * See the License for the specific language governing permissions and\r
+ * limitations under the License.\r
+ */\r
+\r
+/**\r
+ * ParserPool.cpp\r
+ * \r
+ * XML parsing\r
+ */\r
+\r
+#include "internal.h"\r
+#include "exceptions.h"\r
+#include "util/NDC.h"\r
+#include "util/ParserPool.h"\r
+#include "util/XMLHelper.h"\r
+\r
+#include <algorithm>\r
+#include <functional>\r
+#include <sys/types.h>\r
+#include <sys/stat.h>\r
+#include <log4cpp/Category.hh>\r
+#include <xercesc/util/PlatformUtils.hpp>\r
+#include <xercesc/util/XMLUniDefs.hpp>\r
+#include <xercesc/sax/SAXException.hpp>\r
+#include <xercesc/framework/MemBufInputSource.hpp>\r
+#include <xercesc/framework/LocalFileInputSource.hpp>\r
+#include <xercesc/framework/Wrapper4InputSource.hpp>\r
+\r
+using namespace xmltooling;\r
+using namespace std;\r
+using namespace log4cpp;\r
+\r
+// Records the namespace/schema awareness flags used when builders are created
+// and allocates the mutex that guards the pool and the schema location data.
+ParserPool::ParserPool(bool namespaceAware, bool schemaAware)
+    : m_namespaceAware(namespaceAware),
+      m_schemaAware(schemaAware),
+      m_lock(XMLPlatformUtils::makeMutex())
+{
+}
+\r
+// Releases every builder still held in the pool, then destroys the mutex.
+ParserPool::~ParserPool()
+{
+    for (; !m_pool.empty(); m_pool.pop())
+        m_pool.top()->release();
+    XMLPlatformUtils::closeMutex(m_lock);
+}
+\r
+// Creates an empty DOM document using the implementation the registry
+// returns for a NULL feature list.
+DOMDocument* ParserPool::newDocument()
+{
+    DOMImplementation* domImpl=DOMImplementationRegistry::getDOMImplementation(NULL);
+    return domImpl->createDocument();
+}
+\r
+// Parses the supplied input source with a builder borrowed from the pool.
+// The builder is handed back to the pool whether or not parsing succeeds;
+// any exception raised while parsing is rethrown to the caller.
+DOMDocument* ParserPool::parse(DOMInputSource& domsrc)
+{
+    DOMBuilder* builder=checkoutBuilder();
+    DOMDocument* result=NULL;
+    try {
+        result=builder->parse(domsrc);
+        // Re-assert document adoption on the builder before it returns to the pool.
+        builder->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true);
+        checkinBuilder(builder);
+    }
+    catch (...) {
+        checkinBuilder(builder);
+        throw;
+    }
+    return result;
+}
+\r
+// Parses XML read from a standard input stream by wrapping the stream in a
+// DOM input source and delegating to the DOMInputSource overload.
+DOMDocument* ParserPool::parse(istream& is)
+{
+    StreamInputSource streamSource(is);
+    // false: the wrapper must not adopt (delete) the stack-allocated source.
+    Wrapper4InputSource wrapped(&streamSource,false);
+    return parse(wrapped);
+}
+\r
+// Functor that appends its argument's key to a buffer twice, following each
+// copy with a separator character (key SEP key SEP).
+//
+// The separator is stored by value so the functor remains valid after the
+// expression that supplied the separator has ended. operator() accepts any
+// pair-like element, so map entries (pair<const T,T>) are read in place
+// instead of first being converted to a temporary pair<T,T>.
+template <class T> class doubleit
+{
+public:
+    // t: buffer to append to (referenced, not owned); s: separator character.
+    doubleit(T& t, const typename T::value_type& s) : temp(t), sep(s) {}
+
+    // Appends "first SEP first SEP" for the given element; only s.first is read.
+    template <class P> void operator() (const P& s) {
+        temp += s.first;
+        temp += sep;
+        temp += s.first;
+        temp += sep;
+    }
+
+    T& temp;
+    const typename T::value_type sep;
+};
+\r
+// Registers a schema file for a namespace.
+//
+// nsURI:    namespace the schema applies to
+// pathname: local path of the schema file
+//
+// Returns false (after logging an error) if stat() on the path fails;
+// otherwise stores the mapping, rebuilds the schema location string from the
+// whole map, and returns true. The map update runs under m_lock, and the lock
+// is released even if the update throws (the exception is rethrown).
+bool ParserPool::loadSchema(const XMLCh* nsURI, const XMLCh* pathname)
+{
+    // Just check the pathname and then directly register the pair into the map.
+
+    auto_ptr_char p(pathname);
+#ifdef WIN32
+    struct _stat stat_buf;
+    if (_stat(p.get(), &stat_buf) != 0)
+#else
+    struct stat stat_buf;
+    if (stat(p.get(), &stat_buf) != 0)
+#endif
+    {
+#if _DEBUG
+        xmltooling::NDC ndc("loadSchema");
+#endif
+        Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
+        auto_ptr_char n(nsURI);
+        log.error("failed to load schema for (%s), file not found (%s)",n.get(),p.get());
+        return false;
+    }
+
+    XMLPlatformUtils::lockMutex(m_lock);
+    try {
+#ifdef HAVE_GOOD_STL
+        m_schemaLocMap[nsURI]=pathname;
+        m_schemaLocations.erase();
+        for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
+#else
+        auto_ptr_char n(nsURI);
+        m_schemaLocMap[n.get()]=p.get();
+        m_schemaLocations.erase();
+        for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));
+#endif
+    }
+    catch (...) {
+        // Same pattern as checkoutBuilder(): never leave with the mutex held.
+        XMLPlatformUtils::unlockMutex(m_lock);
+        throw;
+    }
+    XMLPlatformUtils::unlockMutex(m_lock);
+
+    return true;
+}
+\r
+bool ParserPool::loadCatalog(const XMLCh* pathname)\r
+{\r
+#if _DEBUG\r
+ xmltooling::NDC ndc("loadCatalog");\r
+#endif\r
+ Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");\r
+\r
+ // XML constants\r
+ static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };\r
+ static const XMLCh catalog[] = { chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull };\r
+ static const XMLCh uri[] = { chLatin_u, chLatin_r, chLatin_i, chNull };\r
+ static const XMLCh name[] = { chLatin_n, chLatin_a, chLatin_m, chLatin_e, chNull };\r
+ static const XMLCh CATALOG_NS[] = {\r
+ chLatin_u, chLatin_r, chLatin_n, chColon,\r
+ chLatin_o, chLatin_a, chLatin_s, chLatin_i, chLatin_s, chColon,\r
+ chLatin_n, chLatin_a, chLatin_m, chLatin_e, chLatin_s, chColon,\r
+ chLatin_t, chLatin_c, chColon,\r
+ chLatin_e, chLatin_n, chLatin_t, chLatin_i, chLatin_t, chLatin_y, chColon,\r
+ chLatin_x, chLatin_m, chLatin_l, chLatin_n, chLatin_s, chColon,\r
+ chLatin_x, chLatin_m, chLatin_l, chColon,\r
+ chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull\r
+ };\r
+\r
+ // Get a local parser to use. When it pops, the document will go with it.\r
+ DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);\r
+ auto_ptr<DOMBuilder> parser(static_cast<DOMImplementationLS*>(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0));\r
+ parser->setFeature(XMLUni::fgDOMNamespaces,true);\r
+\r
+ if (log.isDebugEnabled()) {\r
+ auto_ptr_char temp(pathname);\r
+ log.debug("loading XML catalog from %s", temp.get());\r
+ }\r
+\r
+ LocalFileInputSource fsrc(NULL,pathname);\r
+ Wrapper4InputSource domsrc(&fsrc,false);\r
+ try {\r
+ DOMDocument* doc=parser->parse(domsrc);\r
+ \r
+ // Check root element.\r
+ const DOMElement* root=doc->getDocumentElement();\r
+ if (!XMLHelper::isElementNamed(root,CATALOG_NS,catalog)) {\r
+ auto_ptr_char temp(pathname);\r
+ log.error("unknown root element, failed to load XML catalog from %s", temp.get());\r
+ return false;\r
+ }\r
+ \r
+ // Fetch all the <uri> elements.\r
+ DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,uri);\r
+ XMLPlatformUtils::lockMutex(m_lock);\r
+ for (XMLSize_t i=0; i<mappings->getLength(); i++) {\r
+ root=static_cast<DOMElement*>(mappings->item(i));\r
+ const XMLCh* from=root->getAttributeNS(NULL,name);\r
+ const XMLCh* to=root->getAttributeNS(NULL,uri);\r
+#ifdef HAVE_GOOD_STL\r
+ m_schemaLocMap[from]=to;\r
+#else\r
+ auto_ptr_char f(from);\r
+ auto_ptr_char t(to);\r
+ m_schemaLocMap[f.get()]=t.get();\r
+#endif\r
+ }\r
+ m_schemaLocations.erase();\r
+#ifdef HAVE_GOOD_STL\r
+ for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));\r
+#else\r
+ for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));\r
+#endif\r
+ XMLPlatformUtils::unlockMutex(m_lock);\r
+ }\r
+ catch (DOMException& e) {\r
+ auto_ptr_char p(pathname);\r
+ auto_ptr_char m(e.getMessage());\r
+ log.error("catalog loader caught DOMException (%s) from file (%s)", m.get(), p.get());\r
+ return false;\r
+ }\r
+ catch (SAXException& e) {\r
+ auto_ptr_char p(pathname);\r
+ auto_ptr_char m(e.getMessage());\r
+ log.error("catalog loader caught SAXException (%s) from file (%s)", m.get(), p.get());\r
+ return false;\r
+ }\r
+ catch (XMLException& e) {\r
+ auto_ptr_char p(pathname);\r
+ auto_ptr_char m(e.getMessage());\r
+ log.error("catalog loader caught XMLException (%s) from file (%s)", m.get(), p.get());\r
+ return false;\r
+ }\r
+\r
+ return true;\r
+}\r
+\r
+// Entity resolver callback: serves registered schemas from local files and
+// blocks every other request.
+//
+// Returns NULL when no systemId is supplied. When systemId matches a key in
+// the schema location map, returns a new input source over the mapped local
+// file. Otherwise logs a warning and returns a new, empty in-memory input
+// source so the request resolves to nothing. Non-NULL results are
+// heap-allocated for the caller.
+DOMInputSource* ParserPool::resolveEntity(const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI)
+{
+#if _DEBUG
+    xmltooling::NDC ndc("resolveEntity");
+#endif
+    if (!systemId)
+        return NULL;
+
+    Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
+    if (log.isDebugEnabled()) {
+        auto_ptr_char sysId(systemId);
+        auto_ptr_char base(baseURI);
+        log.debug("asked to resolve %s with baseURI %s",sysId.get(),base.get() ? base.get() : "(null)");
+    }
+
+    // Find well-known schemas in the specified location.
+#ifdef HAVE_GOOD_STL
+    map<xstring,xstring>::const_iterator i=m_schemaLocMap.find(systemId);
+    if (i!=m_schemaLocMap.end())
+        return new Wrapper4InputSource(new LocalFileInputSource(NULL,i->second.c_str()));
+#else
+    auto_ptr_char temp(systemId);
+    map<string,string>::const_iterator i=m_schemaLocMap.find(temp.get());
+    if (i!=m_schemaLocMap.end()) {
+        // Only touch the iterator once it is known to be valid.
+        auto_ptr_XMLCh temp2(i->second.c_str());
+        return new Wrapper4InputSource(new LocalFileInputSource(NULL,temp2.get()));
+    }
+#endif
+
+    // Shortcircuit the request.
+    log.warn("unauthorized entity request, blocking it");
+    static const XMLByte nullbuf[] = {0};
+    return new Wrapper4InputSource(new MemBufInputSource(nullbuf,0,systemId));
+}
+\r
+bool ParserPool::handleError(const DOMError& e)\r
+{\r
+#ifdef _DEBUG\r
+ xmltooling::NDC ndc("handleError");\r
+#endif\r
+ Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");\r
+ DOMLocator* locator=e.getLocation();\r
+ auto_ptr_char temp(e.getMessage());\r
+\r
+ switch (e.getSeverity()) {\r
+ case DOMError::DOM_SEVERITY_WARNING:\r
+ log.warnStream() << "warning on line " << locator->getLineNumber()\r
+ << ", column " << locator->getColumnNumber()\r
+ << ", message: " << temp.get() << CategoryStream::ENDLINE;\r
+ return true;\r
+\r
+ case DOMError::DOM_SEVERITY_ERROR:\r
+ log.errorStream() << "error on line " << locator->getLineNumber()\r
+ << ", column " << locator->getColumnNumber()\r
+ << ", message: " << temp.get() << CategoryStream::ENDLINE;\r
+ throw XMLParserException(string("error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));\r
+\r
+ case DOMError::DOM_SEVERITY_FATAL_ERROR:\r
+ log.critStream() << "fatal error on line " << locator->getLineNumber()\r
+ << ", column " << locator->getColumnNumber()\r
+ << ", message: " << temp.get() << CategoryStream::ENDLINE;\r
+ throw XMLParserException(string("fatal error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));\r
+ }\r
+ throw XMLParserException(string("unclassified error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));\r
+}\r
+\r
+// Builds a new synchronous DOMBuilder configured from the pool's settings:
+// optional namespace processing, optional schema validation (with the current
+// schema location string), user-adopted documents, and this pool installed as
+// both entity resolver and error handler.
+DOMBuilder* ParserPool::createBuilder()
+{
+    static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
+    DOMImplementation* domImpl=DOMImplementationRegistry::getDOMImplementation(impltype);
+    DOMImplementationLS* lsImpl=static_cast<DOMImplementationLS*>(domImpl);
+    DOMBuilder* builder=lsImpl->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0);
+
+    if (m_namespaceAware) {
+        builder->setFeature(XMLUni::fgDOMNamespaces,true);
+    }
+
+    if (m_schemaAware) {
+        builder->setFeature(XMLUni::fgXercesSchema,true);
+        builder->setFeature(XMLUni::fgDOMValidation,true);
+        builder->setFeature(XMLUni::fgXercesCacheGrammarFromParse,true);
+        builder->setFeature(XMLUni::fgXercesValidationErrorAsFatal,true);
+
+        // The location hint pairs every namespace with itself, so the entity
+        // resolver is handed the namespace as the systemId to look up.
+#ifdef HAVE_GOOD_STL
+        builder->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
+#else
+        auto_ptr_XMLCh widened(m_schemaLocations.c_str());
+        builder->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(widened.get()));
+#endif
+    }
+
+    builder->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true);
+    builder->setEntityResolver(this);
+    builder->setErrorHandler(this);
+    return builder;
+}
+\r
+// Hands out a builder: a pooled one if available (refreshed with the current
+// schema location string when schema-aware), otherwise a newly created one.
+// The pool is accessed under m_lock, which is released on both the normal and
+// the exceptional path.
+DOMBuilder* ParserPool::checkoutBuilder()
+{
+    XMLPlatformUtils::lockMutex(m_lock);
+    try {
+        DOMBuilder* builder=NULL;
+        if (!m_pool.empty()) {
+            builder=m_pool.top();
+            m_pool.pop();
+            if (m_schemaAware) {
+#ifdef HAVE_GOOD_STL
+                builder->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
+#else
+                auto_ptr_XMLCh widened(m_schemaLocations.c_str());
+                builder->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(widened.get()));
+#endif
+            }
+        }
+        else {
+            builder=createBuilder();
+        }
+        XMLPlatformUtils::unlockMutex(m_lock);
+        return builder;
+    }
+    catch(...) {
+        XMLPlatformUtils::unlockMutex(m_lock);
+        throw;
+    }
+}
+\r
+// Returns a builder to the pool under m_lock; a NULL builder is ignored.
+void ParserPool::checkinBuilder(DOMBuilder* builder)
+{
+    if (!builder)
+        return;
+
+    XMLPlatformUtils::lockMutex(m_lock);
+    m_pool.push(builder);
+    XMLPlatformUtils::unlockMutex(m_lock);
+}
+\r
+// Reads up to maxToRead bytes from the wrapped stream into toFill, advancing
+// m_pos by the number of bytes obtained, and returns that number. Returns 0
+// when nothing is requested or the stream is already at EOF. If the read
+// throws, the failure is logged, the first output byte is zeroed, and 0 is
+// returned.
+unsigned int StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toFill, const unsigned int maxToRead)
+{
+    if (maxToRead==0 || m_is.eof())
+        return 0;
+
+    unsigned int count=0;
+    try {
+        m_is.read(reinterpret_cast<char*>(toFill),maxToRead);
+        m_pos+=m_is.gcount();
+        count+=m_is.gcount();
+    }
+    catch(...) {
+        Category::getInstance(XMLTOOLING_LOGCAT".StreamInputSource").critStream() <<
+            "XML::StreamInputSource::StreamBinInputStream::readBytes caught an exception" << CategoryStream::ENDLINE;
+        *toFill=0;
+        return 0;
+    }
+    return count;
+}