2 * Licensed to the University Corporation for Advanced Internet
3 * Development, Inc. (UCAID) under one or more contributor license
4 * agreements. See the NOTICE file distributed with this work for
5 * additional information regarding copyright ownership.
7 * UCAID licenses this file to you under the Apache License,
8 * Version 2.0 (the "License"); you may not use this file except
9 * in compliance with the License. You may obtain a copy of the
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
17 * either express or implied. See the License for the specific
18 * language governing permissions and limitations under the License.
24 * A thread-safe pool of parsers that share characteristics.
28 #include "exceptions.h"
30 #include "util/CurlURLInputStream.h"
32 #include "util/ParserPool.h"
33 #include "util/Threads.h"
34 #include "util/XMLHelper.h"
38 #include <sys/types.h>
40 #include <xercesc/util/PlatformUtils.hpp>
41 #include <xercesc/util/XMLUniDefs.hpp>
42 #include <xercesc/sax/SAXException.hpp>
43 #include <xercesc/framework/MemBufInputSource.hpp>
44 #include <xercesc/framework/LocalFileInputSource.hpp>
45 #include <xercesc/framework/Wrapper4InputSource.hpp>
47 using namespace xmltooling::logging;
48 using namespace xmltooling;
49 using namespace xercesc;
// DOMErrorHandler implementation that writes parser-reported problems to the
// XMLTOOLING_LOGCAT".ParserPool" logging category.
54 class MyErrorHandler : public DOMErrorHandler {
// Starts the `errors` counter at zero.
// NOTE(review): presumably handleError() increments it for error severities - confirm.
58 MyErrorHandler() : errors(0) {}
// Logs a single DOMError; the log stream and message prefix depend on e.getSeverity().
60 bool handleError(const DOMError& e)
// Labels this scope "handleError" through the project's NDC helper.
63 xmltooling::NDC ndc("handleError");
65 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// Location of the problem; every message below reports its line and column.
// NOTE(review): locator is dereferenced without a null check - confirm that
// getLocation() cannot return null for the errors delivered here.
67 DOMLocator* locator=e.getLocation();
// Wraps the error message so that temp.get() can be streamed into the log.
68 auto_ptr_char temp(e.getMessage());
70 switch (e.getSeverity()) {
// Warnings are written to the warn stream.
71 case DOMError::DOM_SEVERITY_WARNING:
72 log.warnStream() << "warning on line " << locator->getLineNumber()
73 << ", column " << locator->getColumnNumber()
74 << ", message: " << temp.get() << logging::eol;
// Errors are written to the error stream.
77 case DOMError::DOM_SEVERITY_ERROR:
79 log.errorStream() << "error on line " << locator->getLineNumber()
80 << ", column " << locator->getColumnNumber()
81 << ", message: " << temp.get() << logging::eol;
// Fatal errors also use the error stream, with a "fatal error" prefix.
84 case DOMError::DOM_SEVERITY_FATAL_ERROR:
86 log.errorStream() << "fatal error on line " << locator->getLineNumber()
87 << ", column " << locator->getColumnNumber()
88 << ", message: " << temp.get() << logging::eol;
// Message labelled "undefined error type"; presumably reached when none of the
// severity cases above matched - confirm against the full switch.
93 log.errorStream() << "undefined error type on line " << locator->getLineNumber()
94 << ", column " << locator->getColumnNumber()
95 << ", message: " << temp.get() << logging::eol;
// Constructs a pool: stores the namespace-aware and schema-aware flags, creates the
// pool mutex with Mutex::create(), and allocates the SecurityManager that
// createBuilder() hands to each parser.
102 ParserPool::ParserPool(bool namespaceAware, bool schemaAware)
103 : m_namespaceAware(namespaceAware), m_schemaAware(schemaAware), m_lock(Mutex::create()), m_security(new SecurityManager()) {}
// Destructor: calls release() on pooled parsers for as long as m_pool is non-empty.
105 ParserPool::~ParserPool()
107 while(!m_pool.empty()) {
108 m_pool.top()->release();
// Returns a new, empty DOMDocument created by the DOMImplementation that
// DOMImplementationRegistry::getDOMImplementation(nullptr) supplies.
115 DOMDocument* ParserPool::newDocument()
117 return DOMImplementationRegistry::getDOMImplementation(nullptr)->createDocument();
// Variant built when XMLTOOLING_XERCESC_COMPLIANT_DOMLS is defined: parses a
// DOMLSInput with a pooled DOMLSParser.
120 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
122 DOMDocument* ParserPool::parse(DOMLSInput& domsrc)
// Borrow a parser. The janitor holds it until janitor.release() passes it to checkinBuilder().
// NOTE(review): presumably the janitor disposes of the parser if it is never
// released - confirm XercesJanitor semantics.
124 DOMLSParser* parser=checkoutBuilder();
125 XercesJanitor<DOMLSParser> janitor(parser);
// Register deh as the parser's DOM error handler for this parse.
128 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, dynamic_cast<DOMErrorHandler*>(&deh));
129 DOMDocument* doc=parser->parse(&domsrc);
// Failure path: XML errors were reported during parsing; details are in the log.
133 throw XMLParserException("XML error(s) during parsing, check log for specifics");
// Before the parser goes back to the pool: detach the error handler and set the
// user-adopts-document parameter to true.
135 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, (void*)nullptr);
136 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
137 checkinBuilder(janitor.release());
// Xerces exception: same cleanup and check-in, then throw an XMLParserException
// carrying the Xerces message, or "no message" when temp.get() is null.
140 catch (XMLException& ex) {
141 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, (void*)nullptr);
142 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
143 checkinBuilder(janitor.release());
144 auto_ptr_char temp(ex.getMessage());
145 throw XMLParserException(string("Xerces error during parsing: ") + (temp.get() ? temp.get() : "no message"));
// XMLToolingException: same cleanup and check-in of the parser.
147 catch (XMLToolingException&) {
148 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, (void*)nullptr);
149 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
150 checkinBuilder(janitor.release());
// Variant that parses a DOMInputSource with a pooled DOMBuilder.
// NOTE(review): presumably the alternative to the DOMLSInput overload when
// XMLTOOLING_XERCESC_COMPLIANT_DOMLS is not defined - confirm.
157 DOMDocument* ParserPool::parse(DOMInputSource& domsrc)
// Borrow a parser. The janitor holds it until janitor.release() passes it to checkinBuilder().
159 DOMBuilder* parser=checkoutBuilder();
160 XercesJanitor<DOMBuilder> janitor(parser);
// Register deh as the parser's error handler for this parse.
163 parser->setErrorHandler(&deh);
164 DOMDocument* doc=parser->parse(domsrc);
// Failure path: XML errors were reported during parsing; details are in the log.
168 throw XMLParserException("XML error(s) during parsing, check log for specifics");
// Before the parser goes back to the pool: detach the error handler and set the
// user-adopts-document feature to true.
170 parser->setErrorHandler(nullptr);
171 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
172 checkinBuilder(janitor.release());
// Xerces exception: same cleanup and check-in, then throw an XMLParserException
// carrying the Xerces message, or "no message" when temp.get() is null.
175 catch (XMLException& ex) {
176 parser->setErrorHandler(nullptr);
177 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
178 checkinBuilder(janitor.release());
179 auto_ptr_char temp(ex.getMessage());
180 throw XMLParserException(string("Xerces error during parsing: ") + (temp.get() ? temp.get() : "no message"));
// XMLToolingException: same cleanup and check-in of the parser.
182 catch (XMLToolingException&) {
183 parser->setErrorHandler(nullptr);
184 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
185 checkinBuilder(janitor.release());
// Parses XML read from a std::istream: wraps the stream in a StreamInputSource,
// adapts that with Wrapper4InputSource, and delegates to the input-source overload.
192 DOMDocument* ParserPool::parse(istream& is)
194 StreamInputSource src(is);
// The second argument is false, so the wrapper is told not to adopt the
// stack-allocated src.
195 Wrapper4InputSource domsrc(&src,false);
196 return parse(domsrc);
199 // Functor to double its argument separated by a character and append to a buffer
// T is a string-like type; the separator has type T::value_type.
200 template <class T> class doubleit
// t is the buffer appended to (held by reference); s is the separator.
203 doubleit(T& t, const typename T::value_type& s) : temp(t), sep(s) {}
// Appends "<key><sep><key><sep>" for one map entry. Only s.first is used;
// s.second does not appear in the output.
204 void operator() (const pair<const T,T>& s) { temp += s.first + sep + s.first + sep; }
// Held by reference: the separator object must outlive the functor.
206 const typename T::value_type& sep;
// Registers a schema file for a namespace: nsURI becomes the key and pathname the
// value in m_schemaLocMap, after the path has been checked with stat().
209 bool ParserPool::loadSchema(const XMLCh* nsURI, const XMLCh* pathname)
211 // Just check the pathname and then directly register the pair into the map.
213 auto_ptr_char p(pathname);
// Existence check. Two spellings of the same test appear (_stat and stat);
// NOTE(review): presumably a platform conditional selects one of them - confirm.
215 struct _stat stat_buf;
216 if (_stat(p.get(), &stat_buf) != 0)
218 struct stat stat_buf;
219 if (stat(p.get(), &stat_buf) != 0)
// Failure path: log the namespace and the path that could not be found.
223 xmltooling::NDC ndc("loadSchema");
225 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
226 auto_ptr_char n(nsURI);
227 log.error("failed to load schema for (%s), file not found (%s)",n.get(),p.get());
// Record the mapping, then rebuild m_schemaLocations from scratch out of every
// entry in the map, using a space as the separator.
232 m_schemaLocMap[nsURI]=pathname;
233 m_schemaLocations.erase();
234 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
// Convenience overload: wraps the narrow-character path in auto_ptr_XMLCh and
// delegates to the XMLCh overload.
239 bool ParserPool::loadCatalog(const char* pathname)
241 auto_ptr_XMLCh temp(pathname);
242 return loadCatalog(temp.get());
// Loads an XML catalog file and adds one m_schemaLocMap entry per <system> element
// (systemId attribute -> uri attribute), then rebuilds m_schemaLocations.
245 bool ParserPool::loadCatalog(const XMLCh* pathname)
248 xmltooling::NDC ndc("loadCatalog");
250 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// Element and attribute names used below, as XMLCh literals.
253 static const XMLCh catalog[] = UNICODE_LITERAL_7(c,a,t,a,l,o,g);
254 static const XMLCh system[] = UNICODE_LITERAL_6(s,y,s,t,e,m);
255 static const XMLCh systemId[] = UNICODE_LITERAL_8(s,y,s,t,e,m,I,d);
256 static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
// Spells "urn:oasis:names:tc:entity:xmlns:xml:catalog", null-terminated.
257 static const XMLCh CATALOG_NS[] = {
258 chLatin_u, chLatin_r, chLatin_n, chColon,
259 chLatin_o, chLatin_a, chLatin_s, chLatin_i, chLatin_s, chColon,
260 chLatin_n, chLatin_a, chLatin_m, chLatin_e, chLatin_s, chColon,
261 chLatin_t, chLatin_c, chColon,
262 chLatin_e, chLatin_n, chLatin_t, chLatin_i, chLatin_t, chLatin_y, chColon,
263 chLatin_x, chLatin_m, chLatin_l, chLatin_n, chLatin_s, chColon,
264 chLatin_x, chLatin_m, chLatin_l, chColon,
265 chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull
268 // Parse the catalog with the internal parser pool.
// The path is transcoded for the debug message only when debug logging is enabled.
270 if (log.isDebugEnabled()) {
271 auto_ptr_char temp(pathname);
272 log.debug("loading XML catalog from %s", temp.get());
275 LocalFileInputSource fsrc(nullptr,pathname);
276 Wrapper4InputSource domsrc(&fsrc,false);
// The parse goes through the parser returned by XMLToolingConfig::getConfig().getParser(),
// not through this object's own parse().
278 DOMDocument* doc=XMLToolingConfig::getConfig().getParser().parse(domsrc);
279 XercesJanitor<DOMDocument> janitor(doc);
281 // Check root element.
282 const DOMElement* root=doc->getDocumentElement();
283 if (!XMLHelper::isNodeNamed(root,CATALOG_NS,catalog)) {
284 auto_ptr_char temp(pathname);
285 log.error("unknown root element, failed to load XML catalog from %s", temp.get());
289 // Fetch all the <system> elements.
290 DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,system);
// `root` is reused as the loop cursor over the <system> elements.
292 for (XMLSize_t i=0; i<mappings->getLength(); i++) {
293 root=static_cast<DOMElement*>(mappings->item(i));
294 const XMLCh* from=root->getAttributeNS(nullptr,systemId);
295 const XMLCh* to=root->getAttributeNS(nullptr,uri);
296 m_schemaLocMap[from]=to;
// Rebuild the schema location string from every entry in the map.
298 m_schemaLocations.erase();
299 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
// Exceptions derived from std::exception are caught and logged here.
301 catch (exception& e) {
302 log.error("catalog loader caught exception: %s", e.what());
// Resolver callback that maps a requested systemId to a local file, or to an empty
// in-memory source when the request is not allowed.
// Two signatures appear: resolveResource() under XMLTOOLING_XERCESC_COMPLIANT_DOMLS,
// and resolveEntity().
// NOTE(review): resolveEntity() is presumably the alternative branch of the same
// conditional - confirm.
309 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
310 DOMLSInput* ParserPool::resolveResource(
311 const XMLCh *const resourceType,
312 const XMLCh *const namespaceUri,
313 const XMLCh *const publicId,
314 const XMLCh *const systemId,
315 const XMLCh *const baseURI
318 DOMInputSource* ParserPool::resolveEntity(
319 const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI
324 xmltooling::NDC ndc("resolveEntity");
329 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// The identifiers are transcoded for the debug message only when debug logging is enabled.
330 if (log.isDebugEnabled()) {
331 auto_ptr_char sysId(systemId);
332 auto_ptr_char base(baseURI);
333 log.debug("asked to resolve %s with baseURI %s",sysId.get(),base.get() ? base.get() : "(null)");
336 // Find well-known schemas in the specified location.
// Step 1: systemId is a key in m_schemaLocMap -> serve the mapped path.
337 map<xstring,xstring>::const_iterator i=m_schemaLocMap.find(systemId);
338 if (i!=m_schemaLocMap.end())
339 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str()));
341 // Check for entity as a value in the map.
// Step 2: linear scan; the first mapped path that ends with systemId is served.
342 for (i=m_schemaLocMap.begin(); i!=m_schemaLocMap.end(); ++i) {
343 if (XMLString::endsWith(i->second.c_str(), systemId))
344 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str()));
347 // We'll allow anything without embedded slashes.
// Step 3: only '/' (chForwardSlash) is tested here; such a systemId is opened as
// a local file with baseURI as the base path.
348 if (XMLString::indexOf(systemId, chForwardSlash)==-1)
349 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,systemId));
351 // Shortcircuit the request.
// Step 4: everything else gets a zero-length memory buffer instead of the
// requested resource.
352 auto_ptr_char temp(systemId);
353 log.debug("unauthorized entity request (%s), blocking it", temp.get());
354 static const XMLByte nullbuf[] = {0};
355 return new Wrapper4InputSource(new MemBufInputSource(nullbuf,0,systemId));
// DOMLSParser variant (XMLTOOLING_XERCESC_COMPLIANT_DOMLS): creates and configures
// a new synchronous LS parser for the pool.
358 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
360 DOMLSParser* ParserPool::createBuilder()
// "LS" selects a DOM implementation with the Load/Save feature.
362 static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
363 DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);
364 DOMLSParser* parser=static_cast<DOMImplementationLS*>(impl)->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS,nullptr);
365 parser->getDomConfig()->setParameter(XMLUni::fgDOMNamespaces, m_namespaceAware);
// Validation setup: namespaces, schema support, validation, and grammar caching
// are all switched on.
// NOTE(review): presumably applied only when m_schemaAware is set - confirm the guard.
367 parser->getDomConfig()->setParameter(XMLUni::fgDOMNamespaces, true);
368 parser->getDomConfig()->setParameter(XMLUni::fgXercesSchema, true);
369 parser->getDomConfig()->setParameter(XMLUni::fgDOMValidate, true);
370 parser->getDomConfig()->setParameter(XMLUni::fgXercesCacheGrammarFromParse, true);
372 // We build a "fake" schema location hint that binds each namespace to itself.
373 // This ensures the entity resolver will be given the namespace as a systemId it can check.
374 parser->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Always applied: the user adopts the document, default entity resolution is
// disabled, this pool is the resource resolver, and the pool's security manager
// is attached.
376 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
377 parser->getDomConfig()->setParameter(XMLUni::fgXercesDisableDefaultEntityResolution, true);
378 parser->getDomConfig()->setParameter(XMLUni::fgDOMResourceResolver, dynamic_cast<DOMLSResourceResolver*>(this));
379 parser->getDomConfig()->setParameter(XMLUni::fgXercesSecurityManager, m_security);
// Hands out a DOMLSParser: a freshly created one when the pool is empty, otherwise
// the one on top of m_pool.
383 DOMLSParser* ParserPool::checkoutBuilder()
386 if (m_pool.empty()) {
387 DOMLSParser* builder=createBuilder();
390 DOMLSParser* p=m_pool.top();
// A reused parser gets the current m_schemaLocations string, which may have
// changed since the parser was created.
393 p->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Returns a DOMLSParser to the pool by pushing it onto m_pool.
397 void ParserPool::checkinBuilder(DOMLSParser* builder)
401 m_pool.push(builder);
// DOMBuilder variant: creates and configures a new synchronous DOMBuilder for the pool.
407 DOMBuilder* ParserPool::createBuilder()
// "LS" selects a DOM implementation with the Load/Save feature.
409 static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
410 DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);
411 DOMBuilder* parser=static_cast<DOMImplementationLS*>(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0);
412 parser->setFeature(XMLUni::fgDOMNamespaces, m_namespaceAware);
// Validation setup: namespaces, schema support, validation, and grammar caching
// are all switched on.
// NOTE(review): presumably applied only when m_schemaAware is set - confirm the guard.
414 parser->setFeature(XMLUni::fgDOMNamespaces, true);
415 parser->setFeature(XMLUni::fgXercesSchema, true);
416 parser->setFeature(XMLUni::fgDOMValidation, true);
417 parser->setFeature(XMLUni::fgXercesCacheGrammarFromParse, true);
419 // We build a "fake" schema location hint that binds each namespace to itself.
420 // This ensures the entity resolver will be given the namespace as a systemId it can check.
421 parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Always applied: the pool's security manager is attached, the user adopts the
// document, default entity resolution is disabled, and this pool is the entity resolver.
423 parser->setProperty(XMLUni::fgXercesSecurityManager, m_security);
424 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
425 parser->setFeature(XMLUni::fgXercesDisableDefaultEntityResolution, true);
426 parser->setEntityResolver(this);
// Hands out a DOMBuilder: a freshly created one when the pool is empty, otherwise
// the one on top of m_pool.
430 DOMBuilder* ParserPool::checkoutBuilder()
433 if (m_pool.empty()) {
434 DOMBuilder* builder=createBuilder();
437 DOMBuilder* p=m_pool.top();
// A reused parser gets the current m_schemaLocations string, which may have
// changed since the parser was created.
440 p->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Returns a DOMBuilder to the pool by pushing it onto m_pool.
444 void ParserPool::checkinBuilder(DOMBuilder* builder)
448 m_pool.push(builder);
// Input source backed by a std::istream. The stream is kept by reference in m_is,
// and systemId is passed to the InputSource base class.
454 StreamInputSource::StreamInputSource(istream& is, const char* systemId) : InputSource(systemId), m_is(is)
// Returns a newly allocated StreamBinInputStream that reads from m_is.
458 BinInputStream* StreamInputSource::makeStream() const
460 return new StreamBinInputStream(m_is);
// Binds the adapter to the stream and starts the position counter m_pos at zero.
463 StreamInputSource::StreamBinInputStream::StreamBinInputStream(istream& is) : m_is(is), m_pos(0)
// Position accessor for the stream adapter.
// NOTE(review): the return type is presumably selected by the
// XMLTOOLING_XERCESC_64BITSAFE conditional, and the value is presumably m_pos - confirm.
467 #ifdef XMLTOOLING_XERCESC_64BITSAFE
472 StreamInputSource::StreamBinInputStream::curPos() const
// Content-type accessor, preceded by an XMLTOOLING_XERCESC_64BITSAFE guard;
// it returns a const XMLCh* string.
477 #ifdef XMLTOOLING_XERCESC_64BITSAFE
478 const XMLCh* StreamInputSource::StreamBinInputStream::getContentType() const
// Reads up to maxToRead bytes from the wrapped istream into toFill.
// m_pos and bytes_read both advance by the number of bytes the stream delivered.
484 xsecsize_t StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead)
486 XMLByte* target=toFill;
487 xsecsize_t bytes_read=0,request=maxToRead;
489 // Fulfill the rest by reading from the stream.
// The read is skipped for a zero-length request or when the stream is at EOF or
// in a failed state.
490 if (request && !m_is.eof() && !m_is.fail()) {
492 m_is.read(reinterpret_cast<char* const>(target),request);
// gcount() is the number of bytes the read() above actually extracted.
493 m_pos+=m_is.gcount();
494 bytes_read+=m_is.gcount();
// A stream failure raised as an exception is logged at critical level.
496 catch(ios_base::failure& e) {
497 Category::getInstance(XMLTOOLING_LOGCAT".StreamInputSource").critStream()
498 << "XML::StreamInputSource::StreamBinInputStream::readBytes caught an exception: " << e.what()
// XMLTOOLING_LITE build: URL input source constructed from a URL string.
// cacheTag is accepted but not used in the initializer list.
507 #ifdef XMLTOOLING_LITE
509 URLInputSource::URLInputSource(const XMLCh* url, const char* systemId, string* cacheTag) : InputSource(systemId), m_url(url)
// URL input source constructed from a DOM element: the URL is taken from the
// element's "url" attribute, falling back to "uri".
513 URLInputSource::URLInputSource(const DOMElement* e, const char* systemId, string* cacheTag) : InputSource(systemId)
515 static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
516 static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l);
// Try "url" first; a null or empty value triggers the "uri" fallback.
518 const XMLCh* attr = e->getAttributeNS(nullptr, url);
519 if (!attr || !*attr) {
520 attr = e->getAttributeNS(nullptr, uri);
// NOTE(review): presumably thrown only when the "uri" fallback is also empty - confirm the guard.
522 throw IOException("No URL supplied via DOM to URLInputSource constructor.");
// Creates the input stream by delegating to m_url.makeNewStream().
528 BinInputStream* URLInputSource::makeStream() const
530 // Ask the URL to create us an appropriate input stream
531 return m_url.makeNewStream();
// URL-string constructor that stores the cache tag pointer and the URL, and leaves
// m_root null so that makeStream() takes the URL path.
// NOTE(review): presumably the non-XMLTOOLING_LITE counterpart of the constructor
// with the same signature - confirm.
536 URLInputSource::URLInputSource(const XMLCh* url, const char* systemId, string* cacheTag)
537 : InputSource(systemId), m_cacheTag(cacheTag), m_url(url), m_root(nullptr)
// DOM-element constructor that stores the cache tag pointer and keeps the element
// pointer in m_root; m_url is not set in the initializer list.
541 URLInputSource::URLInputSource(const DOMElement* e, const char* systemId, string* cacheTag)
542 : InputSource(systemId), m_cacheTag(cacheTag), m_root(e)
// Returns a new CurlURLInputStream, built from the DOM element when m_root is set
// and from the URL string otherwise. m_cacheTag is passed through in both cases.
546 BinInputStream* URLInputSource::makeStream() const
548 return m_root ? new CurlURLInputStream(m_root, m_cacheTag) : new CurlURLInputStream(m_url.get(), m_cacheTag);
// The element name "URLInputSourceStatus" in two encodings: a narrow char string
// and a 20-character XMLCh literal.
553 const char URLInputSource::asciiStatusCodeElementName[] = "URLInputSourceStatus";
555 const XMLCh URLInputSource::utf16StatusCodeElementName[] = UNICODE_LITERAL_20(U,R,L,I,n,p,u,t,S,o,u,r,c,e,S,t,a,t,u,s);