2 * Copyright 2001-2010 Internet2
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
20 * A thread-safe pool of parsers that share characteristics.
24 #include "exceptions.h"
26 #include "util/CurlURLInputStream.h"
28 #include "util/ParserPool.h"
29 #include "util/Threads.h"
30 #include "util/XMLHelper.h"
34 #include <sys/types.h>
36 #include <xercesc/util/PlatformUtils.hpp>
37 #include <xercesc/util/XMLUniDefs.hpp>
38 #include <xercesc/sax/SAXException.hpp>
39 #include <xercesc/framework/MemBufInputSource.hpp>
40 #include <xercesc/framework/LocalFileInputSource.hpp>
41 #include <xercesc/framework/Wrapper4InputSource.hpp>
43 using namespace xmltooling::logging;
44 using namespace xmltooling;
45 using namespace xercesc;
// DOM error handler installed while parsing. Each DOMError it receives is written to the
// XMLTOOLING_LOGCAT".ParserPool" log category together with its line, column and message.
50 class MyErrorHandler : public DOMErrorHandler {
// Starts with the error counter at zero.
54 MyErrorHandler() : errors(0) {}
// Logs one DOMError at a level chosen from e.getSeverity().
// NOTE(review): the return statements, the branch terminators and any update of `errors`
// sit on lines that are not shown in this listing.
56 bool handleError(const DOMError& e)
59 xmltooling::NDC ndc("handleError");
61 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// The locator is dereferenced in every branch below; no null check is visible here.
63 DOMLocator* locator=e.getLocation();
// temp.get() is what gets streamed into the log (presumably a narrow-character copy of the message).
64 auto_ptr_char temp(e.getMessage());
66 switch (e.getSeverity()) {
// Warnings go to the warn stream.
67 case DOMError::DOM_SEVERITY_WARNING:
68 log.warnStream() << "warning on line " << locator->getLineNumber()
69 << ", column " << locator->getColumnNumber()
70 << ", message: " << temp.get() << logging::eol;
// Errors and fatal errors both go to the error stream, with different prefixes.
73 case DOMError::DOM_SEVERITY_ERROR:
75 log.errorStream() << "error on line " << locator->getLineNumber()
76 << ", column " << locator->getColumnNumber()
77 << ", message: " << temp.get() << logging::eol;
80 case DOMError::DOM_SEVERITY_FATAL_ERROR:
82 log.errorStream() << "fatal error on line " << locator->getLineNumber()
83 << ", column " << locator->getColumnNumber()
84 << ", message: " << temp.get() << logging::eol;
// Any other severity value (the label for this branch is presumably on an omitted line).
89 log.errorStream() << "undefined error type on line " << locator->getLineNumber()
90 << ", column " << locator->getColumnNumber()
91 << ", message: " << temp.get() << logging::eol;
// Constructs a pool. Records the namespace-aware and schema-aware flags, creates the mutex
// kept in m_lock via Mutex::create(), and allocates the SecurityManager kept in m_security.
98 ParserPool::ParserPool(bool namespaceAware, bool schemaAware)
99 : m_namespaceAware(namespaceAware), m_schemaAware(schemaAware), m_lock(Mutex::create()), m_security(new SecurityManager()) {}
// Destructor: calls release() on each parser still held in m_pool.
// NOTE(review): the statement that removes the entry from m_pool, and any cleanup of m_lock
// and m_security, are not visible in this listing — confirm against the full file.
101 ParserPool::~ParserPool()
103 while(!m_pool.empty()) {
104 m_pool.top()->release();
// Returns a new document created by the DOM implementation that the registry
// hands back for a null feature string.
111 DOMDocument* ParserPool::newDocument()
113 return DOMImplementationRegistry::getDOMImplementation(nullptr)->createDocument();
116 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
// Parses a DOMLSInput with a pooled DOMLSParser (XMLTOOLING_XERCESC_COMPLIANT_DOMLS variant).
// Throws XMLParserException when parsing reports errors or Xerces raises an XMLException.
// NOTE(review): the declaration of `deh`, the `try`, the condition guarding the first throw
// and the return/rethrow statements are on lines omitted from this listing.
118 DOMDocument* ParserPool::parse(DOMLSInput& domsrc)
120 DOMLSParser* parser=checkoutBuilder();
// The janitor holds the parser until janitor.release() passes it back to checkinBuilder().
121 XercesJanitor<DOMLSParser> janitor(parser);
// Install the error handler for this parse only.
124 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, dynamic_cast<DOMErrorHandler*>(&deh));
125 DOMDocument* doc=parser->parse(&domsrc);
129 throw XMLParserException("XML error(s) during parsing, check log for specifics");
// Success path: detach the handler, restore the user-adopts-document setting, return the parser to the pool.
131 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, nullptr);
132 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
133 checkinBuilder(janitor.release());
// Xerces failure: same parser reset, then convert to XMLParserException carrying the Xerces message.
136 catch (XMLException& ex) {
137 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, nullptr);
138 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
139 checkinBuilder(janitor.release());
140 auto_ptr_char temp(ex.getMessage());
141 throw XMLParserException(string("Xerces error during parsing: ") + (temp.get() ? temp.get() : "no message"));
// XMLTooling failure (including the throw above): same parser reset before the parser goes back to the pool.
143 catch (XMLToolingException&) {
144 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, nullptr);
145 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
146 checkinBuilder(janitor.release());
// Parses a DOMInputSource with a pooled DOMBuilder (variant for the older Xerces DOMBuilder API).
// Throws XMLParserException when parsing reports errors or Xerces raises an XMLException.
// NOTE(review): the declaration of `deh`, the `try`, the condition guarding the first throw
// and the return/rethrow statements are on lines omitted from this listing.
153 DOMDocument* ParserPool::parse(DOMInputSource& domsrc)
155 DOMBuilder* parser=checkoutBuilder();
// The janitor holds the parser until janitor.release() passes it back to checkinBuilder().
156 XercesJanitor<DOMBuilder> janitor(parser);
// Install the error handler for this parse only.
159 parser->setErrorHandler(&deh);
160 DOMDocument* doc=parser->parse(domsrc);
164 throw XMLParserException("XML error(s) during parsing, check log for specifics");
// Success path: detach the handler, restore the user-adopts-document feature, return the parser to the pool.
166 parser->setErrorHandler(nullptr);
167 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
168 checkinBuilder(janitor.release());
// Xerces failure: same parser reset, then convert to XMLParserException carrying the Xerces message.
171 catch (XMLException& ex) {
172 parser->setErrorHandler(nullptr);
173 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
174 checkinBuilder(janitor.release());
175 auto_ptr_char temp(ex.getMessage());
176 throw XMLParserException(string("Xerces error during parsing: ") + (temp.get() ? temp.get() : "no message"));
// XMLTooling failure (including the throw above): same parser reset before the parser goes back to the pool.
178 catch (XMLToolingException&) {
179 parser->setErrorHandler(nullptr);
180 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
181 checkinBuilder(janitor.release());
// Convenience overload: parses XML read from a std::istream by wrapping the stream
// and delegating to the input-source overload of parse().
188 DOMDocument* ParserPool::parse(istream& is)
190 StreamInputSource src(is);
// `false` is presumably the wrapper's adopt flag, so the stack-allocated `src` is not
// deleted by the wrapper — confirm against the Xerces Wrapper4InputSource documentation.
191 Wrapper4InputSource domsrc(&src,false);
192 return parse(domsrc);
195 // Functor for use with for_each over a map: for every entry it appends the entry's KEY twice,
// each copy followed by the separator, to an external buffer (key sep key sep). The mapped
// value (s.second) is not used.
196 template <class T> class doubleit
// t is the buffer appended to; s is the separator, which is kept by reference (see `sep` below),
// so the separator object must outlive the functor.
199 doubleit(T& t, const typename T::value_type& s) : temp(t), sep(s) {}
200 void operator() (const pair<const T,T>& s) { temp += s.first + sep + s.first + sep; }
202 const typename T::value_type& sep;
// Registers a schema file for a namespace after checking the file with stat().
//   nsURI    - namespace URI, used as the key in m_schemaLocMap
//   pathname - schema file path, stored as the mapped value
// NOTE(review): the return statements, the preprocessor lines selecting between the two stat
// variants, and any locking are on lines omitted from this listing.
205 bool ParserPool::loadSchema(const XMLCh* nsURI, const XMLCh* pathname)
207 // Just check the pathname and then directly register the pair into the map.
209 auto_ptr_char p(pathname);
// Variant using _stat (presumably the Windows branch — confirm).
211 struct _stat stat_buf;
212 if (_stat(p.get(), &stat_buf) != 0)
// Variant using stat.
214 struct stat stat_buf;
215 if (stat(p.get(), &stat_buf) != 0)
// Failure path: log the namespace and the path that could not be found.
219 xmltooling::NDC ndc("loadSchema");
221 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
222 auto_ptr_char n(nsURI);
223 log.error("failed to load schema for (%s), file not found (%s)",n.get(),p.get());
// Record the mapping, then rebuild m_schemaLocations from the whole map:
// doubleit appends each namespace key twice, separated by spaces.
228 m_schemaLocMap[nsURI]=pathname;
229 m_schemaLocations.erase();
230 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
// Loads an XML catalog file and adds each of its <system> entries (systemId -> uri)
// to m_schemaLocMap, then rebuilds m_schemaLocations.
//   pathname - location of the catalog file
// NOTE(review): the `try`, the return statements and any locking are on lines omitted from this listing.
235 bool ParserPool::loadCatalog(const XMLCh* pathname)
238 xmltooling::NDC ndc("loadCatalog");
240 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// Element and attribute names looked up in the catalog document.
243 static const XMLCh catalog[] = UNICODE_LITERAL_7(c,a,t,a,l,o,g);
244 static const XMLCh system[] = UNICODE_LITERAL_6(s,y,s,t,e,m);
245 static const XMLCh systemId[] = UNICODE_LITERAL_8(s,y,s,t,e,m,I,d);
246 static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
// Spells "urn:oasis:names:tc:entity:xmlns:xml:catalog".
247 static const XMLCh CATALOG_NS[] = {
248 chLatin_u, chLatin_r, chLatin_n, chColon,
249 chLatin_o, chLatin_a, chLatin_s, chLatin_i, chLatin_s, chColon,
250 chLatin_n, chLatin_a, chLatin_m, chLatin_e, chLatin_s, chColon,
251 chLatin_t, chLatin_c, chColon,
252 chLatin_e, chLatin_n, chLatin_t, chLatin_i, chLatin_t, chLatin_y, chColon,
253 chLatin_x, chLatin_m, chLatin_l, chLatin_n, chLatin_s, chColon,
254 chLatin_x, chLatin_m, chLatin_l, chColon,
255 chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull
258 // Parse the catalog with the internal parser pool.
// The path is only converted for logging when debug output is enabled.
260 if (log.isDebugEnabled()) {
261 auto_ptr_char temp(pathname);
262 log.debug("loading XML catalog from %s", temp.get());
265 LocalFileInputSource fsrc(nullptr,pathname);
266 Wrapper4InputSource domsrc(&fsrc,false);
// The parser comes from the global XMLToolingConfig, not from this pool instance.
268 DOMDocument* doc=XMLToolingConfig::getConfig().getParser().parse(domsrc);
269 XercesJanitor<DOMDocument> janitor(doc);
271 // Check root element.
272 const DOMElement* root=doc->getDocumentElement();
273 if (!XMLHelper::isNodeNamed(root,CATALOG_NS,catalog)) {
274 auto_ptr_char temp(pathname);
275 log.error("unknown root element, failed to load XML catalog from %s", temp.get());
279 // Fetch all the <system> elements.
280 DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,system);
// `root` is reused as the cursor for the current <system> element inside the loop.
282 for (XMLSize_t i=0; i<mappings->getLength(); i++) {
283 root=static_cast<DOMElement*>(mappings->item(i));
284 const XMLCh* from=root->getAttributeNS(nullptr,systemId);
285 const XMLCh* to=root->getAttributeNS(nullptr,uri);
286 m_schemaLocMap[from]=to;
// Rebuild the schema location string from the whole map.
288 m_schemaLocations.erase();
289 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
// Any std::exception raised above is logged here.
291 catch (exception& e) {
292 log.error("catalog loader caught exception: %s", e.what());
// Entity/resource resolution callback. The signature depends on XMLTOOLING_XERCESC_COMPLIANT_DOMLS
// (resolveResource for the DOMLS API, resolveEntity otherwise); the visible body is shared.
// Resolution order, as visible below:
//   1. systemId is a key in m_schemaLocMap            -> the mapped local file
//   2. some mapped path ends with systemId            -> that local file
//   3. systemId contains no forward slash             -> local file named by systemId
//   4. anything else                                  -> an empty in-memory source (request blocked)
// NOTE(review): the preprocessor lines between the two signatures and a few lines after the
// NDC declaration are omitted from this listing; any early exit there is not documented here.
299 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
300 DOMLSInput* ParserPool::resolveResource(
301 const XMLCh *const resourceType,
302 const XMLCh *const namespaceUri,
303 const XMLCh *const publicId,
304 const XMLCh *const systemId,
305 const XMLCh *const baseURI
308 DOMInputSource* ParserPool::resolveEntity(
309 const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI
314 xmltooling::NDC ndc("resolveEntity");
319 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// Strings are only converted for logging when debug output is enabled.
320 if (log.isDebugEnabled()) {
321 auto_ptr_char sysId(systemId);
322 auto_ptr_char base(baseURI);
323 log.debug("asked to resolve %s with baseURI %s",sysId.get(),base.get() ? base.get() : "(null)");
326 // Find well-known schemas in the specified location.
327 map<xstring,xstring>::const_iterator i=m_schemaLocMap.find(systemId);
328 if (i!=m_schemaLocMap.end())
329 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str()));
331 // Check for entity as a value in the map.
// Linear scan: accepts the first mapped path whose tail matches systemId.
332 for (i=m_schemaLocMap.begin(); i!=m_schemaLocMap.end(); ++i) {
333 if (XMLString::endsWith(i->second.c_str(), systemId))
334 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str()));
337 // We'll allow anything without embedded slashes.
338 if (XMLString::indexOf(systemId, chForwardSlash)==-1)
339 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,systemId));
341 // Shortcircuit the request.
// The blocked request is answered with a zero-length memory buffer.
342 auto_ptr_char temp(systemId);
343 log.debug("unauthorized entity request (%s), blocking it", temp.get());
344 static const XMLByte nullbuf[] = {0};
345 return new Wrapper4InputSource(new MemBufInputSource(nullbuf,0,systemId));
348 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
// Creates and configures a new synchronous DOMLSParser from the "LS" DOM implementation.
// NOTE(review): the condition that guards the schema-related block (presumably m_schemaAware)
// and the return statement are on lines omitted from this listing.
350 DOMLSParser* ParserPool::createBuilder()
352 static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
353 DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);
354 DOMLSParser* parser=static_cast<DOMImplementationLS*>(impl)->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS,nullptr);
// Namespace processing follows the pool-wide flag ...
355 parser->getDomConfig()->setParameter(XMLUni::fgDOMNamespaces, m_namespaceAware);
// ... and is switched on again, together with schema processing, validation and grammar caching, here.
357 parser->getDomConfig()->setParameter(XMLUni::fgDOMNamespaces, true);
358 parser->getDomConfig()->setParameter(XMLUni::fgXercesSchema, true);
359 parser->getDomConfig()->setParameter(XMLUni::fgDOMValidate, true);
360 parser->getDomConfig()->setParameter(XMLUni::fgXercesCacheGrammarFromParse, true);
362 // We build a "fake" schema location hint that binds each namespace to itself.
363 // This ensures the entity resolver will be given the namespace as a systemId it can check.
364 parser->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Settings applied after the schema block: the caller adopts parsed documents, default entity
// resolution is disabled, and this pool acts as resource resolver with its shared SecurityManager.
366 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
367 parser->getDomConfig()->setParameter(XMLUni::fgXercesDisableDefaultEntityResolution, true);
368 parser->getDomConfig()->setParameter(XMLUni::fgDOMResourceResolver, dynamic_cast<DOMLSResourceResolver*>(this));
369 parser->getDomConfig()->setParameter(XMLUni::fgXercesSecurityManager, m_security);
// Hands out a DOMLSParser: a newly created one when m_pool is empty, otherwise the one on top of m_pool.
// NOTE(review): locking, the removal of the top entry, the condition around the final
// setParameter call and the return statements are on lines omitted from this listing.
373 DOMLSParser* ParserPool::checkoutBuilder()
376 if (m_pool.empty()) {
377 DOMLSParser* builder=createBuilder();
380 DOMLSParser* p=m_pool.top();
// A reused parser is given the current m_schemaLocations string, which may have changed since it was created.
383 p->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Returns a DOMLSParser to the pool by pushing it onto m_pool.
// NOTE(review): any null check or locking is on lines omitted from this listing.
387 void ParserPool::checkinBuilder(DOMLSParser* builder)
391 m_pool.push(builder);
// Creates and configures a new synchronous DOMBuilder from the "LS" DOM implementation
// (variant for the older Xerces DOMBuilder API).
// NOTE(review): the condition that guards the schema-related block (presumably m_schemaAware)
// and the return statement are on lines omitted from this listing.
397 DOMBuilder* ParserPool::createBuilder()
399 static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
400 DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);
401 DOMBuilder* parser=static_cast<DOMImplementationLS*>(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0);
// Namespace processing follows the pool-wide flag ...
402 parser->setFeature(XMLUni::fgDOMNamespaces, m_namespaceAware);
// ... and is switched on again, together with schema processing, validation and grammar caching, here.
404 parser->setFeature(XMLUni::fgDOMNamespaces, true);
405 parser->setFeature(XMLUni::fgXercesSchema, true);
406 parser->setFeature(XMLUni::fgDOMValidation, true);
407 parser->setFeature(XMLUni::fgXercesCacheGrammarFromParse, true);
409 // We build a "fake" schema location hint that binds each namespace to itself.
410 // This ensures the entity resolver will be given the namespace as a systemId it can check.
411 parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Settings applied after the schema block: shared SecurityManager, caller adopts parsed documents,
// default entity resolution disabled, and this pool installed as the entity resolver.
413 parser->setProperty(XMLUni::fgXercesSecurityManager, m_security);
414 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
415 parser->setFeature(XMLUni::fgXercesDisableDefaultEntityResolution, true);
416 parser->setEntityResolver(this);
// Hands out a DOMBuilder: a newly created one when m_pool is empty, otherwise the one on top of m_pool.
// NOTE(review): locking, the removal of the top entry, the condition around the final
// setProperty call and the return statements are on lines omitted from this listing.
420 DOMBuilder* ParserPool::checkoutBuilder()
423 if (m_pool.empty()) {
424 DOMBuilder* builder=createBuilder();
427 DOMBuilder* p=m_pool.top();
// A reused parser is given the current m_schemaLocations string, which may have changed since it was created.
430 p->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Returns a DOMBuilder to the pool by pushing it onto m_pool.
// NOTE(review): any null check or locking is on lines omitted from this listing.
434 void ParserPool::checkinBuilder(DOMBuilder* builder)
438 m_pool.push(builder);
// Input source backed by a std::istream. Passes systemId to the InputSource base and keeps
// a reference to the stream, so the stream must outlive this object.
444 StreamInputSource::StreamInputSource(istream& is, const char* systemId) : InputSource(systemId), m_is(is)
// Returns a newly allocated StreamBinInputStream that reads from the wrapped istream.
448 BinInputStream* StreamInputSource::makeStream() const
450 return new StreamBinInputStream(m_is);
// Binds the binary input stream to `is` (kept by reference) and starts the position counter at zero.
453 StreamInputSource::StreamBinInputStream::StreamBinInputStream(istream& is) : m_is(is), m_pos(0)
// Position accessor for the stream wrapper.
// NOTE(review): the return type (chosen by the XMLTOOLING_XERCESC_64BITSAFE check) and the body
// are on lines omitted from this listing; presumably it reports m_pos, the counter that
// readBytes advances — confirm against the full file.
457 #ifdef XMLTOOLING_XERCESC_64BITSAFE
462 StreamInputSource::StreamBinInputStream::curPos() const
// Compiled only when XMLTOOLING_XERCESC_64BITSAFE is defined.
// NOTE(review): the body is on lines omitted from this listing, so the returned value is not documented here.
467 #ifdef XMLTOOLING_XERCESC_64BITSAFE
468 const XMLCh* StreamInputSource::StreamBinInputStream::getContentType() const
// Reads up to maxToRead bytes from the wrapped istream into toFill.
//   toFill    - destination buffer
//   maxToRead - maximum number of bytes to read
// NOTE(review): the `try`, the rest of the catch block and the return statement are on lines
// omitted from this listing; presumably bytes_read is returned — confirm.
474 xsecsize_t StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead)
476 XMLByte* target=toFill;
477 xsecsize_t bytes_read=0,request=maxToRead;
479 // Fulfill the rest by reading from the stream.
// Nothing is read when the request is zero or the stream is already at EOF or in a failed state.
480 if (request && !m_is.eof() && !m_is.fail()) {
482 m_is.read(reinterpret_cast<char* const>(target),request);
// gcount() is the number of bytes the read above actually delivered.
483 m_pos+=m_is.gcount();
484 bytes_read+=m_is.gcount();
// Stream failures raised as exceptions are logged at critical level.
486 catch(ios_base::failure& e) {
487 Category::getInstance(XMLTOOLING_LOGCAT".StreamInputSource").critStream()
488 << "XML::StreamInputSource::StreamBinInputStream::readBytes caught an exception: " << e.what()
497 #ifdef XMLTOOLING_LITE
// XMLTOOLING_LITE variant: builds the source from a URL string. cacheTag does not appear
// in the visible initializer list of this variant.
499 URLInputSource::URLInputSource(const XMLCh* url, const char* systemId, string* cacheTag) : InputSource(systemId), m_url(url)
// XMLTOOLING_LITE variant: takes the URL from a DOM element, reading the "url" attribute first
// and falling back to "uri". Throws IOException when no URL is supplied.
// NOTE(review): the condition guarding the throw and the statement that stores the value
// into m_url are on lines omitted from this listing.
503 URLInputSource::URLInputSource(const DOMElement* e, const char* systemId, string* cacheTag) : InputSource(systemId)
505 static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
506 static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l);
508 const XMLCh* attr = e->getAttributeNS(nullptr, url);
// A missing or empty "url" attribute triggers the fallback to "uri".
509 if (!attr || !*attr) {
510 attr = e->getAttributeNS(nullptr, uri);
512 throw IOException("No URL supplied via DOM to URLInputSource constructor.");
// XMLTOOLING_LITE variant: the stream is produced by m_url itself.
518 BinInputStream* URLInputSource::makeStream() const
520 // Ask the URL to create us an appropriate input stream
521 return m_url.makeNewStream();
// Non-lite variant: builds the source from a URL string. Stores the cacheTag pointer and the URL,
// and leaves m_root null so makeStream() takes the URL-based path.
526 URLInputSource::URLInputSource(const XMLCh* url, const char* systemId, string* cacheTag)
527 : InputSource(systemId), m_cacheTag(cacheTag), m_url(url), m_root(nullptr)
// Non-lite variant: builds the source from a DOM element. Stores the cacheTag pointer and keeps
// the element pointer in m_root, so the element must stay valid until makeStream() has used it.
531 URLInputSource::URLInputSource(const DOMElement* e, const char* systemId, string* cacheTag)
532 : InputSource(systemId), m_cacheTag(cacheTag), m_root(e)
// Returns a newly allocated CurlURLInputStream: built from the DOM element when m_root is set,
// otherwise from the stored URL string. m_cacheTag is passed through in both cases.
536 BinInputStream* URLInputSource::makeStream() const
538 return m_root ? new CurlURLInputStream(m_root, m_cacheTag) : new CurlURLInputStream(m_url.get(), m_cacheTag);
// Name of the status element, "URLInputSourceStatus", as a narrow-character string ...
543 const char URLInputSource::asciiStatusCodeElementName[] = "URLInputSourceStatus";
// ... and the same 20-character name as an XMLCh string.
545 const XMLCh URLInputSource::utf16StatusCodeElementName[] = UNICODE_LITERAL_20(U,R,L,I,n,p,u,t,S,o,u,r,c,e,S,t,a,t,u,s);