2 * Copyright 2001-2008 Internet2
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
20 * A thread-safe pool of parsers that share characteristics.
24 #include "exceptions.h"
26 #include "util/CurlURLInputStream.h"
28 #include "util/ParserPool.h"
29 #include "util/XMLHelper.h"
33 #include <sys/types.h>
35 #include <xercesc/util/PlatformUtils.hpp>
36 #include <xercesc/util/XMLUniDefs.hpp>
37 #include <xercesc/sax/SAXException.hpp>
38 #include <xercesc/framework/MemBufInputSource.hpp>
39 #include <xercesc/framework/LocalFileInputSource.hpp>
40 #include <xercesc/framework/Wrapper4InputSource.hpp>
42 using namespace xmltooling::logging;
43 using namespace xmltooling;
44 using namespace xercesc;
// DOM error handler that writes every parser-reported DOMError to the
// XMLTOOLING_LOGCAT ".ParserPool" log category. The log level and the message
// prefix are selected from the error's severity.
// NOTE(review): short lines (braces, access specifiers, break/return statements,
// the default label and the member declarations) are not present in this view, so
// the value returned by handleError() and any update of `errors` cannot be
// confirmed from here.
49 class MyErrorHandler : public DOMErrorHandler {
// Starts with the `errors` member set to zero.
53 MyErrorHandler() : errors(0) {}
// Logs the line number, column number and message text of one reported error.
// @param e  the error reported by the parser
55 bool handleError(const DOMError& e)
58 xmltooling::NDC ndc("handleError");
60 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// NOTE(review): `locator` is dereferenced in every branch below without a visible
// null check -- confirm getLocation() cannot return NULL for the errors seen here.
62 DOMLocator* locator=e.getLocation();
// `temp.get()` is what gets streamed into the log; presumably a narrow-character
// copy of the XMLCh message -- confirm against auto_ptr_char.
63 auto_ptr_char temp(e.getMessage());
65 switch (e.getSeverity()) {
// Warnings go to the warn stream.
66 case DOMError::DOM_SEVERITY_WARNING:
67 log.warnStream() << "warning on line " << locator->getLineNumber()
68 << ", column " << locator->getColumnNumber()
69 << ", message: " << temp.get() << logging::eol;
// Errors and fatal errors both go to the error stream, with different prefixes.
72 case DOMError::DOM_SEVERITY_ERROR:
74 log.errorStream() << "error on line " << locator->getLineNumber()
75 << ", column " << locator->getColumnNumber()
76 << ", message: " << temp.get() << logging::eol;
79 case DOMError::DOM_SEVERITY_FATAL_ERROR:
81 log.errorStream() << "fatal error on line " << locator->getLineNumber()
82 << ", column " << locator->getColumnNumber()
83 << ", message: " << temp.get() << logging::eol;
// Fallback message for a severity that matches none of the cases above
// (presumably reached after the switch or via a default label not shown here).
88 log.errorStream() << "undefined error type on line " << locator->getLineNumber()
89 << ", column " << locator->getColumnNumber()
90 << ", message: " << temp.get() << logging::eol;
// Builds an empty pool: records the two awareness flags, creates the pool mutex
// through Mutex::create() and allocates a SecurityManager that is later handed to
// every parser created by createBuilder().
// @param namespaceAware  stored in m_namespaceAware
// @param schemaAware     stored in m_schemaAware
// NOTE(review): m_security is allocated with `new`; where it is freed is not
// visible in this view.
97 ParserPool::ParserPool(bool namespaceAware, bool schemaAware)
98 : m_namespaceAware(namespaceAware), m_schemaAware(schemaAware), m_lock(Mutex::create()), m_security(new SecurityManager()) {}
// Releases the parsers that are still sitting in the pool.
// NOTE(review): the statement that removes the released parser from m_pool, and any
// cleanup of m_lock / m_security, are not present in this view.
100 ParserPool::~ParserPool()
102 while(!m_pool.empty()) {
103 m_pool.top()->release();
// Creates a new, empty DOM document from the DOM implementation that the registry
// returns for a NULL feature string.
// @return the document produced by createDocument()
110 DOMDocument* ParserPool::newDocument()
112 return DOMImplementationRegistry::getDOMImplementation(NULL)->createDocument();
115 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
// Parses XML from a DOM LS input using a parser borrowed from the pool
// (variant built when XMLTOOLING_XERCESC_COMPLIANT_DOMLS is defined).
// On every path visible here the parser's error-handler parameter is cleared, the
// user-adopts-document parameter is set back to true and the parser is handed back
// through checkinBuilder(janitor.release()).
// @param domsrc  input passed to the parser
// @throws XMLParserException  when parse errors are flagged, or when Xerces raises
//                             an XMLException (its message is embedded in the text)
// NOTE(review): the try/if/return lines, the declaration of `deh` and the statement
// that ends the XMLToolingException handler are not present in this view; confirm
// the guard on the first throw and the returned value against the full file.
117 DOMDocument* ParserPool::parse(DOMLSInput& domsrc)
119 DOMLSParser* parser=checkoutBuilder();
// Guards the borrowed parser; presumably releases it if the function exits without
// reaching checkinBuilder() -- confirm against XercesJanitor.
120 XercesJanitor<DOMLSParser> janitor(parser);
// Route parser errors to `deh` for the duration of this parse.
123 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, dynamic_cast<DOMErrorHandler*>(&deh));
124 DOMDocument* doc=parser->parse(&domsrc);
128 throw XMLParserException("XML error(s) during parsing, check log for specifics");
// Normal path: detach the handler and return the parser to the pool.
130 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, (void*)NULL);
131 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
132 checkinBuilder(janitor.release());
// Xerces failure: same parser cleanup, then rethrow as XMLParserException.
135 catch (XMLException& ex) {
136 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, (void*)NULL);
137 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
138 checkinBuilder(janitor.release());
139 auto_ptr_char temp(ex.getMessage());
140 throw XMLParserException(string("Xerces error during parsing: ") + (temp.get() ? temp.get() : "no message"));
// XMLTooling failure (including the throw above): same parser cleanup.
142 catch (XMLToolingException&) {
143 parser->getDomConfig()->setParameter(XMLUni::fgDOMErrorHandler, (void*)NULL);
144 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
145 checkinBuilder(janitor.release());
// Parses XML from a DOMInputSource using a DOMBuilder borrowed from the pool
// (presumably the variant used when XMLTOOLING_XERCESC_COMPLIANT_DOMLS is not
// defined -- the preprocessor lines are not visible here).
// On every path visible here the error handler is cleared, the
// user-adopts-document feature is set back to true and the parser is handed back
// through checkinBuilder(janitor.release()).
// @param domsrc  input passed to the parser
// @throws XMLParserException  when parse errors are flagged, or when Xerces raises
//                             an XMLException (its message is embedded in the text)
// NOTE(review): the try/if/return lines, the declaration of `deh` and the statement
// that ends the XMLToolingException handler are not present in this view.
152 DOMDocument* ParserPool::parse(DOMInputSource& domsrc)
154 DOMBuilder* parser=checkoutBuilder();
// Guards the borrowed parser; presumably releases it if the function exits without
// reaching checkinBuilder() -- confirm against XercesJanitor.
155 XercesJanitor<DOMBuilder> janitor(parser);
// Route parser errors to `deh` for the duration of this parse.
158 parser->setErrorHandler(&deh);
159 DOMDocument* doc=parser->parse(domsrc);
163 throw XMLParserException("XML error(s) during parsing, check log for specifics");
// Normal path: detach the handler and return the parser to the pool.
165 parser->setErrorHandler(NULL);
166 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
167 checkinBuilder(janitor.release());
// Xerces failure: same parser cleanup, then rethrow as XMLParserException.
170 catch (XMLException& ex) {
171 parser->setErrorHandler(NULL);
172 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
173 checkinBuilder(janitor.release());
174 auto_ptr_char temp(ex.getMessage());
175 throw XMLParserException(string("Xerces error during parsing: ") + (temp.get() ? temp.get() : "no message"));
// XMLTooling failure (including the throw above): same parser cleanup.
177 catch (XMLToolingException&) {
178 parser->setErrorHandler(NULL);
179 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
180 checkinBuilder(janitor.release());
// Convenience overload: wraps a standard input stream in a StreamInputSource,
// adapts it with Wrapper4InputSource and forwards to the input-source overload.
// The wrapper is constructed with `false` as its second argument, and `src` is a
// local object whose address is passed in.
// @param is  stream to read the XML from
// @return whatever the forwarded parse() call returns
187 DOMDocument* ParserPool::parse(istream& is)
189 StreamInputSource src(is);
190 Wrapper4InputSource domsrc(&src,false);
191 return parse(domsrc);
// Functor for std::for_each over a map<T,T>: for every entry it appends the KEY
// twice to a caller-owned buffer, following each copy with a separator, i.e.
// "key<sep>key<sep>". The mapped value is deliberately ignored; the result is a
// schema-location hint that binds each namespace to itself.
//
// The separator is stored by value. Holding it by reference would leave the functor
// dangling as soon as it outlived the expression that created it from a temporary
// or literal separator.
template <class T> class doubleit
{
public:
    // @param t  buffer that receives the appended text; must outlive the functor
    // @param s  separator character, copied into the functor
    doubleit(T& t, const typename T::value_type& s) : temp(t), sep(s) {}

    // Appends "first<sep>first<sep>" for one map entry, in place (no temporaries).
    void operator() (const std::pair<const T,T>& s) {
        temp += s.first;
        temp += sep;
        temp += s.first;
        temp += sep;
    }

    T& temp;
    const typename T::value_type sep;
};
// Registers a schema file for a namespace: checks that the file exists, stores the
// namespace -> pathname pair in m_schemaLocMap and rebuilds m_schemaLocations from
// the whole map.
// @param nsURI     namespace the schema belongs to (map key)
// @param pathname  location of the schema file (map value)
// NOTE(review): two alternatives appear back to back below (_stat vs stat, xstring
// vs string map) -- presumably selected by preprocessor conditionals that are not
// present in this view, along with the return statements and any locking.
204 bool ParserPool::loadSchema(const XMLCh* nsURI, const XMLCh* pathname)
206 // Just check the pathname and then directly register the pair into the map.
208 auto_ptr_char p(pathname);
// Existence check, first alternative.
210 struct _stat stat_buf;
211 if (_stat(p.get(), &stat_buf) != 0)
// Existence check, second alternative.
213 struct stat stat_buf;
214 if (stat(p.get(), &stat_buf) != 0)
// Failure path: log which namespace and which file could not be found.
218 xmltooling::NDC ndc("loadSchema");
220 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
221 auto_ptr_char n(nsURI);
222 log.error("failed to load schema for (%s), file not found (%s)",n.get(),p.get());
// Registration, first alternative: map keyed directly on the XMLCh strings.
228 m_schemaLocMap[nsURI]=pathname;
229 m_schemaLocations.erase();
230 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
// Registration, second alternative: map keyed on the narrow-character copies.
232 auto_ptr_char n(nsURI);
233 m_schemaLocMap[n.get()]=p.get();
234 m_schemaLocations.erase();
235 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));
// Loads an XML catalog file and registers each of its <system> mappings
// (systemId attribute -> uri attribute) in m_schemaLocMap, then rebuilds
// m_schemaLocations from the whole map.
// The root element must be `catalog` in the namespace
// urn:oasis:names:tc:entity:xmlns:xml:catalog; otherwise an error is logged.
// Exceptions derived from `exception` are caught and logged.
// @param pathname  location of the catalog file
// NOTE(review): the try line, the return statements, the preprocessor conditionals
// separating the xstring/string alternatives, any locking, and the declaration of
// `t` used below are not present in this view.
241 bool ParserPool::loadCatalog(const XMLCh* pathname)
244 xmltooling::NDC ndc("loadCatalog");
246 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// Element/attribute names and the catalog namespace, spelled out as XMLCh constants.
249 static const XMLCh catalog[] = UNICODE_LITERAL_7(c,a,t,a,l,o,g);
250 static const XMLCh system[] = UNICODE_LITERAL_6(s,y,s,t,e,m);
251 static const XMLCh systemId[] = UNICODE_LITERAL_8(s,y,s,t,e,m,I,d);
252 static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
253 static const XMLCh CATALOG_NS[] = {
254 chLatin_u, chLatin_r, chLatin_n, chColon,
255 chLatin_o, chLatin_a, chLatin_s, chLatin_i, chLatin_s, chColon,
256 chLatin_n, chLatin_a, chLatin_m, chLatin_e, chLatin_s, chColon,
257 chLatin_t, chLatin_c, chColon,
258 chLatin_e, chLatin_n, chLatin_t, chLatin_i, chLatin_t, chLatin_y, chColon,
259 chLatin_x, chLatin_m, chLatin_l, chLatin_n, chLatin_s, chColon,
260 chLatin_x, chLatin_m, chLatin_l, chColon,
261 chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull
264 // Parse the catalog with the internal parser pool.
// The narrow-character copy of the path is only made when debug logging is on.
266 if (log.isDebugEnabled()) {
267 auto_ptr_char temp(pathname);
268 log.debug("loading XML catalog from %s", temp.get());
271 LocalFileInputSource fsrc(NULL,pathname);
272 Wrapper4InputSource domsrc(&fsrc,false);
// Uses the parser obtained from the global XMLToolingConfig, not this pool.
274 DOMDocument* doc=XMLToolingConfig::getConfig().getParser().parse(domsrc);
275 XercesJanitor<DOMDocument> janitor(doc);
277 // Check root element.
278 const DOMElement* root=doc->getDocumentElement();
279 if (!XMLHelper::isNodeNamed(root,CATALOG_NS,catalog)) {
280 auto_ptr_char temp(pathname);
281 log.error("unknown root element, failed to load XML catalog from %s", temp.get());
285 // Fetch all the <system> elements.
286 DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,system);
// `root` is reused as the cursor over the matched <system> elements.
288 for (XMLSize_t i=0; i<mappings->getLength(); i++) {
289 root=static_cast<DOMElement*>(mappings->item(i));
290 const XMLCh* from=root->getAttributeNS(NULL,systemId);
291 const XMLCh* to=root->getAttributeNS(NULL,uri);
// First alternative: store the XMLCh strings directly.
293 m_schemaLocMap[from]=to;
// Second alternative: store narrow-character copies.
295 auto_ptr_char f(from);
297 m_schemaLocMap[f.get()]=t.get();
// Rebuild the schema-location hint from the full map (one alternative per map type).
300 m_schemaLocations.erase();
302 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
304 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));
307 catch (exception& e) {
308 log.error("catalog loader caught exception: %s", e.what());
315 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
// Entity/resource resolver installed on every pooled parser. Two signatures are
// shown (resolveResource for the DOM LS API, resolveEntity otherwise); they appear
// to share the body that follows.
//
// Resolution order visible in the body:
//   1. systemId is a key of m_schemaLocMap   -> the mapped local file
//   2. some mapped file path ends with systemId -> that local file
//   3. systemId contains no forward slash    -> a local file named systemId
//   4. anything else                         -> logged as unauthorized and answered
//                                               with a zero-length in-memory source
// Every result is a newly allocated Wrapper4InputSource around a newly allocated
// input source.
// NOTE(review): the preprocessor conditionals that pick the signature and the
// xstring/string lookup alternative, plus any early returns, are not present in
// this view.
316 DOMLSInput* ParserPool::resolveResource(
317 const XMLCh *const resourceType,
318 const XMLCh *const namespaceUri,
319 const XMLCh *const publicId,
320 const XMLCh *const systemId,
321 const XMLCh *const baseURI
324 DOMInputSource* ParserPool::resolveEntity(
325 const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI
330 xmltooling::NDC ndc("resolveEntity");
335 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// Narrow-character copies are only made when debug logging is on.
336 if (log.isDebugEnabled()) {
337 auto_ptr_char sysId(systemId);
338 auto_ptr_char base(baseURI);
339 log.debug("asked to resolve %s with baseURI %s",sysId.get(),base.get() ? base.get() : "(null)");
// ---- Lookup alternative for a map keyed on xstring ----
343 // Find well-known schemas in the specified location.
344 map<xstring,xstring>::const_iterator i=m_schemaLocMap.find(systemId);
345 if (i!=m_schemaLocMap.end())
346 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str()));
348 // Check for entity as a value in the map.
349 for (i=m_schemaLocMap.begin(); i!=m_schemaLocMap.end(); ++i) {
350 if (XMLString::endsWith(i->second.c_str(), systemId))
351 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str()));
354 // We'll allow anything without embedded slashes.
355 if (XMLString::indexOf(systemId, chForwardSlash)==-1)
356 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,systemId));
// ---- Lookup alternative for a map keyed on narrow strings ----
358 // Find well-known schemas in the specified location.
359 auto_ptr_char temp(systemId);
360 map<string,string>::const_iterator i=m_schemaLocMap.find(temp.get());
361 if (i!=m_schemaLocMap.end()) {
362 auto_ptr_XMLCh temp2(i->second.c_str());
363 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,temp2.get()));
366 // Check for entity as a value in the map.
367 for (i=m_schemaLocMap.begin(); i!=m_schemaLocMap.end(); ++i) {
368 auto_ptr_XMLCh temp2(i->second.c_str());
369 if (XMLString::endsWith(temp2.get(), systemId))
370 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,temp2.get()));
373 // We'll allow anything without embedded slashes.
374 if (XMLString::indexOf(systemId, chForwardSlash)==-1)
375 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,systemId));
378 // Shortcircuit the request.
380 auto_ptr_char temp(systemId);
382 log.debug("unauthorized entity request (%s), blocking it", temp.get());
// A zero-byte buffer is returned instead of fetching the requested entity.
383 static const XMLByte nullbuf[] = {0};
384 return new Wrapper4InputSource(new MemBufInputSource(nullbuf,0,systemId));
387 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
// Creates and configures a new synchronous DOMLSParser (DOM LS API variant).
// Namespace processing follows m_namespaceAware; a second group of parameters turns
// on namespaces, schema processing, validation and grammar caching and installs
// m_schemaLocations as the external schema location. Every parser also gets
// user-adopts-document, disabled default entity resolution, this pool as resource
// resolver and the pool's security manager.
// NOTE(review): the condition guarding the schema-related group (presumably
// m_schemaAware), the preprocessor lines that pick one of the two schema-location
// alternatives, and the return statement are not present in this view.
389 DOMLSParser* ParserPool::createBuilder()
// "LS" feature string used to look up the DOM implementation.
391 static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
392 DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);
393 DOMLSParser* parser=static_cast<DOMImplementationLS*>(impl)->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS,NULL);
394 parser->getDomConfig()->setParameter(XMLUni::fgDOMNamespaces, m_namespaceAware);
// Schema-related group.
396 parser->getDomConfig()->setParameter(XMLUni::fgDOMNamespaces, true);
397 parser->getDomConfig()->setParameter(XMLUni::fgXercesSchema, true);
398 parser->getDomConfig()->setParameter(XMLUni::fgDOMValidate, true);
399 parser->getDomConfig()->setParameter(XMLUni::fgXercesCacheGrammarFromParse, true);
401 // We build a "fake" schema location hint that binds each namespace to itself.
402 // This ensures the entity resolver will be given the namespace as a systemId it can check.
// Alternative 1: m_schemaLocations is already an XMLCh string.
404 parser->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Alternative 2: convert the narrow string first.
406 auto_ptr_XMLCh temp(m_schemaLocations.c_str());
407 parser->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast<XMLCh*>(temp.get()));
// Settings applied to every parser.
410 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
411 parser->getDomConfig()->setParameter(XMLUni::fgXercesDisableDefaultEntityResolution, true);
412 parser->getDomConfig()->setParameter(XMLUni::fgDOMResourceResolver, dynamic_cast<DOMLSResourceResolver*>(this));
413 parser->getDomConfig()->setParameter(XMLUni::fgXercesSecurityManager, m_security);
// Hands out a parser (DOM LS API variant): creates a fresh one when the pool is
// empty, otherwise takes the one on top of the pool and refreshes its external
// schema location from the current m_schemaLocations.
// NOTE(review): the locking, the pop of the pool, the condition around the schema
// refresh, the preprocessor lines selecting one of the two refresh alternatives,
// and the return statements are not present in this view.
417 DOMLSParser* ParserPool::checkoutBuilder()
420 if (m_pool.empty()) {
421 DOMLSParser* builder=createBuilder();
424 DOMLSParser* p=m_pool.top();
// Alternative 1: m_schemaLocations is already an XMLCh string.
428 p->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Alternative 2: convert the narrow string first.
430 auto_ptr_XMLCh temp2(m_schemaLocations.c_str());
431 p->getDomConfig()->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation, const_cast<XMLCh*>(temp2.get()));
// Returns a parser to the pool by pushing it onto m_pool (DOM LS API variant).
// @param builder  parser previously obtained from checkoutBuilder()
// NOTE(review): any null check or locking around the push is not present in this view.
437 void ParserPool::checkinBuilder(DOMLSParser* builder)
441 m_pool.push(builder);
// Creates and configures a new synchronous DOMBuilder (older Xerces API variant).
// Namespace processing follows m_namespaceAware; a second group of settings turns
// on namespaces, schema processing, validation and grammar caching and installs
// m_schemaLocations as the external schema location. Every parser also gets the
// pool's security manager, user-adopts-document, disabled default entity
// resolution and this pool as entity resolver.
// NOTE(review): the condition guarding the schema-related group (presumably
// m_schemaAware), the preprocessor lines that pick one of the two schema-location
// alternatives, and the return statement are not present in this view.
447 DOMBuilder* ParserPool::createBuilder()
// "LS" feature string used to look up the DOM implementation.
449 static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
450 DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);
451 DOMBuilder* parser=static_cast<DOMImplementationLS*>(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0);
452 parser->setFeature(XMLUni::fgDOMNamespaces, m_namespaceAware);
// Schema-related group.
454 parser->setFeature(XMLUni::fgDOMNamespaces, true);
455 parser->setFeature(XMLUni::fgXercesSchema, true);
456 parser->setFeature(XMLUni::fgDOMValidation, true);
457 parser->setFeature(XMLUni::fgXercesCacheGrammarFromParse, true);
459 // We build a "fake" schema location hint that binds each namespace to itself.
460 // This ensures the entity resolver will be given the namespace as a systemId it can check.
// Alternative 1: m_schemaLocations is already an XMLCh string.
462 parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Alternative 2: convert the narrow string first.
464 auto_ptr_XMLCh temp(m_schemaLocations.c_str());
465 parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(temp.get()));
// Settings applied to every parser.
468 parser->setProperty(XMLUni::fgXercesSecurityManager, m_security);
469 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
470 parser->setFeature(XMLUni::fgXercesDisableDefaultEntityResolution, true);
471 parser->setEntityResolver(this);
// Hands out a parser (DOMBuilder variant): creates a fresh one when the pool is
// empty, otherwise takes the one on top of the pool and refreshes its external
// schema location from the current m_schemaLocations.
// NOTE(review): the locking, the pop of the pool, the condition around the schema
// refresh, the preprocessor lines selecting one of the two refresh alternatives,
// and the return statements are not present in this view.
475 DOMBuilder* ParserPool::checkoutBuilder()
478 if (m_pool.empty()) {
479 DOMBuilder* builder=createBuilder();
482 DOMBuilder* p=m_pool.top();
// Alternative 1: m_schemaLocations is already an XMLCh string.
486 p->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Alternative 2: convert the narrow string first.
488 auto_ptr_XMLCh temp2(m_schemaLocations.c_str());
489 p->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(temp2.get()));
// Returns a parser to the pool by pushing it onto m_pool (DOMBuilder variant).
// @param builder  parser previously obtained from checkoutBuilder()
// NOTE(review): any null check or locking around the push is not present in this view.
495 void ParserPool::checkinBuilder(DOMBuilder* builder)
499 m_pool.push(builder);
// Reads up to maxToRead bytes from the wrapped stream (m_is) into toFill.
// The read is attempted only when the request is non-zero and the stream is
// neither at EOF nor failed; the number of bytes actually obtained (gcount) is
// added to both m_pos and the local byte counter. An ios_base::failure raised by
// the stream is caught and reported at critical level on the
// ".StreamInputSource" log category.
// @param toFill     destination buffer
// @param maxToRead  maximum number of bytes to place in the buffer
// NOTE(review): the try line, the end of the logging statement, what happens after
// the catch and the return statement are not present in this view; presumably
// bytes_read is returned -- confirm.
505 xsecsize_t StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead)
507 XMLByte* target=toFill;
508 xsecsize_t bytes_read=0,request=maxToRead;
510 // Fulfill the rest by reading from the stream.
511 if (request && !m_is.eof() && !m_is.fail()) {
513 m_is.read(reinterpret_cast<char* const>(target),request);
514 m_pos+=m_is.gcount();
515 bytes_read+=m_is.gcount();
517 catch(ios_base::failure& e) {
518 Category::getInstance(XMLTOOLING_LOGCAT".StreamInputSource").critStream()
519 << "XML::StreamInputSource::StreamBinInputStream::readBytes caught an exception: " << e.what()
528 #ifdef XMLTOOLING_LITE
// XMLTOOLING_LITE variant: builds an input source for a URL given directly.
// @param url       URL to read from, stored in m_url
// @param systemId  identifier passed to the InputSource base class
// NOTE(review): the constructor body is not present in this view.
530 URLInputSource::URLInputSource(const XMLCh* url, const char* systemId) : InputSource(systemId), m_url(url)
// XMLTOOLING_LITE variant: builds an input source from a DOM element, reading the
// URL from its unqualified "url" attribute and falling back to "uri" when "url" is
// missing or empty.
// @param e         element carrying the url/uri attribute
// @param systemId  identifier passed to the InputSource base class
// @throws IOException  "No URL supplied via DOM to URLInputSource constructor."
// NOTE(review): the condition guarding the throw and the statement that stores the
// attribute into m_url are not present in this view.
534 URLInputSource::URLInputSource(const DOMElement* e, const char* systemId) : InputSource(systemId)
536 static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
537 static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l);
539 const XMLCh* attr = e->getAttributeNS(NULL, url);
540 if (!attr || !*attr) {
541 attr = e->getAttributeNS(NULL, uri);
543 throw IOException("No URL supplied via DOM to URLInputSource constructor.");
// XMLTOOLING_LITE variant: returns the stream that m_url creates for itself.
// @return the result of m_url.makeNewStream()
549 BinInputStream* URLInputSource::makeStream() const
551 // Ask the URL to create us an appropriate input stream
552 return m_url.makeNewStream();
// Builds an input source for a URL given directly; m_root is left NULL so that
// makeStream() takes its URL-based branch. Presumably the non-LITE variant -- the
// preprocessor lines are not visible here.
// @param url       URL to read from, stored in m_url
// @param systemId  identifier passed to the InputSource base class
557 URLInputSource::URLInputSource(const XMLCh* url, const char* systemId)
558 : InputSource(systemId), m_url(url), m_root(NULL)
// Builds an input source from a DOM element; the element pointer is stored in
// m_root and later handed to the stream created by makeStream().
// @param e         element stored in m_root (not copied here)
// @param systemId  identifier passed to the InputSource base class
562 URLInputSource::URLInputSource(const DOMElement* e, const char* systemId)
563 : InputSource(systemId), m_root(e)
// Creates a new CurlURLInputStream: from the DOM element when m_root is set,
// otherwise from the stored URL (m_url.get()).
// @return a newly allocated stream
567 BinInputStream* URLInputSource::makeStream() const
569 return m_root ? new CurlURLInputStream(m_root) : new CurlURLInputStream(m_url.get());