2 * Copyright 2001-2007 Internet2
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
24 #include "exceptions.h"
27 #include "util/ParserPool.h"
28 #include "util/XMLHelper.h"
32 #include <sys/types.h>
34 #include <xercesc/util/PlatformUtils.hpp>
35 #include <xercesc/util/XMLUniDefs.hpp>
36 #include <xercesc/sax/SAXException.hpp>
37 #include <xercesc/framework/MemBufInputSource.hpp>
38 #include <xercesc/framework/LocalFileInputSource.hpp>
39 #include <xercesc/framework/Wrapper4InputSource.hpp>
41 using namespace xmltooling::logging;
42 using namespace xmltooling;
43 using namespace xercesc;
// Constructs a parser pool.
// Stores the namespaceAware and schemaAware flags, obtains a lock object from
// Mutex::create(), and allocates a SecurityManager that createBuilder() later
// passes to each parser it configures. The constructor body itself is empty.
46 ParserPool::ParserPool(bool namespaceAware, bool schemaAware)
47 : m_namespaceAware(namespaceAware), m_schemaAware(schemaAware), m_lock(Mutex::create()), m_security(new SecurityManager()) {}
// Destructor: loops while m_pool is non-empty and calls release() on the
// parser returned by m_pool.top().
// NOTE(review): the statement that removes the entry from m_pool, and any
// cleanup of m_lock / m_security, are not visible in this excerpt — confirm
// against the full file.
49 ParserPool::~ParserPool()
51 while(!m_pool.empty()) {
52 m_pool.top()->release();
// Returns a new DOMDocument created by calling createDocument() (no arguments)
// on the implementation that DOMImplementationRegistry returns for a NULL
// feature string.
59 DOMDocument* ParserPool::newDocument()
61 return DOMImplementationRegistry::getDOMImplementation(NULL)->createDocument();
// Parses the supplied DOM input source with a parser borrowed from the pool
// and yields the resulting DOMDocument.
// Two compile-time variants are interleaved below, selected by
// XMLTOOLING_XERCESC_COMPLIANT_DOMLS: one using DOMLSParser and one using
// DOMBuilder.
// NOTE(review): the #else/#endif lines, braces, the try statement and the
// return / rethrow statements are not visible in this excerpt — confirm the
// exact control flow against the full file.
64 DOMDocument* ParserPool::parse(
65 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
// DOMLSParser variant: borrow a parser and hand it to a XercesJanitor
// (presumably so it is cleaned up if it is never checked back in — TODO confirm).
69 DOMLSParser* parser=checkoutBuilder();
70 XercesJanitor<DOMLSParser> janitor(parser);
// Parse, then set fgXercesUserAdoptsDOMDocument to true on the parser's DOM configuration.
72 DOMDocument* doc=parser->parse(&domsrc);
73 parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument,true);
// DOMBuilder variant of the signature and body.
75 DOMInputSource& domsrc
78 DOMBuilder* parser=checkoutBuilder();
79 XercesJanitor<DOMBuilder> janitor(parser);
// Parse, then set fgXercesUserAdoptsDOMDocument to true (createBuilder() sets
// the same feature on newly created builders).
81 DOMDocument* doc=parser->parse(domsrc);
82 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true);
// Success path: take the parser back from the janitor and return it to the pool.
84 checkinBuilder(janitor.release());
// On an XMLException the parser is still returned to the pool.
87 catch (XMLException&) {
88 checkinBuilder(janitor.release());
// Same handling for XMLToolingException (e.g. thrown by handleError()).
91 catch (XMLToolingException&) {
92 checkinBuilder(janitor.release());
// Convenience overload: parses XML read from a std::istream.
// Wraps the stream in a StreamInputSource, adapts that with a
// Wrapper4InputSource and forwards to the DOM-input-source overload of parse().
97 DOMDocument* ParserPool::parse(istream& is)
99 StreamInputSource src(is);
// Second argument is false; presumably this tells the wrapper not to take
// ownership of src, which is a local object here — TODO confirm against the
// Xerces-C++ Wrapper4InputSource documentation.
100 Wrapper4InputSource domsrc(&src,false);
101 return parse(domsrc);
104 // Functor to double its argument separated by a character and append to a buffer
// T is a string-like type (it is instantiated with xstring and string in this
// file). Intended for use with for_each over a map<T,T>.
105 template <class T> class doubleit
// t: buffer that receives the appended text; s: separator character.
108 doubleit(T& t, const typename T::value_type& s) : temp(t), sep(s) {}
// Appends "<key><sep><key><sep>" for the map entry. Only the key (s.first) is
// used, twice; the mapped value (s.second) is not read.
109 void operator() (const pair<const T,T>& s) { temp += s.first + sep + s.first + sep; }
// Separator is held by reference.
// NOTE(review): the declaration of the temp member is not visible in this excerpt.
111 const typename T::value_type& sep;
// Registers a schema file for a namespace.
//   nsURI    - namespace URI used as the key in m_schemaLocMap
//   pathname - location of the schema file; checked with stat before registering
// After registration, m_schemaLocations is rebuilt from the whole map.
// NOTE(review): the preprocessor conditionals separating the variants below,
// the return statements and the locking (if any) are not visible in this
// excerpt — confirm against the full file.
114 bool ParserPool::loadSchema(const XMLCh* nsURI, const XMLCh* pathname)
116 // Just check the pathname and then directly register the pair into the map.
// Narrow-character copy of pathname for the stat call and for logging.
118 auto_ptr_char p(pathname);
// Two alternative existence checks (_stat and stat); presumably selected per
// platform by a conditional that is not visible here.
120 struct _stat stat_buf;
121 if (_stat(p.get(), &stat_buf) != 0)
123 struct stat stat_buf;
124 if (stat(p.get(), &stat_buf) != 0)
// Failure path: log the namespace and the path that could not be found.
128 xmltooling::NDC ndc("loadSchema");
130 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
131 auto_ptr_char n(nsURI);
132 log.error("failed to load schema for (%s), file not found (%s)",n.get(),p.get());
// Variant storing XMLCh strings (xstring) directly in the map, then rebuilding
// the schema-location string with a space separator.
138 m_schemaLocMap[nsURI]=pathname;
139 m_schemaLocations.erase();
140 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
// Variant storing narrow strings: converts nsURI first, then does the same rebuild.
142 auto_ptr_char n(nsURI);
143 m_schemaLocMap[n.get()]=p.get();
144 m_schemaLocations.erase();
145 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));
// Loads namespace-to-schema mappings from an XML catalog file.
//   pathname - location of the catalog document
// The catalog is parsed, its root element is verified, and every <system>
// element's systemId/uri attribute pair is added to m_schemaLocMap; afterwards
// m_schemaLocations is rebuilt. Exceptions derived from exception are logged.
// NOTE(review): braces, try, return statements, preprocessor conditionals and
// any locking are not visible in this excerpt — confirm against the full file.
151 bool ParserPool::loadCatalog(const XMLCh* pathname)
154 xmltooling::NDC ndc("loadCatalog");
156 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// Element and attribute names used when reading the catalog.
159 static const XMLCh catalog[] = UNICODE_LITERAL_7(c,a,t,a,l,o,g);
160 static const XMLCh system[] = UNICODE_LITERAL_6(s,y,s,t,e,m);
161 static const XMLCh systemId[] = UNICODE_LITERAL_8(s,y,s,t,e,m,I,d);
162 static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
// Spells "urn:oasis:names:tc:entity:xmlns:xml:catalog".
163 static const XMLCh CATALOG_NS[] = {
164 chLatin_u, chLatin_r, chLatin_n, chColon,
165 chLatin_o, chLatin_a, chLatin_s, chLatin_i, chLatin_s, chColon,
166 chLatin_n, chLatin_a, chLatin_m, chLatin_e, chLatin_s, chColon,
167 chLatin_t, chLatin_c, chColon,
168 chLatin_e, chLatin_n, chLatin_t, chLatin_i, chLatin_t, chLatin_y, chColon,
169 chLatin_x, chLatin_m, chLatin_l, chLatin_n, chLatin_s, chColon,
170 chLatin_x, chLatin_m, chLatin_l, chColon,
171 chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull
174 // Parse the catalog with the internal parser pool.
// The pathname is only transcoded for logging when debug output is enabled.
176 if (log.isDebugEnabled()) {
177 auto_ptr_char temp(pathname);
178 log.debug("loading XML catalog from %s", temp.get());
// Read the catalog from the local file system.
181 LocalFileInputSource fsrc(NULL,pathname);
182 Wrapper4InputSource domsrc(&fsrc,false);
// Uses the parser obtained from XMLToolingConfig rather than this pool; the
// resulting document is handed to a XercesJanitor.
184 DOMDocument* doc=XMLToolingConfig::getConfig().getParser().parse(domsrc);
185 XercesJanitor<DOMDocument> janitor(doc);
187 // Check root element.
188 const DOMElement* root=doc->getDocumentElement();
189 if (!XMLHelper::isNodeNamed(root,CATALOG_NS,catalog)) {
190 auto_ptr_char temp(pathname);
191 log.error("unknown root element, failed to load XML catalog from %s", temp.get());
195 // Fetch all the <system> elements.
196 DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,system);
// For each <system> element, map its systemId attribute to its uri attribute.
// Note that the root variable is reused to point at the current element.
198 for (XMLSize_t i=0; i<mappings->getLength(); i++) {
199 root=static_cast<DOMElement*>(mappings->item(i));
200 const XMLCh* from=root->getAttributeNS(NULL,systemId);
201 const XMLCh* to=root->getAttributeNS(NULL,uri);
// Variant storing XMLCh strings directly.
203 m_schemaLocMap[from]=to;
// Variant storing narrow strings.
// NOTE(review): the declaration of t is not visible in this excerpt.
205 auto_ptr_char f(from);
207 m_schemaLocMap[f.get()]=t.get();
// Rebuild the schema-location string from the updated map (one of the two
// for_each calls applies, depending on the string type in use).
210 m_schemaLocations.erase();
212 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
214 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));
// Failures surfacing as exception are logged here.
217 catch (exception& e) {
218 log.error("catalog loader caught exception: %s", e.what());
// Entity/resource resolver callback (the pool registers itself as the
// resolver in createBuilder()). Depending on
// XMLTOOLING_XERCESC_COMPLIANT_DOMLS it is declared as resolveResource()
// returning DOMLSInput* or as resolveEntity() returning DOMInputSource*.
// Resolution order, as visible below:
//   1. systemId is a key in m_schemaLocMap  -> the mapped file
//   2. some mapped value ends with systemId -> that file
//   3. systemId contains no forward slash   -> systemId relative to baseURI
//   4. otherwise                            -> an empty in-memory source
// NOTE(review): #else/#endif lines, braces and any locking are not visible in
// this excerpt — confirm against the full file.
225 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
226 DOMLSInput* ParserPool::resolveResource(
227 const XMLCh *const resourceType,
228 const XMLCh *const namespaceUri,
229 const XMLCh *const publicId,
230 const XMLCh *const systemId,
231 const XMLCh *const baseURI
234 DOMInputSource* ParserPool::resolveEntity(
235 const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI
240 xmltooling::NDC ndc("resolveEntity");
245 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// Transcoding for the log message only happens when debug logging is on.
246 if (log.isDebugEnabled()) {
247 auto_ptr_char sysId(systemId);
248 auto_ptr_char base(baseURI);
249 log.debug("asked to resolve %s with baseURI %s",sysId.get(),base.get() ? base.get() : "(null)");
// ---- Variant where the map stores XMLCh strings (xstring) ----
253 // Find well-known schemas in the specified location.
254 map<xstring,xstring>::const_iterator i=m_schemaLocMap.find(systemId);
255 if (i!=m_schemaLocMap.end())
256 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str()));
258 // Check for entity as a value in the map.
259 for (i=m_schemaLocMap.begin(); i!=m_schemaLocMap.end(); ++i) {
260 if (XMLString::endsWith(i->second.c_str(), systemId))
261 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str()));
264 // We'll allow anything without embedded slashes.
265 if (XMLString::indexOf(systemId, chForwardSlash)==-1)
266 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,systemId));
// ---- Variant where the map stores narrow strings; values are converted back
// ---- to XMLCh before being handed to Xerces ----
268 // Find well-known schemas in the specified location.
269 auto_ptr_char temp(systemId);
270 map<string,string>::const_iterator i=m_schemaLocMap.find(temp.get());
271 if (i!=m_schemaLocMap.end()) {
272 auto_ptr_XMLCh temp2(i->second.c_str());
273 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,temp2.get()));
276 // Check for entity as a value in the map.
277 for (i=m_schemaLocMap.begin(); i!=m_schemaLocMap.end(); ++i) {
278 auto_ptr_XMLCh temp2(i->second.c_str());
279 if (XMLString::endsWith(temp2.get(), systemId))
280 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,temp2.get()));
283 // We'll allow anything without embedded slashes.
284 if (XMLString::indexOf(systemId, chForwardSlash)==-1)
285 return new Wrapper4InputSource(new LocalFileInputSource(baseURI,systemId));
// ---- Common fall-through: nothing matched, so the request is refused by
// ---- returning a zero-length in-memory source instead of fetching anything ----
288 // Shortcircuit the request.
290 auto_ptr_char temp(systemId);
292 log.debug("unauthorized entity request (%s), blocking it", temp.get());
293 static const XMLByte nullbuf[] = {0};
294 return new Wrapper4InputSource(new MemBufInputSource(nullbuf,0,systemId));
// Error-handler callback (the pool registers itself as the error handler in
// createBuilder()). Logs the error's line, column and message according to
// its severity:
//   warning     - logged via warnStream()
//   error       - logged via errorStream(), then XMLParserException is thrown
//   fatal error - logged via errorStream(), then XMLParserException is thrown
// The final throw reports an "unclassified" error.
// NOTE(review): the return/break after the warning case, the closing of the
// switch and any default label are not visible in this excerpt. Also,
// locator is dereferenced without a visible null check — confirm that
// e.getLocation() cannot return NULL here.
297 bool ParserPool::handleError(const DOMError& e)
300 xmltooling::NDC ndc("handleError");
302 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
303 DOMLocator* locator=e.getLocation();
// Narrow-character copy of the message for logging and for exception text.
304 auto_ptr_char temp(e.getMessage());
306 switch (e.getSeverity()) {
307 case DOMError::DOM_SEVERITY_WARNING:
308 log.warnStream() << "warning on line " << locator->getLineNumber()
309 << ", column " << locator->getColumnNumber()
310 << ", message: " << temp.get() << logging::eol;
313 case DOMError::DOM_SEVERITY_ERROR:
314 log.errorStream() << "error on line " << locator->getLineNumber()
315 << ", column " << locator->getColumnNumber()
316 << ", message: " << temp.get() << logging::eol;
// A missing message is replaced by "no message" in the exception text.
317 throw XMLParserException(string("error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));
319 case DOMError::DOM_SEVERITY_FATAL_ERROR:
320 log.errorStream() << "fatal error on line " << locator->getLineNumber()
321 << ", column " << locator->getColumnNumber()
322 << ", message: " << temp.get() << logging::eol;
323 throw XMLParserException(string("fatal error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));
325 throw XMLParserException(string("unclassified error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));
// Creates and configures a new DOMBuilder for the pool.
// NOTE(review): the code guarded by the #ifdef below (presumably a DOMLSParser
// variant of this function), the matching #else/#endif, the condition guarding
// the schema-related features, and the return statement are not visible in
// this excerpt — confirm against the full file.
328 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
332 DOMBuilder* ParserPool::createBuilder()
// Look up the DOM implementation by the feature string "LS" and create a
// synchronous-mode builder from it.
334 static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
335 DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);
336 DOMBuilder* parser=static_cast<DOMImplementationLS*>(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0);
// Namespace processing is enabled only when the pool was constructed namespace-aware.
337 if (m_namespaceAware)
338 parser->setFeature(XMLUni::fgDOMNamespaces,true);
// Schema processing, validation, grammar caching and treating validation
// errors as fatal (presumably applied only when m_schemaAware is set —
// the guarding condition is not visible here).
340 parser->setFeature(XMLUni::fgXercesSchema,true);
341 parser->setFeature(XMLUni::fgDOMValidation,true);
342 parser->setFeature(XMLUni::fgXercesCacheGrammarFromParse,true);
343 parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal,true);
345 // We build a "fake" schema location hint that binds each namespace to itself.
346 // This ensures the entity resolver will be given the namespace as a systemId it can check.
// Variant where m_schemaLocations already holds XMLCh data.
348 parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Variant where m_schemaLocations is a narrow string and is converted first.
350 auto_ptr_XMLCh temp(m_schemaLocations.c_str());
351 parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(temp.get()));
// Attach the pool's SecurityManager, have the caller adopt parsed documents,
// and route entity resolution and error reporting through this object.
354 parser->setProperty(XMLUni::fgXercesSecurityManager, m_security);
355 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true);
356 parser->setEntityResolver(this);
357 parser->setErrorHandler(this);
// Hands out a parser for one parse operation: a newly created builder when
// m_pool is empty, otherwise the builder at m_pool.top() with its external
// schema location property refreshed from the current m_schemaLocations.
// NOTE(review): locking, removal of the builder from m_pool, the return
// statements and the preprocessor conditionals are not visible in this
// excerpt — confirm against the full file.
361 DOMBuilder* ParserPool::checkoutBuilder()
364 if (m_pool.empty()) {
365 DOMBuilder* builder=createBuilder();
368 DOMBuilder* p=m_pool.top();
// Variant where m_schemaLocations already holds XMLCh data.
372 p->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Variant where m_schemaLocations is a narrow string and is converted first.
374 auto_ptr_XMLCh temp2(m_schemaLocations.c_str());
375 p->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(temp2.get()));
// Returns a builder to the pool by pushing it onto m_pool so that a later
// checkoutBuilder() call can reuse it.
// NOTE(review): any null check on builder and any locking around the push are
// not visible in this excerpt — confirm against the full file.
381 void ParserPool::checkinBuilder(DOMBuilder* builder)
385 m_pool.push(builder);
391 xsecsize_t StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead)
393 XMLByte* target=toFill;
394 xsecsize_t bytes_read=0,request=maxToRead;
396 // Fulfill the rest by reading from the stream.
397 if (request && !m_is.eof() && !m_is.fail()) {
399 m_is.read(reinterpret_cast<char* const>(target),request);
400 m_pos+=m_is.gcount();
401 bytes_read+=m_is.gcount();
403 catch(ios_base::failure& e) {
404 Category::getInstance(XMLTOOLING_LOGCAT".StreamInputSource").critStream()
405 << "XML::StreamInputSource::StreamBinInputStream::readBytes caught an exception: " << e.what()