2 * Copyright 2001-2006 Internet2
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
24 #include "exceptions.h"
26 #include "util/ParserPool.h"
27 #include "util/XMLHelper.h"
31 #include <sys/types.h>
33 #include <log4cpp/Category.hh>
34 #include <xercesc/util/PlatformUtils.hpp>
35 #include <xercesc/util/XMLUniDefs.hpp>
36 #include <xercesc/sax/SAXException.hpp>
37 #include <xercesc/framework/MemBufInputSource.hpp>
38 #include <xercesc/framework/LocalFileInputSource.hpp>
39 #include <xercesc/framework/Wrapper4InputSource.hpp>
41 using namespace xmltooling;
43 using namespace log4cpp;
// Constructs a parser pool.
//
// namespaceAware - stored in m_namespaceAware; read later by createBuilder()
// schemaAware    - stored in m_schemaAware
//
// m_lock is initialised with the object returned by Mutex::create().
45 ParserPool::ParserPool(bool namespaceAware, bool schemaAware)
46 : m_namespaceAware(namespaceAware), m_schemaAware(schemaAware), m_lock(Mutex::create()) {}
// Destructor: loops while m_pool is non-empty, calling release() on the
// builder currently on top of the pool.
// NOTE(review): the statement that removes the released entry from m_pool and
// any cleanup of m_lock are not visible in this excerpt - confirm against the
// full file.
48 ParserPool::~ParserPool()
50 while(!m_pool.empty()) {
51 m_pool.top()->release();
// Creates a new DOM document.
//
// Returns the result of createDocument() on the DOM implementation obtained by
// passing NULL to DOMImplementationRegistry::getDOMImplementation().
// NOTE(review): ownership of the returned document presumably passes to the
// caller - confirm.
57 DOMDocument* ParserPool::newDocument()
59 return DOMImplementationRegistry::getDOMImplementation(NULL)->createDocument();
// Parses a DOM input source with a builder borrowed from the pool.
//
// domsrc - the input source handed to DOMBuilder::parse()
//
// The borrowed builder is wrapped in a XercesJanitor. On every visible path
// (normal completion, XMLException, XMLToolingException) the janitor is told to
// release() the builder, which is then handed back through checkinBuilder().
// NOTE(review): the try keyword, the return of doc and whatever follows the
// check-in inside each catch block (presumably a rethrow) are not visible in
// this excerpt - confirm against the full file.
62 DOMDocument* ParserPool::parse(DOMInputSource& domsrc)
64 DOMBuilder* parser=checkoutBuilder();
65 XercesJanitor<DOMBuilder> janitor(parser);
67 DOMDocument* doc=parser->parse(domsrc);
// Re-apply the user-adopts-document feature after the parse; createBuilder()
// sets the same feature when a builder is first made. Presumably this keeps the
// returned document owned by the caller rather than the builder - confirm.
68 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true);
69 checkinBuilder(janitor.release());
// Xerces-level failure: still return the builder to the pool.
72 catch (XMLException&) {
73 checkinBuilder(janitor.release());
// Failure raised as an XMLToolingException: same check-in as above.
76 catch (XMLToolingException&) {
77 checkinBuilder(janitor.release());
// Parses XML read from a standard input stream.
//
// is - stream supplying the document bytes
//
// The stream is wrapped in a StreamInputSource, which is in turn wrapped in a
// Wrapper4InputSource so it can be used where a DOMInputSource is expected.
// NOTE(review): the statement that performs the parse and returns the document
// is not visible in this excerpt; presumably it delegates to
// parse(DOMInputSource&) - confirm.
82 DOMDocument* ParserPool::parse(istream& is)
84 StreamInputSource src(is);
// The second argument (false) is the adopt flag of Wrapper4InputSource, so the
// wrapper does not take ownership of the stack-allocated src.
85 Wrapper4InputSource domsrc(&src,false);
89 // Functor to double its argument separated by a character and append to a buffer
//
// T is a string-like type (it must provide value_type, operator+ and
// operator+=). For each pair passed to operator() only the first member is
// used: it is appended to the buffer twice, each copy followed by the
// separator, i.e. first sep first sep.
90 template <class T> class doubleit
// t - buffer that receives the appended text (bound to temp)
// s - separator placed after each copy
93 doubleit(T& t, const typename T::value_type& s) : temp(t), sep(s) {}
// s.second is deliberately not read here.
94 void operator() (const pair<T,T>& s) { temp += s.first + sep + s.first + sep; }
// The separator is held by reference, so the object passed to the constructor
// must stay alive for as long as the functor is used.
// NOTE(review): the declaration of temp is not visible in this excerpt.
96 const typename T::value_type& sep;
// Registers a schema file for a namespace after checking that the file exists.
//
// nsURI    - namespace URI used as the key in m_schemaLocMap
// pathname - path of the schema file; checked with stat before being stored
//
// When the stat call fails, an error naming both the namespace and the path is
// logged. Otherwise the pair is stored in m_schemaLocMap and m_schemaLocations
// is rebuilt from the whole map.
// NOTE(review): the return statements and the preprocessor conditionals that
// presumably choose between the paired variants below are not visible in this
// excerpt - confirm against the full file.
99 bool ParserPool::loadSchema(const XMLCh* nsURI, const XMLCh* pathname)
101 // Just check the pathname and then directly register the pair into the map.
// auto_ptr_char supplies the char* form of the pathname (via get()) that the
// stat call and the log message need.
103 auto_ptr_char p(pathname);
// Two flavours of the existence check follow (_stat and stat); presumably only
// one is compiled on a given platform - confirm.
105 struct _stat stat_buf;
106 if (_stat(p.get(), &stat_buf) != 0)
108 struct stat stat_buf;
109 if (stat(p.get(), &stat_buf) != 0)
// Failure path: tag the diagnostic context and report the missing file.
113 xmltooling::NDC ndc("loadSchema");
115 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
116 auto_ptr_char n(nsURI);
117 log.error("failed to load schema for (%s), file not found (%s)",n.get(),p.get());
// Variant keyed on XMLCh strings: store the pair, clear the cached location
// string and rebuild it. doubleit appends every map key twice, each copy
// followed by a space, so each namespace is listed as its own location.
123 m_schemaLocMap[nsURI]=pathname;
124 m_schemaLocations.erase();
125 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
// Variant keyed on narrow strings: same steps using the char* forms.
127 auto_ptr_char n(nsURI);
128 m_schemaLocMap[n.get()]=p.get();
129 m_schemaLocations.erase();
130 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));
// Loads namespace-to-file mappings from an XML catalog document.
//
// pathname - path of the catalog file to read
//
// The file is parsed with the parser obtained from XMLToolingConfig, its root
// element is checked against the catalog namespace and element name, and for
// every uri element in that namespace the name attribute is mapped to the uri
// attribute in m_schemaLocMap. m_schemaLocations is then rebuilt from the map.
// An XMLParserException raised along the way is caught and logged.
// NOTE(review): the try keyword, the return statements, the declaration of t
// and the preprocessor conditionals that presumably select between the paired
// variants are not visible in this excerpt - confirm against the full file.
136 bool ParserPool::loadCatalog(const XMLCh* pathname)
139 xmltooling::NDC ndc("loadCatalog");
141 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// XMLCh spellings of the element and attribute names looked up below.
144 static const XMLCh catalog[] = { chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull };
145 static const XMLCh uri[] = { chLatin_u, chLatin_r, chLatin_i, chNull };
146 static const XMLCh name[] = { chLatin_n, chLatin_a, chLatin_m, chLatin_e, chNull };
// Spells urn:oasis:names:tc:entity:xmlns:xml:catalog
147 static const XMLCh CATALOG_NS[] = {
148 chLatin_u, chLatin_r, chLatin_n, chColon,
149 chLatin_o, chLatin_a, chLatin_s, chLatin_i, chLatin_s, chColon,
150 chLatin_n, chLatin_a, chLatin_m, chLatin_e, chLatin_s, chColon,
151 chLatin_t, chLatin_c, chColon,
152 chLatin_e, chLatin_n, chLatin_t, chLatin_i, chLatin_t, chLatin_y, chColon,
153 chLatin_x, chLatin_m, chLatin_l, chLatin_n, chLatin_s, chColon,
154 chLatin_x, chLatin_m, chLatin_l, chColon,
155 chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull
158 // Parse the catalog with the internal parser pool.
// The narrow copy of the path is only made when debug logging is enabled.
160 if (log.isDebugEnabled()) {
161 auto_ptr_char temp(pathname);
162 log.debug("loading XML catalog from %s", temp.get());
// The wrapper is built with adopt flag false, so it does not own fsrc.
165 LocalFileInputSource fsrc(NULL,pathname);
166 Wrapper4InputSource domsrc(&fsrc,false);
// The parsed document is handed to a XercesJanitor for the rest of the scope.
168 DOMDocument* doc=XMLToolingConfig::getConfig().getParser().parse(domsrc);
169 XercesJanitor<DOMDocument> janitor(doc);
171 // Check root element.
172 const DOMElement* root=doc->getDocumentElement();
173 if (!XMLHelper::isNodeNamed(root,CATALOG_NS,catalog)) {
174 auto_ptr_char temp(pathname);
175 log.error("unknown root element, failed to load XML catalog from %s", temp.get());
179 // Fetch all the <uri> elements.
180 DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,uri);
// root is reused inside the loop as the cursor for the current uri element.
182 for (XMLSize_t i=0; i<mappings->getLength(); i++) {
183 root=static_cast<DOMElement*>(mappings->item(i));
184 const XMLCh* from=root->getAttributeNS(NULL,name);
185 const XMLCh* to=root->getAttributeNS(NULL,uri);
// Variant keyed on XMLCh strings.
187 m_schemaLocMap[from]=to;
// Variant keyed on narrow strings (t is declared on a line not shown here).
189 auto_ptr_char f(from);
191 m_schemaLocMap[f.get()]=t.get();
// Rebuild the cached location string: every map key is appended twice, each
// copy followed by a space.
194 m_schemaLocations.erase();
196 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
198 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));
// Parse failures raised as XMLParserException are reported through the log.
201 catch (XMLParserException& e) {
202 log.error("catalog loader caught XMLParserException: %s", e.what());
// Entity resolution callback (this object is installed on each builder with
// setEntityResolver in createBuilder()).
//
// publicId - not read in the lines visible here
// systemId - identifier looked up in m_schemaLocMap
// baseURI  - only used in the debug log message
//
// When systemId is a key of m_schemaLocMap, a newly allocated
// Wrapper4InputSource around a LocalFileInputSource for the mapped path is
// returned. Otherwise a warning is logged and a newly allocated input source
// over a zero-length memory buffer is returned, so the request yields no data.
// NOTE(review): lines between the NDC setup and the logger lookup are not
// visible in this excerpt; confirm whether systemId is checked for NULL there,
// since the debug message and the map lookup below use it unguarded while the
// final warning does guard it. The preprocessor conditionals that presumably
// select between the two lookup variants are also not visible.
// NOTE(review): the returned objects are created with new; presumably the
// parser takes ownership - confirm against the Xerces documentation.
209 DOMInputSource* ParserPool::resolveEntity(const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI)
212 xmltooling::NDC ndc("resolveEntity");
217 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
// Narrow copies are only made when debug logging is enabled.
218 if (log.isDebugEnabled()) {
219 auto_ptr_char sysId(systemId);
220 auto_ptr_char base(baseURI);
221 log.debug("asked to resolve %s with baseURI %s",sysId.get(),base.get() ? base.get() : "(null)");
224 // Find well-known schemas in the specified location.
// Variant for a map keyed on XMLCh strings: the mapped path is used directly.
226 map<xstring,xstring>::const_iterator i=m_schemaLocMap.find(systemId);
227 if (i!=m_schemaLocMap.end())
228 return new Wrapper4InputSource(new LocalFileInputSource(NULL,i->second.c_str()));
// Variant for a map keyed on narrow strings: the key is converted for the
// lookup and the mapped path is converted back to XMLCh for Xerces.
230 auto_ptr_char temp(systemId);
231 map<string,string>::const_iterator i=m_schemaLocMap.find(temp.get());
232 if (i!=m_schemaLocMap.end()) {
233 auto_ptr_XMLCh temp2(i->second.c_str());
234 return new Wrapper4InputSource(new LocalFileInputSource(NULL,temp2.get()));
238 // Shortcircuit the request.
239 auto_ptr_char sysId(systemId);
240 log.warn("unauthorized entity request (%s), blocking it", sysId.get() ? sysId.get() : "no systemId");
// nullbuf is static so the buffer outlives this call; the length passed to
// MemBufInputSource is 0.
241 static const XMLByte nullbuf[] = {0};
242 return new Wrapper4InputSource(new MemBufInputSource(nullbuf,0,systemId));
// DOM error callback (this object is installed on each builder with
// setErrorHandler in createBuilder()).
//
// e - the error reported by the parser; its severity, message and location
//     (line and column) are read
//
// Each severity is logged at a matching level: warnings via warnStream, errors
// via errorStream and fatal errors via critStream. The error and fatal error
// cases throw XMLParserException carrying the parser message, or the text
// no message when none is available. A final throw covers any other severity.
// NOTE(review): what the warning case returns, and the label in front of the
// final throw, are not visible in this excerpt - confirm against the full file.
// NOTE(review): locator is dereferenced in every case without a visible NULL
// check - confirm that getLocation() cannot return NULL here.
245 bool ParserPool::handleError(const DOMError& e)
248 xmltooling::NDC ndc("handleError");
250 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
251 DOMLocator* locator=e.getLocation();
// Narrow copy of the parser message, shared by the log lines and exceptions.
252 auto_ptr_char temp(e.getMessage());
254 switch (e.getSeverity()) {
255 case DOMError::DOM_SEVERITY_WARNING:
256 log.warnStream() << "warning on line " << locator->getLineNumber()
257 << ", column " << locator->getColumnNumber()
258 << ", message: " << temp.get() << CategoryStream::ENDLINE;
261 case DOMError::DOM_SEVERITY_ERROR:
262 log.errorStream() << "error on line " << locator->getLineNumber()
263 << ", column " << locator->getColumnNumber()
264 << ", message: " << temp.get() << CategoryStream::ENDLINE;
265 throw XMLParserException(string("error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));
267 case DOMError::DOM_SEVERITY_FATAL_ERROR:
268 log.critStream() << "fatal error on line " << locator->getLineNumber()
269 << ", column " << locator->getColumnNumber()
270 << ", message: " << temp.get() << CategoryStream::ENDLINE;
271 throw XMLParserException(string("fatal error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));
// Fallback for a severity not matched above.
273 throw XMLParserException(string("unclassified error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));
// Creates and configures a new synchronous DOMBuilder.
//
// The builder comes from the DOM implementation registered for the LS feature
// string. Namespace processing is switched on when m_namespaceAware is set.
// The schema, validation, grammar caching and validation-error-as-fatal
// features are switched on, the external schema location property is set from
// m_schemaLocations, the user-adopts-document feature is enabled, and this
// object is installed as both entity resolver and error handler.
// NOTE(review): the condition guarding the schema-related settings (presumably
// m_schemaAware), the preprocessor conditionals that presumably select between
// the two setProperty variants and the return statement are not visible in this
// excerpt - confirm against the full file.
276 DOMBuilder* ParserPool::createBuilder()
// Feature string LS, used to look up a load/save capable implementation.
278 static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
279 DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);
280 DOMBuilder* parser=static_cast<DOMImplementationLS*>(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0);
281 if (m_namespaceAware)
282 parser->setFeature(XMLUni::fgDOMNamespaces,true);
284 parser->setFeature(XMLUni::fgXercesSchema,true);
285 parser->setFeature(XMLUni::fgDOMValidation,true);
286 parser->setFeature(XMLUni::fgXercesCacheGrammarFromParse,true);
287 parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal,true);
289 // We build a "fake" schema location hint that binds each namespace to itself.
290 // This ensures the entity resolver will be given the namespace as a systemId it can check.
// Variant where m_schemaLocations already holds XMLCh data.
292 parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Variant where m_schemaLocations is a narrow string and is converted first.
294 auto_ptr_XMLCh temp(m_schemaLocations.c_str());
295 parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(temp.get()));
298 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true);
299 parser->setEntityResolver(this);
300 parser->setErrorHandler(this);
// Obtains a builder for a parse operation.
//
// When m_pool is empty a fresh builder is made with createBuilder(). Otherwise
// the builder on top of the pool is taken and its external schema location
// property is refreshed from the current m_schemaLocations, so mappings added
// since the builder was created are applied.
// NOTE(review): any locking of m_lock, the removal of the taken builder from
// m_pool, the return statements and the preprocessor conditionals that
// presumably select between the two setProperty variants are not visible in
// this excerpt - confirm against the full file.
304 DOMBuilder* ParserPool::checkoutBuilder()
307 if (m_pool.empty()) {
308 DOMBuilder* builder=createBuilder();
311 DOMBuilder* p=m_pool.top();
// Variant where m_schemaLocations already holds XMLCh data.
315 p->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
// Variant where m_schemaLocations is a narrow string and is converted first.
317 auto_ptr_XMLCh temp2(m_schemaLocations.c_str());
318 p->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(temp2.get()));
// Returns a builder to the pool by pushing it onto m_pool.
//
// builder - the builder being handed back
//
// NOTE(review): any guard on builder and any locking of m_lock around the push
// are not visible in this excerpt - confirm against the full file.
324 void ParserPool::checkinBuilder(DOMBuilder* builder)
328 m_pool.push(builder);
332 unsigned int StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toFill, const unsigned int maxToRead)
334 XMLByte* target=toFill;
335 unsigned int bytes_read=0,request=maxToRead;
337 // Fulfill the rest by reading from the stream.
338 if (request && !m_is.eof() && !m_is.fail()) {
340 m_is.read(reinterpret_cast<char* const>(target),request);
341 m_pos+=m_is.gcount();
342 bytes_read+=m_is.gcount();
344 catch(ios_base::failure& e) {
345 Category::getInstance(XMLTOOLING_LOGCAT".StreamInputSource").critStream()
346 << "XML::StreamInputSource::StreamBinInputStream::readBytes caught an exception: " << e.what()
347 << CategoryStream::ENDLINE;