2 * Copyright 2001-2006 Internet2
\r
4 * Licensed under the Apache License, Version 2.0 (the "License");
\r
5 * you may not use this file except in compliance with the License.
\r
6 * You may obtain a copy of the License at
\r
8 * http://www.apache.org/licenses/LICENSE-2.0
\r
10 * Unless required by applicable law or agreed to in writing, software
\r
11 * distributed under the License is distributed on an "AS IS" BASIS,
\r
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
13 * See the License for the specific language governing permissions and
\r
14 * limitations under the License.
\r
23 #include "internal.h"
\r
24 #include "exceptions.h"
\r
25 #include "util/NDC.h"
\r
26 #include "util/ParserPool.h"
\r
27 #include "util/XMLHelper.h"
\r
29 #include <algorithm>
\r
30 #include <functional>
\r
31 #include <sys/types.h>
\r
32 #include <sys/stat.h>
\r
33 #include <log4cpp/Category.hh>
\r
34 #include <xercesc/util/PlatformUtils.hpp>
\r
35 #include <xercesc/util/XMLUniDefs.hpp>
\r
36 #include <xercesc/sax/SAXException.hpp>
\r
37 #include <xercesc/framework/MemBufInputSource.hpp>
\r
38 #include <xercesc/framework/LocalFileInputSource.hpp>
\r
39 #include <xercesc/framework/Wrapper4InputSource.hpp>
\r
41 using namespace xmltooling;
\r
42 using namespace std;
\r
43 using namespace log4cpp;
\r
// Builds a parser pool. The two flags are stored in m_namespaceAware and
// m_schemaAware (read later when builders are configured), and m_lock is
// initialised with a mutex obtained from XMLPlatformUtils::makeMutex().
45 ParserPool::ParserPool(bool namespaceAware, bool schemaAware)
\r
46 : m_namespaceAware(namespaceAware), m_schemaAware(schemaAware), m_lock(XMLPlatformUtils::makeMutex()) {}
\r
// Destructor: calls release() on the builder at the top of m_pool while the
// pool is non-empty, then closes the mutex held in m_lock.
// NOTE(review): the statement that pops m_pool is not visible in this listing;
// presumably it sits on an elided line -- confirm against the full file.
48 ParserPool::~ParserPool()
\r
50 while(!m_pool.empty()) {
\r
51 m_pool.top()->release();
\r
54 XMLPlatformUtils::closeMutex(m_lock);
\r
// Returns a fresh DOMDocument created by the DOM implementation that
// DOMImplementationRegistry::getDOMImplementation(NULL) hands back.
// No pooled builder is involved.
57 DOMDocument* ParserPool::newDocument()
\r
59 return DOMImplementationRegistry::getDOMImplementation(NULL)->createDocument();
\r
// Parses the supplied DOM input source using a builder borrowed from the pool.
// The builder comes from checkoutBuilder() and is handed back through
// checkinBuilder() once the parse call has completed.
62 DOMDocument* ParserPool::parse(DOMInputSource& domsrc)
\r
64 DOMBuilder* parser=checkoutBuilder();
\r
66 DOMDocument* doc=parser->parse(domsrc);
\r
// The user-adopts-document feature is set again on the builder after the parse,
// before the builder goes back into the pool.
67 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true);
\r
68 checkinBuilder(parser);
\r
// NOTE(review): second check-in of the same builder; presumably this is the
// failure path -- the enclosing construct is not visible in this listing.
72 checkinBuilder(parser);
\r
// Convenience overload: parses XML read from a std::istream.
// The stream is wrapped in a StreamInputSource, which in turn is wrapped in a
// Wrapper4InputSource so it can be passed to parse(DOMInputSource&).
77 DOMDocument* ParserPool::parse(istream& is)
\r
79 StreamInputSource src(is);
\r
// src is a local object, so the wrapper is constructed with 'false' as its second argument
// (per the Xerces API this is the adopt flag -- the wrapper must not delete src).
80 Wrapper4InputSource domsrc(&src,false);
\r
81 return parse(domsrc);
\r
// doubleit<T>: function object applied to each (key, value) pair of a map.
// For every pair it appends "key sep key sep" to the buffer bound at construction;
// only s.first is used, s.second is never read.
// Both the buffer (t) and the separator (s) are taken by reference.
// NOTE(review): the declaration of the 'temp' member is not visible in this listing.
84 // Functor to double its argument separated by a character and append to a buffer
\r
85 template <class T> class doubleit
\r
88 doubleit(T& t, const typename T::value_type& s) : temp(t), sep(s) {}
\r
89 void operator() (const pair<T,T>& s) { temp += s.first + sep + s.first + sep; }
\r
91 const typename T::value_type& sep;
\r
// Registers a schema file for a namespace.
//   nsURI    - namespace URI used as the key in m_schemaLocMap
//   pathname - path of the schema file, stored as the mapped value
// The path is first checked with _stat()/stat(); when that call fails an error
// naming both the namespace and the path is logged. Otherwise the pair is stored
// in m_schemaLocMap and m_schemaLocations is rebuilt from the whole map, all
// while m_lock is held.
// NOTE(review): the return statements and the #else/#endif lines are not visible
// in this listing.
94 bool ParserPool::loadSchema(const XMLCh* nsURI, const XMLCh* pathname)
\r
96 // Just check the pathname and then directly register the pair into the map.
\r
// Transcoded copy of the path, used for the stat call and for logging.
98 auto_ptr_char p(pathname);
\r
// Two alternative existence checks: _stat() and stat(). The preprocessor
// condition that selects between them is on an elided line.
100 struct _stat stat_buf;
\r
101 if (_stat(p.get(), &stat_buf) != 0)
\r
103 struct stat stat_buf;
\r
104 if (stat(p.get(), &stat_buf) != 0)
\r
// Failure branch: log which namespace/path could not be loaded.
108 xmltooling::NDC ndc("loadSchema");
\r
110 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
\r
111 auto_ptr_char n(nsURI);
\r
112 log.error("failed to load schema for (%s), file not found (%s)",n.get(),p.get());
\r
// The map and the derived location string are updated under the pool mutex.
116 XMLPlatformUtils::lockMutex(m_lock);
\r
// HAVE_GOOD_STL variant: the map is keyed directly by the XMLCh strings.
117 #ifdef HAVE_GOOD_STL
\r
118 m_schemaLocMap[nsURI]=pathname;
\r
119 m_schemaLocations.erase();
\r
120 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
\r
// Alternative variant: keys and values are stored as transcoded narrow strings.
122 auto_ptr_char n(nsURI);
\r
123 m_schemaLocMap[n.get()]=p.get();
\r
124 m_schemaLocations.erase();
\r
125 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));
\r
127 XMLPlatformUtils::unlockMutex(m_lock);
\r
// Loads an XML catalog file and registers its <uri> mappings.
//   pathname - path of the catalog document
// The file is parsed with a private, namespace-aware DOMBuilder (not one from the
// pool). The root element must be <catalog> in the namespace
// urn:oasis:names:tc:entity:xmlns:xml:catalog; otherwise an error is logged.
// For every <uri> element in that namespace, its "name" attribute is mapped to its
// "uri" attribute in m_schemaLocMap, and m_schemaLocations is rebuilt afterwards.
// DOMException, SAXException and XMLException are caught and logged together with
// the file name.
// NOTE(review): the try statement, the return statements and the #else/#endif lines
// are not visible in this listing.
132 bool ParserPool::loadCatalog(const XMLCh* pathname)
\r
135 xmltooling::NDC ndc("loadCatalog");
\r
137 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
\r
// XMLCh constants: "LS" (DOM implementation feature), the element/attribute names
// "catalog", "uri", "name", and the catalog namespace URI.
140 static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
\r
141 static const XMLCh catalog[] = { chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull };
\r
142 static const XMLCh uri[] = { chLatin_u, chLatin_r, chLatin_i, chNull };
\r
143 static const XMLCh name[] = { chLatin_n, chLatin_a, chLatin_m, chLatin_e, chNull };
\r
144 static const XMLCh CATALOG_NS[] = {
\r
145 chLatin_u, chLatin_r, chLatin_n, chColon,
\r
146 chLatin_o, chLatin_a, chLatin_s, chLatin_i, chLatin_s, chColon,
\r
147 chLatin_n, chLatin_a, chLatin_m, chLatin_e, chLatin_s, chColon,
\r
148 chLatin_t, chLatin_c, chColon,
\r
149 chLatin_e, chLatin_n, chLatin_t, chLatin_i, chLatin_t, chLatin_y, chColon,
\r
150 chLatin_x, chLatin_m, chLatin_l, chLatin_n, chLatin_s, chColon,
\r
151 chLatin_x, chLatin_m, chLatin_l, chColon,
\r
152 chLatin_c, chLatin_a, chLatin_t, chLatin_a, chLatin_l, chLatin_o, chLatin_g, chNull
\r
155 // Get a local parser to use. When it pops, the document will go with it.
\r
156 DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);
\r
157 auto_ptr<DOMBuilder> parser(static_cast<DOMImplementationLS*>(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0));
\r
158 parser->setFeature(XMLUni::fgDOMNamespaces,true);
\r
// The path is only transcoded for logging when debug output is enabled.
160 if (log.isDebugEnabled()) {
\r
161 auto_ptr_char temp(pathname);
\r
162 log.debug("loading XML catalog from %s", temp.get());
\r
// fsrc is a local object, hence 'false' as the wrapper's second argument.
165 LocalFileInputSource fsrc(NULL,pathname);
\r
166 Wrapper4InputSource domsrc(&fsrc,false);
\r
168 DOMDocument* doc=parser->parse(domsrc);
\r
170 // Check root element.
\r
171 const DOMElement* root=doc->getDocumentElement();
\r
172 if (!XMLHelper::isElementNamed(root,CATALOG_NS,catalog)) {
\r
173 auto_ptr_char temp(pathname);
\r
174 log.error("unknown root element, failed to load XML catalog from %s", temp.get());
\r
178 // Fetch all the <uri> elements.
\r
179 DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,uri);
\r
// The map and the derived location string are updated under the pool mutex.
180 XMLPlatformUtils::lockMutex(m_lock);
\r
181 for (XMLSize_t i=0; i<mappings->getLength(); i++) {
\r
// 'root' is reused here to point at the current <uri> element.
182 root=static_cast<DOMElement*>(mappings->item(i));
\r
183 const XMLCh* from=root->getAttributeNS(NULL,name);
\r
184 const XMLCh* to=root->getAttributeNS(NULL,uri);
\r
// HAVE_GOOD_STL variant stores the XMLCh strings directly; the alternative
// variant stores transcoded narrow strings.
185 #ifdef HAVE_GOOD_STL
\r
186 m_schemaLocMap[from]=to;
\r
188 auto_ptr_char f(from);
\r
189 auto_ptr_char t(to);
\r
190 m_schemaLocMap[f.get()]=t.get();
\r
// Rebuild the combined location string from the whole map.
193 m_schemaLocations.erase();
\r
194 #ifdef HAVE_GOOD_STL
\r
195 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
\r
197 for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<string>(m_schemaLocations,' '));
\r
199 XMLPlatformUtils::unlockMutex(m_lock);
\r
// Each handler logs the exception message together with the catalog path.
201 catch (DOMException& e) {
\r
202 auto_ptr_char p(pathname);
\r
203 auto_ptr_char m(e.getMessage());
\r
204 log.error("catalog loader caught DOMException (%s) from file (%s)", m.get(), p.get());
\r
207 catch (SAXException& e) {
\r
208 auto_ptr_char p(pathname);
\r
209 auto_ptr_char m(e.getMessage());
\r
210 log.error("catalog loader caught SAXException (%s) from file (%s)", m.get(), p.get());
\r
213 catch (XMLException& e) {
\r
214 auto_ptr_char p(pathname);
\r
215 auto_ptr_char m(e.getMessage());
\r
216 log.error("catalog loader caught XMLException (%s) from file (%s)", m.get(), p.get());
\r
// Entity resolver callback installed on every builder created by this pool.
//   publicId - not used by the visible code
//   systemId - identifier looked up in m_schemaLocMap
//   baseURI  - only used for the debug log message
// When systemId is a key of m_schemaLocMap, a newly allocated input source for the
// mapped local file is returned. Any other request is logged as unauthorized and
// answered with a newly allocated, empty in-memory source, so no external entity
// is fetched.
223 DOMInputSource* ParserPool::resolveEntity(const XMLCh* const publicId, const XMLCh* const systemId, const XMLCh* const baseURI)
\r
226 xmltooling::NDC ndc("resolveEntity");
\r
231 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
\r
// The identifiers are only transcoded for logging when debug output is enabled.
232 if (log.isDebugEnabled()) {
\r
233 auto_ptr_char sysId(systemId);
\r
234 auto_ptr_char base(baseURI);
\r
235 log.debug("asked to resolve %s with baseURI %s",sysId.get(),base.get() ? base.get() : "(null)");
\r
238 // Find well-known schemas in the specified location.
\r
239 #ifdef HAVE_GOOD_STL
\r
240 map<xstring,xstring>::const_iterator i=m_schemaLocMap.find(systemId);
\r
241 if (i!=m_schemaLocMap.end())
\r
242 return new Wrapper4InputSource(new LocalFileInputSource(NULL,i->second.c_str()));
\r
244 auto_ptr_char temp(systemId);
\r
245 map<string,string>::const_iterator i=m_schemaLocMap.find(temp.get());
\r
// The mapped path is transcoded only after the end() test: dereferencing the
// iterator first is undefined behaviour whenever systemId is not in the map.
// The temporary lives until the end of the full return expression.
247 if (i!=m_schemaLocMap.end())
\r
248 return new Wrapper4InputSource(new LocalFileInputSource(NULL,auto_ptr_XMLCh(i->second.c_str()).get()));
\r
251 // Shortcircuit the request.
\r
252 log.warn("unauthorized entity request, blocking it");
\r
// Zero-length buffer: the parser receives an empty document for the blocked entity.
253 static const XMLByte nullbuf[] = {0};
\r
254 return new Wrapper4InputSource(new MemBufInputSource(nullbuf,0,systemId));
\r
// DOM error handler callback installed on every builder created by this pool.
//   e - the error reported by the parser
// The message is logged with its line and column at a level matching the severity:
// warning -> warn, error -> error, fatal error -> crit.
// Errors and fatal errors additionally throw XMLParserException carrying the
// message text (or "no message" when the transcoded message is null); the final
// throw covers the unclassified case.
// NOTE(review): the statement that ends the warning case (and the value returned)
// is not visible in this listing.
257 bool ParserPool::handleError(const DOMError& e)
\r
260 xmltooling::NDC ndc("handleError");
\r
262 Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
\r
263 DOMLocator* locator=e.getLocation();
\r
// Transcoded copy of the parser's message, shared by all branches below.
264 auto_ptr_char temp(e.getMessage());
\r
266 switch (e.getSeverity()) {
\r
267 case DOMError::DOM_SEVERITY_WARNING:
\r
268 log.warnStream() << "warning on line " << locator->getLineNumber()
\r
269 << ", column " << locator->getColumnNumber()
\r
270 << ", message: " << temp.get() << CategoryStream::ENDLINE;
\r
273 case DOMError::DOM_SEVERITY_ERROR:
\r
274 log.errorStream() << "error on line " << locator->getLineNumber()
\r
275 << ", column " << locator->getColumnNumber()
\r
276 << ", message: " << temp.get() << CategoryStream::ENDLINE;
\r
277 throw XMLParserException(string("error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));
\r
279 case DOMError::DOM_SEVERITY_FATAL_ERROR:
\r
280 log.critStream() << "fatal error on line " << locator->getLineNumber()
\r
281 << ", column " << locator->getColumnNumber()
\r
282 << ", message: " << temp.get() << CategoryStream::ENDLINE;
\r
283 throw XMLParserException(string("fatal error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));
\r
285 throw XMLParserException(string("unclassified error during XML parsing: ") + (temp.get() ? temp.get() : "no message"));
\r
// Creates and configures a new synchronous DOMBuilder from the "LS" DOM implementation.
//  - namespace processing is enabled when m_namespaceAware is set;
//  - when m_schemaAware is set, schema support, validation, grammar caching and
//    validation-error-as-fatal are enabled and m_schemaLocations is installed as the
//    external schema location property;
//  - the user-adopts-document feature is enabled, and this object is registered as
//    both entity resolver and error handler.
// NOTE(review): the return statement and the #else/#endif lines are not visible in
// this listing.
288 DOMBuilder* ParserPool::createBuilder()
\r
290 static const XMLCh impltype[] = { chLatin_L, chLatin_S, chNull };
\r
291 DOMImplementation* impl=DOMImplementationRegistry::getDOMImplementation(impltype);
\r
292 DOMBuilder* parser=static_cast<DOMImplementationLS*>(impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS,0);
\r
293 if (m_namespaceAware)
\r
294 parser->setFeature(XMLUni::fgDOMNamespaces,true);
\r
295 if (m_schemaAware) {
\r
296 parser->setFeature(XMLUni::fgXercesSchema,true);
\r
297 parser->setFeature(XMLUni::fgDOMValidation,true);
\r
298 parser->setFeature(XMLUni::fgXercesCacheGrammarFromParse,true);
\r
299 parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal,true);
\r
301 // We build a "fake" schema location hint that binds each namespace to itself.
\r
302 // This ensures the entity resolver will be given the namespace as a systemId it can check.
\r
// HAVE_GOOD_STL variant passes the XMLCh string directly; the alternative variant
// transcodes the narrow string first.
303 #ifdef HAVE_GOOD_STL
\r
304 parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
\r
306 auto_ptr_XMLCh temp(m_schemaLocations.c_str());
\r
307 parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(temp.get()));
\r
310 parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument,true);
\r
311 parser->setEntityResolver(this);
\r
312 parser->setErrorHandler(this);
\r
// Hands out a DOMBuilder for exclusive use by the caller, working under m_lock.
//  - When the pool is empty, a new builder is made with createBuilder().
//  - Otherwise the builder at the top of m_pool is taken and, when m_schemaAware is
//    set, its external schema location property is refreshed from the current
//    m_schemaLocations.
// Every visible path releases m_lock.
// NOTE(review): the pop of m_pool, the return statements and the construct around
// the last unlock (presumably an exception handler) are not visible in this listing.
316 DOMBuilder* ParserPool::checkoutBuilder()
\r
318 XMLPlatformUtils::lockMutex(m_lock);
\r
320 if (m_pool.empty()) {
\r
321 DOMBuilder* builder=createBuilder();
\r
322 XMLPlatformUtils::unlockMutex(m_lock);
\r
325 DOMBuilder* p=m_pool.top();
\r
327 if (m_schemaAware) {
\r
328 #ifdef HAVE_GOOD_STL
\r
329 p->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
\r
331 auto_ptr_XMLCh temp2(m_schemaLocations.c_str());
\r
332 p->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(temp2.get()));
\r
335 XMLPlatformUtils::unlockMutex(m_lock);
\r
339 XMLPlatformUtils::unlockMutex(m_lock);
\r
// Returns a builder to the pool: pushes it onto m_pool while holding m_lock.
//   builder - the builder being handed back
// NOTE(review): a line between the signature and the lock is elided in this listing
// (possibly a guard on builder) -- confirm against the full file.
344 void ParserPool::checkinBuilder(DOMBuilder* builder)
\r
347 XMLPlatformUtils::lockMutex(m_lock);
\r
348 m_pool.push(builder);
\r
349 XMLPlatformUtils::unlockMutex(m_lock);
\r
353 unsigned int StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toFill, const unsigned int maxToRead)
\r
355 XMLByte* target=toFill;
\r
356 unsigned int bytes_read=0,request=maxToRead;
\r
358 // Fulfill the rest by reading from the stream.
\r
359 if (request && !m_is.eof()) {
\r
361 m_is.read(reinterpret_cast<char* const>(target),request);
\r
362 m_pos+=m_is.gcount();
\r
363 bytes_read+=m_is.gcount();
\r
366 Category::getInstance(XMLTOOLING_LOGCAT".StreamInputSource").critStream() <<
\r
367 "XML::StreamInputSource::StreamBinInputStream::readBytes caught an exception" << CategoryStream::ENDLINE;
\r