SSPCPP-616 - clean up concatenated string literals
[shibboleth/cpp-xmltooling.git] / xmltooling / util / ParserPool.cpp
index 72156c0..71a36f5 100644 (file)
 #include "logging.h"
 #include "util/CurlURLInputStream.h"
 #include "util/NDC.h"
+#include "util/PathResolver.h"
 #include "util/ParserPool.h"
 #include "util/Threads.h"
 #include "util/XMLHelper.h"
 
-#include <algorithm>
-#include <functional>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <algorithm>
+#include <functional>
+#include <boost/algorithm/string.hpp>
+#include <boost/bind.hpp>
 #include <xercesc/util/PlatformUtils.hpp>
 #include <xercesc/util/XMLUniDefs.hpp>
 #include <xercesc/sax/SAXException.hpp>
@@ -47,6 +50,7 @@
 using namespace xmltooling::logging;
 using namespace xmltooling;
 using namespace xercesc;
+using namespace boost;
 using namespace std;
 
 
@@ -62,7 +66,7 @@ namespace {
 #ifdef _DEBUG
             xmltooling::NDC ndc("handleError");
 #endif
-            Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
+            Category& log=Category::getInstance(XMLTOOLING_LOGCAT ".ParserPool");
 
             DOMLocator* locator=e.getLocation();
             auto_ptr_char temp(e.getMessage());
@@ -100,7 +104,17 @@ namespace {
 
 
 ParserPool::ParserPool(bool namespaceAware, bool schemaAware)
-    : m_namespaceAware(namespaceAware), m_schemaAware(schemaAware), m_lock(Mutex::create()), m_security(new SecurityManager()) {}
+        : m_namespaceAware(namespaceAware), m_schemaAware(schemaAware), m_lock(Mutex::create()), m_security(new SecurityManager()) {
+    // Take the entity expansion limit from the environment when it holds a positive integer, otherwise from XMLTOOLING_ENTITY_EXPANSION_LIMIT.
+    int expLimit = 0;  // 0 doubles as "no usable override found"
+    const char* env = getenv("XMLTOOLING_ENTITY_EXPANSION_LIMIT");  // optional override from the process environment
+    if (env) {
+        expLimit = atoi(env);  // atoi returns 0 when no number can be parsed, which triggers the fallback below
+    }
+    if (expLimit <= 0)  // unset, unparsable, zero, or negative
+        expLimit = XMLTOOLING_ENTITY_EXPANSION_LIMIT;  // identifier defined outside this diff
+    m_security->setEntityExpansionLimit(expLimit);  // applied to the SecurityManager created in the initializer list
+}
 
 ParserPool::~ParserPool()
 {
@@ -108,8 +122,6 @@ ParserPool::~ParserPool()
         m_pool.top()->release();
         m_pool.pop();
     }
-    delete m_lock;
-    delete m_security;
 }
 
 DOMDocument* ParserPool::newDocument()
@@ -197,8 +209,7 @@ DOMDocument* ParserPool::parse(istream& is)
 }
 
 // Functor to double its argument separated by a character and append to a buffer
-template <class T> class doubleit
-{
+template <class T> class doubleit {
 public:
     doubleit(T& t, const typename T::value_type& s) : temp(t), sep(s) {}
     void operator() (const pair<const T,T>& s) { temp += s.first + sep + s.first + sep; }
@@ -222,23 +233,41 @@ bool ParserPool::loadSchema(const XMLCh* nsURI, const XMLCh* pathname)
 #if _DEBUG
         xmltooling::NDC ndc("loadSchema");
 #endif
-        Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
+        Category& log=Category::getInstance(XMLTOOLING_LOGCAT ".ParserPool");
         auto_ptr_char n(nsURI);
         log.error("failed to load schema for (%s), file not found (%s)",n.get(),p.get());
         return false;
     }
 
+    // Roundtrip to local code page and back to translate path as needed.
+    string topath(p.get());
+    XMLToolingConfig::getConfig().getPathResolver()->resolve(topath, PathResolver::XMLTOOLING_XML_FILE);
+    auto_ptr_XMLCh temp(topath.c_str());
+
     Lock lock(m_lock);
-    m_schemaLocMap[nsURI]=pathname;
+    m_schemaLocMap[nsURI] = temp.get();
     m_schemaLocations.erase();
-    for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
+    for_each(m_schemaLocMap.begin(), m_schemaLocMap.end(), doubleit<xstring>(m_schemaLocations,chSpace));
 
     return true;
 }
 
+bool ParserPool::loadCatalogs(const char* pathnames)  // loads each catalog named in a PATH_SEPARATOR_STR-delimited list
+{
+    string temp(pathnames);  // NOTE(review): a null pathnames is undefined behaviour here; callers must pass non-null
+    trim(temp);  // drop leading/trailing whitespace before splitting
+    vector<string> catpaths;
+    split(catpaths, temp, is_any_of(PATH_SEPARATOR_STR), algorithm::token_compress_on);  // adjacent separators are merged
+    static bool (ParserPool::* lc)(const char*) = &ParserPool::loadCatalog;  // selects the const char* overload so bind is unambiguous
+    for_each(catpaths.begin(), catpaths.end(), boost::bind(lc, this, boost::bind(&string::c_str, _1)));  // this->loadCatalog(entry.c_str()) per entry; results are discarded
+    return !catpaths.empty();  // NOTE(review): reflects only that a token exists, not load success; split may yield an empty token for empty input - confirm
+}
+
 bool ParserPool::loadCatalog(const char* pathname)
 {
-    auto_ptr_XMLCh temp(pathname);
+    string p(pathname);  // local copy handed to the resolver; presumably rewritten in place - confirm against PathResolver
+    XMLToolingConfig::getConfig().getPathResolver()->resolve(p, PathResolver::XMLTOOLING_XML_FILE);  // resolve as an XML file path via the configured PathResolver
+    auto_ptr_XMLCh temp(p.c_str());  // transcode the resolved path for the XMLCh* overload
     return loadCatalog(temp.get());
 }
 
@@ -247,7 +276,7 @@ bool ParserPool::loadCatalog(const XMLCh* pathname)
 #if _DEBUG
     xmltooling::NDC ndc("loadCatalog");
 #endif
-    Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
+    Category& log=Category::getInstance(XMLTOOLING_LOGCAT ".ParserPool");
 
     // XML constants
     static const XMLCh catalog[] =  UNICODE_LITERAL_7(c,a,t,a,l,o,g);
@@ -287,18 +316,25 @@ bool ParserPool::loadCatalog(const XMLCh* pathname)
         }
 
         // Fetch all the <system> elements.
-        DOMNodeList* mappings=root->getElementsByTagNameNS(CATALOG_NS,system);
+        DOMNodeList* mappings = root->getElementsByTagNameNS(CATALOG_NS,system);
         Lock lock(m_lock);
-        for (XMLSize_t i=0; i<mappings->getLength(); i++) {
-            root=static_cast<DOMElement*>(mappings->item(i));
-            const XMLCh* from=root->getAttributeNS(nullptr,systemId);
-            const XMLCh* to=root->getAttributeNS(nullptr,uri);
-            m_schemaLocMap[from]=to;
+        for (XMLSize_t i = 0; i < mappings->getLength(); i++) {
+            root = static_cast<DOMElement*>(mappings->item(i));
+            const XMLCh* from = root->getAttributeNS(nullptr,systemId);
+            const XMLCh* to = root->getAttributeNS(nullptr,uri);
+
+            // Roundtrip to local code page and back to translate path as needed.
+            auto_ptr_char temp(to);
+            string topath(temp.get());
+            XMLToolingConfig::getConfig().getPathResolver()->resolve(topath, PathResolver::XMLTOOLING_XML_FILE);
+            auto_ptr_XMLCh temp2(topath.c_str());
+
+            m_schemaLocMap[from] = temp2.get();
         }
         m_schemaLocations.erase();
-        for_each(m_schemaLocMap.begin(),m_schemaLocMap.end(),doubleit<xstring>(m_schemaLocations,chSpace));
+        for_each(m_schemaLocMap.begin(), m_schemaLocMap.end(), doubleit<xstring>(m_schemaLocations,chSpace));
     }
-    catch (exception& e) {
+    catch (std::exception& e) {
         log.error("catalog loader caught exception: %s", e.what());
         return false;
     }
@@ -325,8 +361,9 @@ DOMInputSource* ParserPool::resolveEntity(
 #endif
     if (!systemId)
         return nullptr;
+    xstring sysId(systemId);
 
-    Category& log=Category::getInstance(XMLTOOLING_LOGCAT".ParserPool");
+    Category& log=Category::getInstance(XMLTOOLING_LOGCAT ".ParserPool");
     if (log.isDebugEnabled()) {
         auto_ptr_char sysId(systemId);
         auto_ptr_char base(baseURI);
@@ -334,25 +371,28 @@ DOMInputSource* ParserPool::resolveEntity(
     }
 
     // Find well-known schemas in the specified location.
-    map<xstring,xstring>::const_iterator i=m_schemaLocMap.find(systemId);
-    if (i!=m_schemaLocMap.end())
-        return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str()));
-
-    // Check for entity as a value in the map.
-    for (i=m_schemaLocMap.begin(); i!=m_schemaLocMap.end(); ++i) {
-        if (XMLString::endsWith(i->second.c_str(), systemId))
-            return new Wrapper4InputSource(new LocalFileInputSource(baseURI,i->second.c_str()));
-    }
+    map<xstring,xstring>::const_iterator i = m_schemaLocMap.find(sysId);
+    if (i != m_schemaLocMap.end())
+        return new Wrapper4InputSource(new LocalFileInputSource(baseURI, i->second.c_str()));
+
+    // Check for entity as a suffix of a value in the map.
+    bool (*p_ends_with)(const xstring&, const xstring&) = ends_with;
+    i = find_if(
+        m_schemaLocMap.begin(), m_schemaLocMap.end(),
+        boost::bind(p_ends_with, boost::bind(&map<xstring,xstring>::value_type::second, _1), boost::ref(sysId))
+        );
+    if (i != m_schemaLocMap.end())
+        return new Wrapper4InputSource(new LocalFileInputSource(baseURI, i->second.c_str()));
 
     // We'll allow anything without embedded slashes.
-    if (XMLString::indexOf(systemId, chForwardSlash)==-1)
-        return new Wrapper4InputSource(new LocalFileInputSource(baseURI,systemId));
+    if (XMLString::indexOf(systemId, chForwardSlash) == -1 && XMLString::indexOf(systemId, chBackSlash) == -1)
+        return new Wrapper4InputSource(new LocalFileInputSource(baseURI, systemId));
 
     // Shortcircuit the request.
     auto_ptr_char temp(systemId);
     log.debug("unauthorized entity request (%s), blocking it", temp.get());
     static const XMLByte nullbuf[] = {0};
-    return new Wrapper4InputSource(new MemBufInputSource(nullbuf,0,systemId));
+    return new Wrapper4InputSource(new MemBufInputSource(nullbuf, 0, systemId));
 }
 
 #ifdef XMLTOOLING_XERCESC_COMPLIANT_DOMLS
@@ -376,7 +416,7 @@ DOMLSParser* ParserPool::createBuilder()
     parser->getDomConfig()->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
     parser->getDomConfig()->setParameter(XMLUni::fgXercesDisableDefaultEntityResolution, true);
     parser->getDomConfig()->setParameter(XMLUni::fgDOMResourceResolver, dynamic_cast<DOMLSResourceResolver*>(this));
-    parser->getDomConfig()->setParameter(XMLUni::fgXercesSecurityManager, m_security);
+    parser->getDomConfig()->setParameter(XMLUni::fgXercesSecurityManager, m_security.get());
     return parser;
 }
 
@@ -420,7 +460,7 @@ DOMBuilder* ParserPool::createBuilder()
         // This ensures the entity resolver will be given the namespace as a systemId it can check.
         parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,const_cast<XMLCh*>(m_schemaLocations.c_str()));
     }
-    parser->setProperty(XMLUni::fgXercesSecurityManager, m_security);
+    parser->setProperty(XMLUni::fgXercesSecurityManager, m_security.get());
     parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
     parser->setFeature(XMLUni::fgXercesDisableDefaultEntityResolution, true);
     parser->setEntityResolver(this);
@@ -494,7 +534,7 @@ xsecsize_t StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toF
             bytes_read+=m_is.gcount();
         }
         catch(ios_base::failure& e) {
-            Category::getInstance(XMLTOOLING_LOGCAT".StreamInputSource").critStream()
+            Category::getInstance(XMLTOOLING_LOGCAT ".StreamInputSource").critStream()
                 << "XML::StreamInputSource::StreamBinInputStream::readBytes caught an exception: " << e.what()
                 << logging::eol;
             *toFill=0;