2 * Licensed to the University Corporation for Advanced Internet
3 * Development, Inc. (UCAID) under one or more contributor license
4 * agreements. See the NOTICE file distributed with this work for
5 * additional information regarding copyright ownership.
7 * UCAID licenses this file to you under the Apache License,
8 * Version 2.0 (the "License"); you may not use this file except
9 * in compliance with the License. You may obtain a copy of the
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
17 * either express or implied. See the License for the specific
18 * language governing permissions and limitations under the License.
21 /** XMLRequestMapper.cpp
23 * XML-based RequestMapper implementation.
27 #include "exceptions.h"
28 #include "AccessControl.h"
29 #include "RequestMapper.h"
30 #include "SPRequest.h"
31 #include "util/CGIParser.h"
32 #include "util/DOMPropertySet.h"
33 #include "util/SPConstants.h"
36 #include <boost/shared_ptr.hpp>
37 #include <boost/lexical_cast.hpp>
38 #include <boost/tokenizer.hpp>
39 #include <boost/tuple/tuple.hpp>
40 #include <boost/algorithm/string.hpp>
41 #include <xmltooling/util/NDC.h>
42 #include <xmltooling/util/ReloadableXMLFile.h>
43 #include <xmltooling/util/Threads.h>
44 #include <xmltooling/util/XMLHelper.h>
45 #include <xercesc/util/XMLUniDefs.hpp>
46 #include <xercesc/util/regx/RegularExpression.hpp>
48 using shibspconstants::SHIB2SPCONFIG_NS;
49 using namespace shibsp;
50 using namespace xmltooling;
51 using namespace boost;
// Fallback AccessControl implementation. Override::loadACL installs an
// instance of it when constructing the configured provider throws.
56 // Blocks access when an ACL plugin fails to load.
57 class AccessControlDummy : public AccessControl
// Denies unconditionally: the only statement is the return of shib_acl_false,
// so neither the request nor the session is consulted.
66 aclresult_t authorized(const SPRequest& request, const Session* session) const {
67 return shib_acl_false;
// One node of the request-map tree. It is a property set (DOMPropertySet) and
// a DOM node filter, and it owns child overrides reachable by exact name
// (m_map), by regular expression (m_regexps) or by query parameter (m_queries).
71 class Override : public DOMPropertySet, public DOMNodeFilter
// Creates an override with no children; only the unicodeAware flag is recorded.
74 Override(bool unicodeAware=false) : m_unicodeAware(unicodeAware) {}
// Builds an override from DOM element `e`, logging through `log`. Nested
// overrides pass themselves as `base`; how `base` is stored is not visible in
// this excerpt (presumably as the property-set parent - confirm).
75 Override(bool unicodeAware, const DOMElement* e, Category& log, const Override* base=nullptr);
78 // Provides filter to exclude special config elements.
// The declaration of acceptNode's return type is selected by this macro; the
// alternatives themselves are not visible in this excerpt.
79 #ifdef SHIBSP_XERCESC_SHORT_ACCEPTNODE
84 acceptNode(const DOMNode* node) const {
// Resolves the most specific override for a request: path segments first,
// then path regular expressions, then query-string parameters.
88 const Override* locate(const HTTPRequest& request) const;
// Returns this node's own ACL when one is set; otherwise asks the parent
// (if any) for its ACL; otherwise nullptr.
// NOTE(review): the dynamic_cast result is dereferenced without a null check,
// so this assumes every parent is an Override - confirm.
89 AccessControl* getAC() const { return (m_acl ? m_acl.get() : (getParent() ? dynamic_cast<const Override*>(getParent())->getAC() : nullptr)); }
// Builds m_acl from the access-control child element of `e`, if present.
92 void loadACL(const DOMElement* e, Category& log);
// Children looked up by exact string key (path segment names here; the
// derived XMLRequestMapperImpl stores "scheme://host[:port]" keys).
95 map< string,boost::shared_ptr<Override> > m_map;
// (compiled regex, child) pairs, tried in insertion order.
96 vector< pair< boost::shared_ptr<RegularExpression>,boost::shared_ptr<Override> > > m_regexps;
// (parameter name, value regex, child) tuples; the regex pointer is empty
// when the Query element has no regex attribute.
97 vector< tuple< string,boost::shared_ptr<RegularExpression>,boost::shared_ptr<Override> > > m_queries;
// Access-control provider owned by this node; empty when none is configured.
100 scoped_ptr<AccessControl> m_acl;
// Root of the override tree for one loaded configuration. It extends Override
// with host-level lookup and with a handle on the DOM document it was built from.
103 class XMLRequestMapperImpl : public Override
// Builds the tree from the RequestMap element `e`, logging through `log`.
106 XMLRequestMapperImpl(const DOMElement* e, Category& log);
// Releases the held DOM document. Any guard around the call is not visible
// in this excerpt.
108 ~XMLRequestMapperImpl() {
110 m_document->release();
// Hands a DOM document to this object. The body is not visible here;
// presumably it stores `doc` in m_document - confirm.
113 void setDocument(DOMDocument* doc) {
// Finds the override for virtual host key `vhost`, then refines it with the
// request's path and query via Override::locate.
117 const Override* findOverride(const char* vhost, const HTTPRequest& request) const;
// DOM document associated with this configuration; the constructor
// initializes it to nullptr.
120 DOMDocument* m_document;
// MSVC only: suppress warning 4250 for the class below (per the MSVC
// documentation this is the "inherits via dominance" warning), restoring the
// previous warning state afterwards.
123 #if defined (_MSC_VER)
124 #pragma warning( push )
125 #pragma warning( disable : 4250 )
// RequestMapper plugin backed by a reloadable XML file. The parsed
// configuration lives in m_impl.
128 class XMLRequestMapper : public RequestMapper, public ReloadableXMLFile
// Passes the configuration element and the "<SHIBSP_LOGCAT>.RequestMapper"
// logging category to ReloadableXMLFile. The constructor body is not visible
// in this excerpt.
131 XMLRequestMapper(const DOMElement* e) : ReloadableXMLFile(e,Category::getInstance(SHIBSP_LOGCAT".RequestMapper")) {
// Destructor body is not visible in this excerpt.
135 ~XMLRequestMapper() {
// Returns the property set and access-control provider that apply to `request`.
139 Settings getSettings(const HTTPRequest& request) const;
// Loads the XML source and builds a new XMLRequestMapperImpl from it.
142 pair<bool,DOMElement*> background_load();
// Currently active parsed configuration.
145 scoped_ptr<XMLRequestMapperImpl> m_impl;
148 #if defined (_MSC_VER)
149 #pragma warning( pop )
// Plugin factory: allocates a new XMLRequestMapper from configuration element
// `e` and returns the raw pointer. registerRequestMappers registers it for
// both the XML and the Native request mapper types.
152 RequestMapper* SHIBSP_DLLLOCAL XMLRequestMapperFactory(const DOMElement* const & e)
154 return new XMLRequestMapper(e);
// XMLCh spellings of the element and attribute names this file looks up.
// Element names: AccessControl, AccessControlProvider, Host, HostRegex,
// htaccess, Path, PathRegex, Query.
// Attribute names: ignoreCase, name, regex, type.
// ignoreOption is the one-character string "i", passed as the options
// argument when constructing a RegularExpression.
157 static const XMLCh _AccessControl[] = UNICODE_LITERAL_13(A,c,c,e,s,s,C,o,n,t,r,o,l);
158 static const XMLCh AccessControlProvider[] = UNICODE_LITERAL_21(A,c,c,e,s,s,C,o,n,t,r,o,l,P,r,o,v,i,d,e,r);
159 static const XMLCh Host[] = UNICODE_LITERAL_4(H,o,s,t);
160 static const XMLCh HostRegex[] = UNICODE_LITERAL_9(H,o,s,t,R,e,g,e,x);
161 static const XMLCh htaccess[] = UNICODE_LITERAL_8(h,t,a,c,c,e,s,s);
162 static const XMLCh ignoreCase[] = UNICODE_LITERAL_10(i,g,n,o,r,e,C,a,s,e);
163 static const XMLCh ignoreOption[] = UNICODE_LITERAL_1(i);
164 static const XMLCh Path[] = UNICODE_LITERAL_4(P,a,t,h);
165 static const XMLCh PathRegex[] = UNICODE_LITERAL_9(P,a,t,h,R,e,g,e,x);
166 static const XMLCh Query[] = UNICODE_LITERAL_5(Q,u,e,r,y);
167 static const XMLCh name[] = UNICODE_LITERAL_4(n,a,m,e);
168 static const XMLCh regex[] = UNICODE_LITERAL_5(r,e,g,e,x);
169 static const XMLCh _type[] = UNICODE_LITERAL_4(t,y,p,e);
// Registers the request mapper plugin factories with the SP configuration.
// Both the XML_REQUEST_MAPPER and NATIVE_REQUEST_MAPPER type names map to the
// same factory, XMLRequestMapperFactory.
172 void SHIBSP_API shibsp::registerRequestMappers()
174 SPConfig& conf=SPConfig::getConfig();
175 conf.RequestMapperManager.registerFactory(XML_REQUEST_MAPPER, XMLRequestMapperFactory);
176 conf.RequestMapperManager.registerFactory(NATIVE_REQUEST_MAPPER, XMLRequestMapperFactory);
// Out-of-line definition of the RequestMapper base-class constructor.
// Its body is not visible in this excerpt.
179 RequestMapper::RequestMapper()
// Out-of-line definition of the RequestMapper base-class destructor.
// Its body is not visible in this excerpt.
183 RequestMapper::~RequestMapper()
// Builds this node's access-control provider from a child of `e`.
// Three child element names are looked up in turn: htaccess, AccessControl,
// and AccessControlProvider (whose plugin type comes from its type attribute).
// The conditions guarding each step are not visible in this excerpt;
// presumably a later lookup only runs when the earlier one found nothing.
// Any std::exception raised while building is logged at crit level and
// replaced by an AccessControlDummy, which denies all access.
187 void Override::loadACL(const DOMElement* e, Category& log)
// Option 1: Apache htaccess-style provider.
190 const DOMElement* acl = XMLHelper::getFirstChildElement(e,htaccess);
192 log.info("building Apache htaccess AccessControl provider...");
193 m_acl.reset(SPConfig::getConfig().AccessControlManager.newPlugin(HT_ACCESS_CONTROL,acl));
// Option 2: built-in XML provider.
196 acl = XMLHelper::getFirstChildElement(e,_AccessControl);
198 log.info("building XML-based AccessControl provider...");
199 m_acl.reset(SPConfig::getConfig().AccessControlManager.newPlugin(XML_ACCESS_CONTROL,acl));
// Option 3: arbitrary plugin named by the type attribute.
202 acl = XMLHelper::getFirstChildElement(e,AccessControlProvider);
204 string t(XMLHelper::getAttrString(acl, nullptr, _type));
206 log.info("building AccessControl provider of type %s...", t.c_str());
207 m_acl.reset(SPConfig::getConfig().AccessControlManager.newPlugin(t.c_str(), acl));
// Raised for an AccessControlProvider without a usable type. Being a throw
// inside this function, it is presumably also caught by the handler below.
210 throw ConfigurationException("<AccessControlProvider> missing type attribute.");
// Fail closed: a provider that cannot be built must not leave the content open.
216 catch (std::exception& ex) {
217 log.crit("exception building AccessControl provider: %s", ex.what());
218 m_acl.reset(new AccessControlDummy());
// Builds an override node from DOM element `e` and recursively builds its
// children from nested Path, PathRegex and Query elements.
//   unicodeAware - when true, Path names are keyed as UTF-8 without case
//                  folding; otherwise they are lowercased with tolower().
//   e            - element supplying the properties and the nested elements.
//   log          - logging category for warnings, errors and debug output.
//   base         - the enclosing override (children receive `this`); its use
//                  is not visible in this excerpt.
// Throws ConfigurationException when a PathRegex or Query regular expression
// cannot be compiled.
222 Override::Override(bool unicodeAware, const DOMElement* e, Category& log, const Override* base)
223 : m_unicodeAware(unicodeAware)
225 // Load the property set.
226 load(e, nullptr, this);
229 // Load any AccessControl provider.
232 // Handle nested Paths.
// `i` is a 1-based element counter used only in log messages.
233 DOMElement* path = XMLHelper::getFirstChildElement(e, Path);
234 for (int i = 1; path; ++i, path = XMLHelper::getNextSiblingElement(path, Path)) {
235 const XMLCh* n = path->getAttributeNS(nullptr,name);
237 // Skip any leading slashes.
238 while (n && *n == chForwardSlash)
241 // Check for empty name.
243 log.warn("skipping Path element (%d) with empty name attribute", i);
// A multi-segment name such as "a/b" is rewritten in the DOM as a Path "a"
// wrapping a Path "b", so that each Override maps exactly one segment.
247 // Check for an embedded slash.
248 int slash = XMLString::indexOf(n, chForwardSlash);
250 // Copy the first path segment.
252 for (int pos = 0; pos < slash; ++pos)
255 // Move past the slash in the original pathname.
258 // Skip any leading slashes again.
259 while (*n == chForwardSlash)
263 // Create a placeholder Path element for the first path segment and replant under it.
264 DOMElement* newpath = path->getOwnerDocument()->createElementNS(shibspconstants::SHIB2SPCONFIG_NS, Path);
265 newpath->setAttributeNS(nullptr, name, namebuf.c_str());
266 path->setAttributeNS(nullptr, name, n);
267 path->getParentNode()->replaceChild(newpath, path);
268 newpath->appendChild(path);
270 // Repoint our locals at the new parent.
272 n = path->getAttributeNS(nullptr, name);
275 // All we had was a pathname with trailing slash(es), so just reset it without them.
276 path->setAttributeNS(nullptr, name, namebuf.c_str());
277 n = path->getAttributeNS(nullptr, name);
// Build the child, then derive its map key in `dup`, a C string obtained
// from toUTF8 (malloc mode) or strdup. Its release is not visible in this
// excerpt - confirm it is freed on every path.
283 boost::shared_ptr<Override> o(new Override(m_unicodeAware, path, log, this));
284 if (m_unicodeAware) {
285 dup = toUTF8(o->getXMLString("name").second, true /* use malloc */);
// NOTE(review): tolower() is given a plain char here; bytes above 0x7F may be
// negative, which the C library does not define - consider an unsigned char cast.
288 dup = strdup(o->getString("name").second);
289 for (char* pch = dup; *pch; ++pch)
290 *pch = tolower(*pch);
// The first Path with a given key wins; later duplicates are only warned about.
292 if (m_map.count(dup)) {
293 log.warn("skipping duplicate Path element (%s)", dup);
297 log.debug("added Path mapping (%s)", dup);
301 catch (std::exception&) {
// PathRegex children are only processed when this element is not itself a
// PathRegex, i.e. PathRegex elements do not nest.
307 if (!XMLString::equals(e->getLocalName(), PathRegex)) {
308 // Handle nested PathRegexs.
309 path = XMLHelper::getFirstChildElement(e, PathRegex);
310 for (int i = 1; path; ++i, path = XMLHelper::getNextSiblingElement(path, PathRegex)) {
311 const XMLCh* n = path->getAttributeNS(nullptr, regex);
313 log.warn("skipping PathRegex element (%d) with empty regex attribute",i);
317 boost::shared_ptr<Override> o(new Override(m_unicodeAware, path, log, this));
// NOTE(review): `flag` is the ignoreCase attribute (default true), yet a true
// value selects &chNull (no options) and a false value selects ignoreOption
// ("i"). The HostRegex loop in XMLRequestMapperImpl maps false/0 to &chNull
// instead, so one of the two looks inverted. Confirm the intended semantics
// before changing; deployed configurations may depend on current behavior.
319 bool flag = XMLHelper::getAttrBool(path, true, ignoreCase);
321 boost::shared_ptr<RegularExpression> re(new RegularExpression(n, flag ? &chNull : ignoreOption));
322 m_regexps.push_back(make_pair(re, o));
// An invalid expression is a fatal configuration error.
324 catch (XMLException& ex) {
325 auto_ptr_char tmp(ex.getMessage());
326 log.error("caught exception while parsing PathRegex regular expression (%d): %s", i, tmp.get());
327 throw ConfigurationException("Invalid regular expression in PathRegex element.");
330 if (log.isDebugEnabled())
331 log.debug("added <PathRegex> mapping (%s)", o->getString("regex").second);
335 // Handle nested Querys.
// Each Query contributes (parameter name, optional value regex, child).
336 path = XMLHelper::getFirstChildElement(e, Query);
337 for (int i = 1; path; ++i, path = XMLHelper::getNextSiblingElement(path, Query)) {
338 const XMLCh* n = path->getAttributeNS(nullptr, name);
340 log.warn("skipping Query element (%d) with empty name attribute",i);
343 auto_ptr_char ntemp(n);
344 const XMLCh* v = path->getAttributeNS(nullptr, regex);
347 boost::shared_ptr<Override> o(new Override(m_unicodeAware, path, log, this));
// No regex attribute (or an empty one) yields an empty regex pointer.
348 boost::shared_ptr<RegularExpression> re((v && *v) ? new RegularExpression(v) : nullptr);
349 m_queries.push_back(make_tuple(string(ntemp.get()), re, o));
351 catch (XMLException& ex) {
352 auto_ptr_char tmp(ex.getMessage());
353 log.error("caught exception while parsing Query regular expression (%d): %s", i, tmp.get());
354 throw ConfigurationException("Invalid regular expression in Query element.");
357 log.debug("added <Query> mapping (%s)", ntemp.get());
// Finds the most specific override that applies to `request`, starting at
// this node. Three stages run in order, each continuing from the node the
// previous stage reached:
//   1. exact match of successive path segments against m_map;
//   2. regular-expression match of the remaining path against m_regexps;
//   3. query-string parameter match against m_queries.
// Returns a pointer into the override tree. With no match at any stage the
// result is presumably this node itself - the return statement is not
// visible in this excerpt.
361 const Override* Override::locate(const HTTPRequest& request) const
363 // This function is confusing because it's *not* recursive.
364 // The whole path is tokenized and mapped in a loop, so the
365 // path parameter starts with the entire request path and
366 // we can skip the leading slash as irrelevant.
367 const char* path = request.getRequestURI();
371 // Now we copy the path, chop the query string, and possibly lower case it.
373 string::size_type sep = dup.find('?');
374 if (sep != string::npos)
375 dup = dup.substr(0, sep);
376 if (!m_unicodeAware) {
380 // Default is for the current object to provide settings.
381 const Override* o = this;
383 // Tokenize the path by segment and try and map each segment.
// The "/" separator means empty segments (repeated slashes) produce no token.
384 tokenizer< char_separator<char> > tokens(dup, char_separator<char>("/"));
385 for (tokenizer< char_separator<char> >::iterator token = tokens.begin(); token != tokens.end(); ++token) {
386 map< string,boost::shared_ptr<Override> >::const_iterator i = o->m_map.find(*token);
387 if (i == o->m_map.end())
388 break; // Once there's no match, we've consumed as much of the path as possible here.
389 // We found a match, so reset the settings pointer.
392 // We descended a step down the path, so we need to advance the original
393 // parameter for the regex step later.
394 path += token->length();
399 // If there's anything left, we try for a regex match on the rest of the path minus the query string.
402 sep = path2.find('?');
403 if (sep != string::npos)
404 path2 = path2.substr(0, sep);
// Regexes are tried in configuration order against the unconsumed path.
406 for (vector< pair< boost::shared_ptr<RegularExpression>,boost::shared_ptr<Override> > >::const_iterator re = o->m_regexps.begin(); re != o->m_regexps.end(); ++re) {
407 if (re->first->matches(path2.c_str())) {
408 o = re->second.get();
414 // Finally, check for query string matches. This is another "unrolled" recursive descent in a loop.
415 // To avoid consuming any POST data, we use a dedicated CGIParser.
416 if (!o->m_queries.empty()) {
// The second constructor argument presumably limits parsing to the query
// string - confirm against CGIParser.
418 CGIParser cgi(request, true);
// `descended` is declared and updated on lines not visible in this excerpt;
// the scan of the current node's queries stops once it becomes true.
421 for (vector< tuple< string,boost::shared_ptr<RegularExpression>,boost::shared_ptr<Override> > >::const_iterator q = o->m_queries.begin(); !descended && q != o->m_queries.end(); ++q) {
422 pair<CGIParser::walker,CGIParser::walker> vals = cgi.getParameters(q->get<0>().c_str());
// A non-empty range means the parameter is present in the request.
423 if (vals.first != vals.second) {
425 // We have to match one of the values.
426 while (vals.first != vals.second) {
427 if (q->get<1>()->matches(vals.first->second)) {
428 o = q->get<2>().get();
436 // The simple presence of the parameter is sufficient to match.
437 o = q->get<2>().get();
// Builds the root of the override tree from the RequestMap element `e`.
// It loads the root properties (defaulting applicationId to "default"),
// reads the unicodeAware flag, then registers one child Override per
// HostRegex element (in m_regexps) and per Host element (in m_map, keyed by
// "scheme://host" and/or "scheme://host:port").
// Throws ConfigurationException when `e` is non-null and is not a
// conf:RequestMap element.
448 XMLRequestMapperImpl::XMLRequestMapperImpl(const DOMElement* e, Category& log) : m_document(nullptr)
451 xmltooling::NDC ndc("XMLRequestMapperImpl");
453 static const XMLCh _RequestMap[] = UNICODE_LITERAL_10(R,e,q,u,e,s,t,M,a,p);
455 if (e && !XMLHelper::isNodeNamed(e, SHIB2SPCONFIG_NS, _RequestMap))
456 throw ConfigurationException("XML RequestMapper requires conf:RequestMap at root of configuration.");
458 // Load the property set.
459 load(e, nullptr, this);
461 // Inject "default" app ID if not explicit.
462 if (!getString("applicationId").first)
463 setProperty("applicationId", "default");
465 // Load any AccessControl provider.
// The flag is true only when the property is both present and set to true.
468 pair<bool,bool> unicodeAware = getBool("unicodeAware");
469 m_unicodeAware = (unicodeAware.first && unicodeAware.second);
471 // Loop over the HostRegex elements.
// `i` is a 1-based element counter used only in log messages.
472 const DOMElement* host = XMLHelper::getFirstChildElement(e, HostRegex);
473 for (int i = 1; host; ++i, host = XMLHelper::getNextSiblingElement(host, HostRegex)) {
474 const XMLCh* n = host->getAttributeNS(nullptr,regex);
476 log.warn("Skipping HostRegex element (%d) with empty regex attribute", i);
480 boost::shared_ptr<Override> o(new Override(m_unicodeAware, host, log, this));
// Matching is case-insensitive ("i" option) unless the ignoreCase attribute
// starts with 'f' or '0'.
482 const XMLCh* flag = host->getAttributeNS(nullptr,ignoreCase);
484 boost::shared_ptr<RegularExpression> re(
485 new RegularExpression(n, (flag && (*flag==chLatin_f || *flag==chDigit_0)) ? &chNull : ignoreOption)
487 m_regexps.push_back(make_pair(re, o));
// NOTE(review): unlike the PathRegex and Query handlers in Override, no throw
// is visible after this log.error. If the handler really just falls through,
// the debug message below reads m_regexps.back() although nothing was pushed:
// that names the previous entry, or is undefined on an empty vector. Confirm
// and either rethrow as ConfigurationException or skip the debug log.
489 catch (XMLException& ex) {
490 auto_ptr_char tmp(ex.getMessage());
491 log.error("caught exception while parsing HostRegex regular expression (%d): %s", i, tmp.get());
494 log.debug("Added <HostRegex> mapping for %s", m_regexps.back().second->getString("regex").second);
497 // Loop over the Host elements.
498 host = XMLHelper::getFirstChildElement(e, Host);
499 for (int i = 1; host; ++i, host = XMLHelper::getNextSiblingElement(host, Host)) {
500 const XMLCh* n=host->getAttributeNS(nullptr,name);
502 log.warn("Skipping Host element (%d) with empty name attribute", i);
506 boost::shared_ptr<Override> o(new Override(m_unicodeAware, host, log, this));
// From here on the local `name` shadows the file-scope XMLCh constant `name`.
507 pair<bool,const char*> name=o->getString("name");
508 pair<bool,const char*> scheme=o->getString("scheme");
509 pair<bool,const char*> port=o->getString("port");
// `dup` holds the host name used to build map keys.
511 string dup(name.first ? name.second : "");
514 if (!scheme.first && port.first) {
515 // No scheme, but a port, so assume http.
516 scheme = pair<bool,const char*>(true,"http");
518 else if (scheme.first && !port.first) {
519 // Scheme, no port, so default it.
520 // XXX Use getservbyname instead?
// The assignments made in each branch are not visible in this excerpt;
// presumably each sets the well-known port that is tested again below.
522 if (!strcmp(scheme.second,"http"))
524 else if (!strcmp(scheme.second,"https"))
526 else if (!strcmp(scheme.second,"ftp"))
528 else if (!strcmp(scheme.second,"ldap"))
530 else if (!strcmp(scheme.second,"ldaps"))
535 string url(scheme.second);
536 url=url + "://" + dup;
538 // Is this the default port?
// On a default port the host is reachable both with and without the explicit
// port suffix, so both keys are registered.
539 if ((!strcmp(scheme.second,"http") && !strcmp(port.second,"80")) ||
540 (!strcmp(scheme.second,"https") && !strcmp(port.second,"443")) ||
541 (!strcmp(scheme.second,"ftp") && !strcmp(port.second,"21")) ||
542 (!strcmp(scheme.second,"ldap") && !strcmp(port.second,"389")) ||
543 (!strcmp(scheme.second,"ldaps") && !strcmp(port.second,"636"))) {
544 // First store a port-less version.
545 if (m_map.count(url)) {
546 log.warn("Skipping duplicate Host element (%s)",url.c_str());
550 log.debug("Added <Host> mapping for %s",url.c_str());
552 // Now append the port. The shared_ptr should refcount the Override to avoid double deletes.
553 url=url + ':' + port.second;
555 log.debug("Added <Host> mapping for %s",url.c_str());
// Non-default port: only the key with the explicit port is registered.
558 url=url + ':' + port.second;
559 if (m_map.count(url)) {
560 log.warn("Skipping duplicate Host element (%s)",url.c_str());
564 log.debug("Added <Host> mapping for %s",url.c_str());
568 // No scheme or port, so we enter dual hosts on http:80 and https:443
// Four duplicate checks follow. How `url` is extended between them is only
// partly visible here; presumably the keys are http://host, http://host:80,
// https://host and https://host:443.
569 string url("http://");
571 if (m_map.count(url)) {
572 log.warn("Skipping duplicate Host element (%s)",url.c_str());
576 log.debug("Added <Host> mapping for %s",url.c_str());
579 if (m_map.count(url)) {
580 log.warn("Skipping duplicate Host element (%s)",url.c_str());
584 log.debug("Added <Host> mapping for %s",url.c_str());
586 url = "https://" + dup;
587 if (m_map.count(url)) {
588 log.warn("Skipping duplicate Host element (%s)",url.c_str());
592 log.debug("Added <Host> mapping for %s",url.c_str());
595 if (m_map.count(url)) {
596 log.warn("Skipping duplicate Host element (%s)",url.c_str());
600 log.debug("Added <Host> mapping for %s",url.c_str());
// Selects the host-level override for `vhost` and refines it for `request`.
// An exact lookup of `vhost` in m_map is done first; the HostRegex entries
// are then scanned in order while no override has been chosen. The statements
// that assign `o` on a hit are not visible in this excerpt.
// Returns the result of locate() on the chosen host override, or this root
// object when no host matched.
605 const Override* XMLRequestMapperImpl::findOverride(const char* vhost, const HTTPRequest& request) const
607 const Override* o = nullptr;
608 map< string,boost::shared_ptr<Override> >::const_iterator i = m_map.find(vhost);
609 if (i != m_map.end())
// The `!o` condition ends the scan as soon as an override has been selected.
612 for (vector< pair< boost::shared_ptr<RegularExpression>,boost::shared_ptr<Override> > >::const_iterator re = m_regexps.begin(); !o && re != m_regexps.end(); ++re) {
613 if (re->first->matches(vhost))
618 return o ? o->locate(request) : this;
// Loads the XML source through ReloadableXMLFile::load() and builds a fresh
// XMLRequestMapperImpl from the resulting root element. If construction
// throws, the janitor below still holds any owned document. The statement
// that installs the new impl is not visible in this excerpt.
// Returns (false, nullptr).
621 pair<bool,DOMElement*> XMLRequestMapper::background_load()
623 // Load from source using base class.
624 pair<bool,DOMElement*> raw = ReloadableXMLFile::load();
626 // If we own it, wrap it.
// raw.first indicates ownership: only then is the owner document guarded.
627 XercesJanitor<DOMDocument> docjanitor(raw.first ? raw.second->getOwnerDocument() : nullptr);
629 scoped_ptr<XMLRequestMapperImpl> impl(new XMLRequestMapperImpl(raw.second, m_log));
631 // If we held the document, transfer it to the impl. If we didn't, it's a no-op.
632 impl->setDocument(docjanitor.release());
634 // Perform the swap inside a lock.
// The meaning of the `false` argument is not shown here - confirm against
// the SharedLock declaration.
637 SharedLock locker(m_lock, false);
640 return make_pair(false,(DOMElement*)nullptr);
// Returns the settings that apply to `request`: the matching override's
// property set together with its effective access-control provider.
// The lookup key is "scheme://host:port", with the host name lowercased and
// the port always present.
// Throws ConfigurationException when an XMLException is raised during the
// lookup; the original message is logged at error level.
643 RequestMapper::Settings XMLRequestMapper::getSettings(const HTTPRequest& request) const
646 string normalizedhost(request.getHostname());
647 to_lower(normalizedhost);
648 string vhost = string(request.getScheme()) + "://" + normalizedhost + ':' + lexical_cast<string>(request.getPort());
649 const Override* o = m_impl->findOverride(vhost.c_str(), request);
// getAC() walks up the parents when the override has no ACL of its own.
650 return Settings(o, o->getAC());
652 catch (XMLException& ex) {
653 auto_ptr_char tmp(ex.getMessage());
654 m_log.error("caught exception while locating content settings: %s", tmp.get());
655 throw ConfigurationException("XML-based RequestMapper failed to retrieve content settings.");