From 04302f1b6d2b753b00ff2bb99b20a89488f2f13d Mon Sep 17 00:00:00 2001 From: Scott Cantor Date: Wed, 30 Dec 2009 03:58:16 +0000 Subject: [PATCH] https://issues.shibboleth.net/jira/browse/CPPXT-52 --- .cproject | 32 +++++----- schemas/xmltooling.xsd | 5 +- xmltooling/io/HTTPResponse.h | 1 + xmltooling/util/CurlURLInputStream.cpp | 113 +++++++++++++++++++++++++++++---- xmltooling/util/CurlURLInputStream.h | 18 ++++-- xmltooling/util/ParserPool.cpp | 18 ++++-- xmltooling/util/ParserPool.h | 13 +++- xmltooling/util/ReloadableXMLFile.cpp | 30 ++++++++- xmltooling/util/ReloadableXMLFile.h | 5 +- 9 files changed, 189 insertions(+), 46 deletions(-) diff --git a/.cproject b/.cproject index b7338db..23da8f4 100644 --- a/.cproject +++ b/.cproject @@ -1,4 +1,7 @@ - + + + + @@ -34,19 +37,6 @@ - - - - - - - - - - - - - @@ -76,7 +66,19 @@ - + + + + + + + + + + + + + diff --git a/schemas/xmltooling.xsd b/schemas/xmltooling.xsd index 05a67cb..279a13c 100644 --- a/schemas/xmltooling.xsd +++ b/schemas/xmltooling.xsd @@ -2,7 +2,8 @@ + elementFormDefault="qualified" + version="1.4"> @@ -21,4 +22,6 @@ + + diff --git a/xmltooling/io/HTTPResponse.h b/xmltooling/io/HTTPResponse.h index 912c0f8..3810cf3 100644 --- a/xmltooling/io/HTTPResponse.h +++ b/xmltooling/io/HTTPResponse.h @@ -84,6 +84,7 @@ namespace xmltooling { enum status_t { XMLTOOLING_HTTP_STATUS_OK = 200, XMLTOOLING_HTTP_STATUS_MOVED = 302, + XMLTOOLING_HTTP_STATUS_NOTMODIFIED = 304, XMLTOOLING_HTTP_STATUS_UNAUTHORIZED = 401, XMLTOOLING_HTTP_STATUS_FORBIDDEN = 403, XMLTOOLING_HTTP_STATUS_NOTFOUND = 404, diff --git a/xmltooling/util/CurlURLInputStream.cpp b/xmltooling/util/CurlURLInputStream.cpp index 1e21ea8..88219f2 100644 --- a/xmltooling/util/CurlURLInputStream.cpp +++ b/xmltooling/util/CurlURLInputStream.cpp @@ -24,6 +24,7 @@ #include "internal.h" #include +#include #include #include @@ -39,6 +40,7 @@ using namespace xmltooling; using namespace xercesc; +using namespace std; namespace { static const XMLCh _CURL[] = UNICODE_LITERAL_4(C,U,R,L); @@ -63,13 +65,48 @@ namespace { return CURLE_OK; } + + size_t curl_header_hook(void* ptr, size_t size, size_t nmemb, void* stream) + { + // only handle single-byte data + if (size!=1 || nmemb<5 || !stream) + return nmemb; + string* cacheTag = reinterpret_cast(stream); + const char* hdr = reinterpret_cast(ptr); + if (strncmp(hdr, "ETag:", 5) == 0) { + hdr += 5; + size_t remaining = nmemb - 5; + // skip leading spaces + while (remaining > 0) { + if (*hdr == ' ') { + ++hdr; + --remaining; + continue; + } + break; + } + // append until whitespace + while (remaining > 0) { + if (!isspace(*hdr)) { + (*cacheTag) += *hdr++; + --remaining; + continue; + } + break; + } + } + + return nmemb; + } } -CurlURLInputStream::CurlURLInputStream(const char* url) +CurlURLInputStream::CurlURLInputStream(const char* url, string* cacheTag) : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream")) - , fURL(url) + , fCacheTag(cacheTag) + , fURL(url ? url : "") , fMulti(0) , fEasy(0) + , fHeaders(0) , fTotalBytesRead(0) , fWritePtr(0) , fBytesRead(0) @@ -78,14 +115,19 @@ CurlURLInputStream::CurlURLInputStream(const char* url) , fBufferHeadPtr(fBuffer) , fBufferTailPtr(fBuffer) , fContentType(0) + , fStatusCode(200) { + if (fURL.empty()) + throw IOException("No URL supplied to CurlURLInputStream constructor."); init(); } -CurlURLInputStream::CurlURLInputStream(const XMLCh* url) +CurlURLInputStream::CurlURLInputStream(const XMLCh* url, string* cacheTag) : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream")) + , fCacheTag(cacheTag) , fMulti(0) , fEasy(0) + , fHeaders(0) , fTotalBytesRead(0) , fWritePtr(0) , fBytesRead(0) @@ -94,16 +136,23 @@ CurlURLInputStream::CurlURLInputStream(const XMLCh* url) , fBufferHeadPtr(fBuffer) , fBufferTailPtr(fBuffer) , fContentType(0) + , fStatusCode(200) { - auto_ptr_char temp(url); - fURL = temp.get(); + if (url) { + auto_ptr_char temp(url); + fURL = temp.get(); + } + if (fURL.empty()) + throw IOException("No URL supplied to CurlURLInputStream constructor."); init(); } -CurlURLInputStream::CurlURLInputStream(const DOMElement* e) +CurlURLInputStream::CurlURLInputStream(const DOMElement* e, string* cacheTag) : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream")) + , fCacheTag(cacheTag) , fMulti(0) , fEasy(0) + , fHeaders(0) , fTotalBytesRead(0) , fWritePtr(0) , fBytesRead(0) @@ -112,6 +161,7 @@ CurlURLInputStream::CurlURLInputStream(const DOMElement* e) , fBufferHeadPtr(fBuffer) , fBufferTailPtr(fBuffer) , fContentType(0) + , fStatusCode(200) { const XMLCh* attr = e->getAttributeNS(NULL, url); if (!attr || !*attr) { @@ -140,6 +190,10 @@ CurlURLInputStream::~CurlURLInputStream() curl_multi_cleanup(fMulti); } + if (fHeaders) { + curl_slist_free_all(fHeaders); + } + XMLString::release(&fContentType); } @@ -183,6 +237,20 @@ void CurlURLInputStream::init(const DOMElement* e) fError[0] = 0; curl_easy_setopt(fEasy, CURLOPT_ERRORBUFFER, fError); + // Check for cache tag. + if (fCacheTag) { + // Outgoing tag. + if (!fCacheTag->empty()) { + string hdr("If-None-Match: "); + hdr += *fCacheTag; + fHeaders = curl_slist_append(fHeaders, hdr.c_str()); + curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, fHeaders); + } + // Incoming tag. + curl_easy_setopt(fEasy, CURLOPT_HEADERFUNCTION, curl_header_hook); + curl_easy_setopt(fEasy, CURLOPT_HEADERDATA, fCacheTag); + } + if (e) { const XMLCh* flag = e->getAttributeNS(NULL, verifyHost); if (flag && (*flag == chLatin_f || *flag == chDigit_0)) @@ -239,22 +307,37 @@ void CurlURLInputStream::init(const DOMElement* e) try { readMore(&runningHandles); } - catch (XMLException& ex) { + catch (XMLException&) { curl_multi_remove_handle(fMulti, fEasy); curl_easy_cleanup(fEasy); fEasy = NULL; curl_multi_cleanup(fMulti); fMulti = NULL; - auto_ptr_char msg(ex.getMessage()); - throw IOException(msg.get()); + throw; } if(runningHandles == 0) break; } + // Check for a response code. + if (curl_easy_getinfo(fEasy, CURLINFO_RESPONSE_CODE, &fStatusCode) == CURLE_OK) { + if (fStatusCode >= 300 ) { + // Short-circuit usual processing by storing a special XML document in the buffer. + ostringstream specialdoc; + specialdoc << '<' << URLInputSource::asciiStatusCodeElementName << " xmlns=\"http://www.opensaml.org/xmltooling\">" + << fStatusCode + << "'; + string specialxml = specialdoc.str(); + memcpy(fBuffer, specialxml.c_str(), specialxml.length()); + fBufferHeadPtr += specialxml.length(); + } + } + else { + fStatusCode = 200; // reset to 200 to ensure no special processing occurs + } + // Find the content type char* contentType8 = NULL; - curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8); - if(contentType8) + if(curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8) == CURLE_OK && contentType8) fContentType = XMLString::transcode(contentType8); } @@ -334,6 +417,10 @@ bool CurlURLInputStream::readMore(int* runningHandles) ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str()); break; + case CURLE_OPERATION_TIMEDOUT: + ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str()); + break; + case CURLE_RECV_ERROR: ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURL.c_str()); break; @@ -404,6 +491,10 @@ xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t continue; } + // Check for a non-2xx status that means to ignore the curl response. + if (fStatusCode >= 300) + break; + // Ask the curl to do some work int runningHandles = 0; tryAgain = readMore(&runningHandles); diff --git a/xmltooling/util/CurlURLInputStream.h b/xmltooling/util/CurlURLInputStream.h index 53b784f..a2be81e 100644 --- a/xmltooling/util/CurlURLInputStream.h +++ b/xmltooling/util/CurlURLInputStream.h @@ -43,16 +43,18 @@ namespace xmltooling { /** * Constructor. * - * @param url the URL of the resource to fetch + * @param url the URL of the resource to fetch + * @param cacheTag optional pointer to string used for cache management */ - CurlURLInputStream(const char* url); + CurlURLInputStream(const char* url, std::string* cacheTag=NULL); /** * Constructor. * - * @param url the URL of the resource to fetch + * @param url the URL of the resource to fetch + * @param cacheTag optional pointer to string used for cache management */ - CurlURLInputStream(const XMLCh* url); + CurlURLInputStream(const XMLCh* url, std::string* cacheTag=NULL); /** * Constructor taking a DOM element supporting the following content: @@ -66,9 +68,10 @@ namespace xmltooling { *
<TransportOption provider="CURL" option="150">0</TransportOption>
* * - * @param e DOM to supply configuration + * @param e DOM to supply configuration + * @param cacheTag optional pointer to string used for cache management */ - CurlURLInputStream(const xercesc::DOMElement* e); + CurlURLInputStream(const xercesc::DOMElement* e, std::string* cacheTag=NULL); ~CurlURLInputStream(); @@ -101,11 +104,13 @@ namespace xmltooling { bool readMore(int *runningHandles); logging::Category& fLog; + std::string* fCacheTag; std::string fURL; std::vector fSavedOptions; CURLM* fMulti; CURL* fEasy; + struct curl_slist* fHeaders; unsigned long fTotalBytesRead; XMLByte* fWritePtr; @@ -120,6 +125,7 @@ namespace xmltooling { XMLByte* fBufferTailPtr; XMLCh* fContentType; + long fStatusCode; char fError[CURL_ERROR_SIZE]; }; diff --git a/xmltooling/util/ParserPool.cpp b/xmltooling/util/ParserPool.cpp index 6da2911..39d6369 100644 --- a/xmltooling/util/ParserPool.cpp +++ b/xmltooling/util/ParserPool.cpp @@ -496,11 +496,11 @@ xsecsize_t StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toF #ifdef XMLTOOLING_LITE -URLInputSource::URLInputSource(const XMLCh* url, const char* systemId) : InputSource(systemId), m_url(url) +URLInputSource::URLInputSource(const XMLCh* url, const char* systemId, string* cacheTag) : InputSource(systemId), m_url(url) { } -URLInputSource::URLInputSource(const DOMElement* e, const char* systemId) : InputSource(systemId) +URLInputSource::URLInputSource(const DOMElement* e, const char* systemId, string* cacheTag) : InputSource(systemId) { static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i); static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l); @@ -523,19 +523,23 @@ BinInputStream* URLInputSource::makeStream() const #else -URLInputSource::URLInputSource(const XMLCh* url, const char* systemId) - : InputSource(systemId), m_url(url), m_root(NULL) +URLInputSource::URLInputSource(const XMLCh* url, const char* systemId, string* cacheTag) + : InputSource(systemId), m_cacheTag(cacheTag), m_url(url), m_root(NULL) { } -URLInputSource::URLInputSource(const DOMElement* e, const char* systemId) - : InputSource(systemId), m_root(e) +URLInputSource::URLInputSource(const DOMElement* e, const char* systemId, string* cacheTag) + : InputSource(systemId), m_cacheTag(cacheTag), m_root(e) { } BinInputStream* URLInputSource::makeStream() const { - return m_root ? new CurlURLInputStream(m_root) : new CurlURLInputStream(m_url.get()); + return m_root ? new CurlURLInputStream(m_root, m_cacheTag) : new CurlURLInputStream(m_url.get(), m_cacheTag); } #endif + +const char URLInputSource::asciiStatusCodeElementName[] = "URLInputSourceStatus"; + +const XMLCh URLInputSource::utf16StatusCodeElementName[] = UNICODE_LITERAL_20(U,R,L,I,n,p,u,t,S,o,u,r,c,e,S,t,a,t,u,s); diff --git a/xmltooling/util/ParserPool.h b/xmltooling/util/ParserPool.h index 87ba21e..004f7c7 100644 --- a/xmltooling/util/ParserPool.h +++ b/xmltooling/util/ParserPool.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -224,8 +225,9 @@ namespace xmltooling { * * @param url source of input * @param systemId optional system identifier to attach to the source + * @param cacheTag optional pointer to string used for cache management */ - URLInputSource(const XMLCh* url, const char* systemId=NULL); + URLInputSource(const XMLCh* url, const char* systemId=NULL, std::string* cacheTag=NULL); /** * Constructor taking a DOM element supporting the following content: @@ -241,17 +243,24 @@ namespace xmltooling { * * @param e DOM to supply configuration * @param systemId optional system identifier to attach to the source + * @param cacheTag optional pointer to string used for cache management */ - URLInputSource(const xercesc::DOMElement* e, const char* systemId=NULL); + URLInputSource(const xercesc::DOMElement* e, const char* systemId=NULL, std::string* cacheTag=NULL); /// @cond off virtual xercesc::BinInputStream* makeStream() const; /// @endcond + /** Element name used to signal a non-successful response when fetching a remote document. */ + static const char asciiStatusCodeElementName[]; + + /** Element name used to signal a non-successful response when fetching a remote document. */ + static const XMLCh utf16StatusCodeElementName[]; private: #ifdef XMLTOOLING_LITE xercesc::XMLURL m_url; #else + std::string* m_cacheTag; xmltooling::auto_ptr_char m_url; const xercesc::DOMElement* m_root; #endif diff --git a/xmltooling/util/ReloadableXMLFile.cpp b/xmltooling/util/ReloadableXMLFile.cpp index 726ad74..03a8c90 100644 --- a/xmltooling/util/ReloadableXMLFile.cpp +++ b/xmltooling/util/ReloadableXMLFile.cpp @@ -21,6 +21,7 @@ */ #include "internal.h" +#include "io/HTTPResponse.h" #include "util/NDC.h" #include "util/PathResolver.h" #include "util/ReloadableXMLFile.h" @@ -173,12 +174,25 @@ pair ReloadableXMLFile::load(bool backup) doc=XMLToolingConfig::getConfig().getParser().parse(dsrc); } else { - URLInputSource src(m_root); + URLInputSource src(m_root, NULL, &m_cacheTag); Wrapper4InputSource dsrc(&src,false); if (m_validate) doc=XMLToolingConfig::getConfig().getValidatingParser().parse(dsrc); else doc=XMLToolingConfig::getConfig().getParser().parse(dsrc); + + // Check for a response code signal. + if (XMLHelper::isNodeNamed(doc->getDocumentElement(), xmlconstants::XMLTOOLING_NS, URLInputSource::utf16StatusCodeElementName)) { + int responseCode = XMLString::parseInt(doc->getDocumentElement()->getFirstChild()->getNodeValue()); + doc->release(); + if (responseCode == HTTPResponse::XMLTOOLING_HTTP_STATUS_NOTMODIFIED) { + throw responseCode; // toss out as a "known" case to handle gracefully + } + else { + m_log.warn("remote resource fetch returned atypical status code (%d)", responseCode); + throw IOException("remote resource fetch failed, check log for status code of response"); + } + } } m_log.infoStream() << "loaded XML resource (" << (backup ? m_backing : m_source) << ")" << logging::eol; @@ -206,7 +220,7 @@ pair ReloadableXMLFile::load(bool backup) throw XMLParserException(msg.get()); } catch (exception& e) { - m_log.errorStream() << "error while loading configuration from (" + m_log.errorStream() << "error while loading resource (" << (m_source.empty() ? "inline" : (backup ? m_backing : m_source)) << "): " << e.what() << logging::eol; if (!backup && !m_backing.empty()) return load(true); @@ -282,7 +296,17 @@ Lockable* ReloadableXMLFile::lock() pair ret=load(); if (ret.first) ret.second->getOwnerDocument()->release(); - } catch (exception& ex) { + } + catch (int& ex) { + if (ex == HTTPResponse::XMLTOOLING_HTTP_STATUS_NOTMODIFIED) { + m_log.info("remote resource (%s) unchanged from cached version", m_source.c_str()); + } + else { + // Shouldn't happen, we should only get codes intended to be gracefully handled. + m_log.crit("maintaining existing configuration, remote resource fetch returned atypical status code (%d)", ex); + } + } + catch (exception& ex) { m_log.crit("maintaining existing configuration, error reloading resource (%s): %s", m_source.c_str(), ex.what()); } diff --git a/xmltooling/util/ReloadableXMLFile.h b/xmltooling/util/ReloadableXMLFile.h index fb66882..8ae62cf 100644 --- a/xmltooling/util/ReloadableXMLFile.h +++ b/xmltooling/util/ReloadableXMLFile.h @@ -90,7 +90,7 @@ namespace xmltooling { /** Resource location, may be a local path or a URI. */ std::string m_source; - /** Path to backup copy for remote resources. */ + /** Path to backup copy for remote resource. */ std::string m_backing; /** Last modification of local resource or reload of remote resource. */ @@ -99,6 +99,9 @@ namespace xmltooling { /** Time in seconds to wait before trying for new copy of remote resource. */ time_t m_reloadInterval; + /** Caching tag associated with remote resource. */ + std::string m_cacheTag; + /** Shared lock for guarding reloads. */ RWLock* m_lock; -- 2.1.4