From c9a063e554871e212edc46a1fd9ae865cbc9da9e Mon Sep 17 00:00:00 2001 From: Scott Cantor Date: Wed, 4 Feb 2009 04:15:57 +0000 Subject: [PATCH] Rework support for libcurl-based input to parser. --- xmltooling/Makefile.am | 4 +- xmltooling/util/CurlNetAccessor.cpp | 59 ------ xmltooling/util/CurlNetAccessor.hpp | 68 ------ xmltooling/util/CurlURLInputStream.cpp | 372 +++++++++++++++++++++++++++------ xmltooling/util/CurlURLInputStream.h | 125 +++++++++++ xmltooling/util/CurlURLInputStream.hpp | 95 --------- xmltooling/util/ParserPool.cpp | 47 +++++ xmltooling/util/ParserPool.h | 46 ++++ xmltooling/util/ReloadableXMLFile.cpp | 7 +- xmltooling/util/ReloadableXMLFile.h | 6 +- xmltooling/xmltooling.vcproj | 10 +- 11 files changed, 540 insertions(+), 299 deletions(-) delete mode 100644 xmltooling/util/CurlNetAccessor.cpp delete mode 100644 xmltooling/util/CurlNetAccessor.hpp create mode 100644 xmltooling/util/CurlURLInputStream.h delete mode 100644 xmltooling/util/CurlURLInputStream.hpp diff --git a/xmltooling/Makefile.am b/xmltooling/Makefile.am index 6096ce1..15e4378 100644 --- a/xmltooling/Makefile.am +++ b/xmltooling/Makefile.am @@ -100,8 +100,7 @@ soapinclude_HEADERS = \ soap/OpenSSLSOAPTransport.h utilinclude_HEADERS = \ - util/CurlNetAccessor.hpp \ - util/CurlURLInputStream.hpp \ + util/CurlURLInputStream.h \ util/DateTime.h \ util/NDC.h \ util/ParserPool.h \ @@ -151,7 +150,6 @@ xmlsec_sources = \ signature/impl/SignatureValidator.cpp \ signature/impl/XMLSecSignatureImpl.cpp \ soap/impl/CURLSOAPTransport.cpp \ - util/CurlNetAccessor.cpp \ util/CurlURLInputStream.cpp \ util/ReplayCache.cpp \ util/StorageService.cpp diff --git a/xmltooling/util/CurlNetAccessor.cpp b/xmltooling/util/CurlNetAccessor.cpp deleted file mode 100644 index 1774259..0000000 --- a/xmltooling/util/CurlNetAccessor.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * $Id$ - */ - -#include "internal.h" - -#include -#include -#include -#include -#include -#include - -using namespace xmltooling; -using namespace xercesc; - -const XMLCh xmltooling::CurlNetAccessor::fgMyName[] = -{ - chLatin_C, chLatin_u, chLatin_r, chLatin_l, chLatin_N, chLatin_e, - chLatin_t, chLatin_A, chLatin_c, chLatin_c, chLatin_e, chLatin_s, - chLatin_s, chLatin_o, chLatin_r, chNull -}; - - -CurlNetAccessor::CurlNetAccessor() -{ -} - - -CurlNetAccessor::~CurlNetAccessor() -{ -} - -BinInputStream* -CurlNetAccessor::makeNew(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/) -{ - // Just create a CurlURLInputStream - // We defer any checking of the url type for curl in CurlURLInputStream - CurlURLInputStream* retStrm = - new (urlSource.getMemoryManager()) CurlURLInputStream(urlSource, httpInfo); - return retStrm; -} diff --git a/xmltooling/util/CurlNetAccessor.hpp b/xmltooling/util/CurlNetAccessor.hpp deleted file mode 100644 index 72ab0dd..0000000 --- a/xmltooling/util/CurlNetAccessor.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * $Id$ - */ - -#if !defined(XERCESC_INCLUDE_GUARD_CURLNETACCESSOR_HPP) && !defined(XMLTOOLING_LITE) -#define XERCESC_INCLUDE_GUARD_CURLNETACCESSOR_HPP - -#include - -#include -#include -#include -#include - -namespace xmltooling { - -// -// This class is the wrapper for the socket based code which -// provides the ability to fetch a resource specified using -// a HTTP or FTP URL. -// - -class XMLTOOL_API CurlNetAccessor : public xercesc::XMLNetAccessor -{ -public : - CurlNetAccessor(); - ~CurlNetAccessor(); - - virtual xercesc::BinInputStream* makeNew(const xercesc::XMLURL& urlSource, const xercesc::XMLNetHTTPInfo* httpInfo=0); - virtual const XMLCh* getId() const; - -private : - static const XMLCh fgMyName[]; - - CurlNetAccessor(const CurlNetAccessor&); - CurlNetAccessor& operator=(const CurlNetAccessor&); - -}; // CurlNetAccessor - - -inline const XMLCh* CurlNetAccessor::getId() const -{ - return fgMyName; -} - - -}; - -#endif // CURLNETACCESSOR_HPP - - diff --git a/xmltooling/util/CurlURLInputStream.cpp b/xmltooling/util/CurlURLInputStream.cpp index fc716c1..59ad097 100644 --- a/xmltooling/util/CurlURLInputStream.cpp +++ b/xmltooling/util/CurlURLInputStream.cpp @@ -15,13 +15,16 @@ * limitations under the License. */ -/* - * $Id$ +/** + * xmltooling/util/CurlURLInputStream.cpp + * + * Asynchronous use of curl to fetch data from a URL. 
*/ #include "internal.h" -#include +#include +#include #include #include @@ -33,72 +36,256 @@ #include #include -#include - using namespace xmltooling; using namespace xercesc; -CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/) - : fMemoryManager(urlSource.getMemoryManager()) - , fURLSource(urlSource) - , fURL(0) - , fInputStream(NULL) - , m_log(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.NetAccessor")) +namespace { + static const XMLCh _CURL[] = UNICODE_LITERAL_4(C,U,R,L); + static const XMLCh _option[] = UNICODE_LITERAL_6(o,p,t,i,o,n); + static const XMLCh _provider[] = UNICODE_LITERAL_8(p,r,o,v,i,d,e,r); + static const XMLCh TransportOption[] = UNICODE_LITERAL_15(T,r,a,n,s,p,o,r,t,O,p,t,i,o,n); + static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i); + static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l); + static const XMLCh verifyHost[] = UNICODE_LITERAL_10(v,e,r,i,f,y,H,o,s,t); +} + +CurlURLInputStream::CurlURLInputStream(const char* url) + : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream")) + , fURL(url) + , fMulti(0) + , fEasy(0) + , fTotalBytesRead(0) + , fWritePtr(0) + , fBytesRead(0) + , fBytesToRead(0) + , fDataAvailable(false) + , fBufferHeadPtr(fBuffer) + , fBufferTailPtr(fBuffer) + , fContentType(0) { - // Get the text of the URL we're going to use - fURL.reset(XMLString::transcode(fURLSource.getURLText(), fMemoryManager), fMemoryManager); + init(); } +CurlURLInputStream::CurlURLInputStream(const XMLCh* url) + : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream")) + , fMulti(0) + , fEasy(0) + , fTotalBytesRead(0) + , fWritePtr(0) + , fBytesRead(0) + , fBytesToRead(0) + , fDataAvailable(false) + , fBufferHeadPtr(fBuffer) + , fBufferTailPtr(fBuffer) + , fContentType(0) +{ + auto_ptr_char temp(url); + fURL = temp.get(); + init(); +} + +CurlURLInputStream::CurlURLInputStream(const DOMElement* e) + : 
fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream")) + , fMulti(0) + , fEasy(0) + , fTotalBytesRead(0) + , fWritePtr(0) + , fBytesRead(0) + , fBytesToRead(0) + , fDataAvailable(false) + , fBufferHeadPtr(fBuffer) + , fBufferTailPtr(fBuffer) + , fContentType(0) +{ + const XMLCh* attr = e->getAttributeNS(NULL, url); + if (!attr || !*attr) { + attr = e->getAttributeNS(NULL, uri); + if (!attr || !*attr) + throw IOException("No URL supplied via DOM to CurlURLInputStream constructor."); + } + + auto_ptr_char temp(attr); + fURL = temp.get(); + init(e); +} CurlURLInputStream::~CurlURLInputStream() { - delete fInputStream; + if (fEasy) { + // Remove the easy handle from the multi stack + curl_multi_remove_handle(fMulti, fEasy); + + // Cleanup the easy handle + curl_easy_cleanup(fEasy); + } + + if (fMulti) { + // Cleanup the multi handle + curl_multi_cleanup(fMulti); + } + + XMLString::release(&fContentType); } +void CurlURLInputStream::init(const DOMElement* e) +{ + // Allocate the curl multi handle + fMulti = curl_multi_init(); + + // Allocate the curl easy handle + fEasy = curl_easy_init(); + + if (!fMulti || !fEasy) + throw IOException("Failed to allocate libcurl handles."); + + curl_easy_setopt(fEasy, CURLOPT_URL, fURL.c_str()); + + // Set up a way to receive the data + curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function + curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function + + // Do redirects + curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, 1); + curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, 6); + + // Default settings.
+ curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT,15); + curl_easy_setopt(fEasy, CURLOPT_TIMEOUT,30); + curl_easy_setopt(fEasy, CURLOPT_HTTPAUTH,0); + curl_easy_setopt(fEasy, CURLOPT_USERPWD,NULL); + curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 2); + curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(fEasy, CURLOPT_SSL_CIPHER_LIST, "ALL:!aNULL:!LOW:!EXPORT:!SSLv2"); + curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1); + curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1); + + fError[0] = 0; + curl_easy_setopt(fEasy, CURLOPT_ERRORBUFFER, fError); + + if (e) { + const XMLCh* flag = e->getAttributeNS(NULL, verifyHost); + if (flag && (*flag == chLatin_f || *flag == chDigit_0)) + curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 2); + + // Process TransportOption elements. + bool success; + DOMElement* child = XMLHelper::getLastChildElement(e, TransportOption); + while (child) { + if (child->hasChildNodes() && XMLString::equals(child->getAttributeNS(NULL,_provider), _CURL)) { + auto_ptr_char option(child->getAttributeNS(NULL,_option)); + auto_ptr_char value(child->getFirstChild()->getNodeValue()); + if (option.get() && *option.get() && value.get() && *value.get()) { + // For libcurl, the option is an enum and the value type depends on the option. 
+ CURLoption opt = static_cast(strtol(option.get(), NULL, 10)); + if (opt < CURLOPTTYPE_OBJECTPOINT) + success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), NULL, 10)) == CURLE_OK); +#ifdef CURLOPTTYPE_OFF_T + else if (opt < CURLOPTTYPE_OFF_T) + success = (curl_easy_setopt(fEasy, opt, value.get()) == CURLE_OK); + else if (sizeof(curl_off_t) == sizeof(long)) + success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), NULL, 10)) == CURLE_OK); + else + success = false; +#else + else + success = (curl_easy_setopt(fEasy, opt, value.get()) == CURLE_OK); +#endif + if (!success) + fLog.error("failed to set transport option (%s)", option.get()); + } + } + child = XMLHelper::getPreviousSiblingElement(child, TransportOption); + } + } + + // Add easy handle to the multi stack + curl_multi_add_handle(fMulti, fEasy); + + fLog.debug("libcurl trying to fetch %s", fURL.c_str()); + + // Start reading, to get the content type + while(fBufferHeadPtr == fBuffer) { + int runningHandles = 0; + try { + readMore(&runningHandles); + } + catch (XMLException& ex) { + curl_multi_remove_handle(fMulti, fEasy); + curl_easy_cleanup(fEasy); + fEasy = NULL; + curl_multi_cleanup(fMulti); + fMulti = NULL; + auto_ptr_char msg(ex.getMessage()); + throw IOException(msg.get()); + } + if(runningHandles == 0) break; + } -size_t CurlURLInputStream::staticWriteCallback(void* ptr, size_t size, size_t nmemb, void* stream) + // Find the content type + char* contentType8 = NULL; + curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8); + if(contentType8) + fContentType = XMLString::transcode(contentType8); +} + + +size_t CurlURLInputStream::staticWriteCallback(char* buffer, size_t size, size_t nitems, void* outstream) { - size_t len = size*nmemb; - reinterpret_cast(stream)->write(reinterpret_cast(ptr),len); - return len; + return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems); } +size_t CurlURLInputStream::writeCallback(char* buffer, size_t size, size_t nitems) +{ + size_t 
cnt = size * nitems; + size_t totalConsumed = 0; -xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead) + // Consume as many bytes as possible immediately into the buffer + size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt; + memcpy(fWritePtr, buffer, consume); + fWritePtr += consume; + fBytesRead += consume; + fTotalBytesRead += consume; + fBytesToRead -= consume; + + //fLog.debug("write callback consuming %d bytes", consume); + + // If bytes remain, rebuffer as many as possible into our holding buffer + buffer += consume; + totalConsumed += consume; + cnt -= consume; + if (cnt > 0) + { + size_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer); + consume = (cnt > bufAvail) ? bufAvail : cnt; + memcpy(fBufferHeadPtr, buffer, consume); + fBufferHeadPtr += consume; + buffer += consume; + totalConsumed += consume; + //fLog.debug("write callback rebuffering %d bytes", consume); + } + + // Return the total amount we've consumed. If we don't consume all the bytes + // then an error will be generated. Since our buffer size is equal to the + // maximum size that curl will write, this should never happen unless there + // is a logic error somewhere here. + return totalConsumed; +} + +bool CurlURLInputStream::readMore(int* runningHandles) { - if (!fInputStream) { - // Allocate the curl easy handle. 
- CURL* fEasy = curl_easy_init(); - if (!fEasy) - ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, "unable to allocate libcurl handle", fMemoryManager); - - m_log.debug("libcurl trying to fetch %s", fURL.get()); - - // Set URL option - curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get()); - curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, &fUnderlyingStream); - curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); - curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 30); - curl_easy_setopt(fEasy, CURLOPT_TIMEOUT, 60); - curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0); - curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0); - curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1); - curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1); - curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1); - - char curl_errorbuf[CURL_ERROR_SIZE]; - curl_errorbuf[0]=0; - curl_easy_setopt(fEasy,CURLOPT_ERRORBUFFER,curl_errorbuf); - - // Fetch the data. - if (curl_easy_perform(fEasy) != CURLE_OK) { - curl_easy_cleanup(fEasy); - ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, curl_errorbuf, fMemoryManager); - } + // Ask the curl to do some work + CURLMcode curlResult = curl_multi_perform(fMulti, runningHandles); - curl_easy_cleanup(fEasy); + // Process messages from curl + int msgsInQueue = 0; + for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; ) + { + //fLog.debug("msg %d, %d from curl", msg->msg, msg->data.result); + + if (msg->msg != CURLMSG_DONE) + return true; - /* switch (msg->data.result) { case CURLE_OK: @@ -106,31 +293,96 @@ xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t break; case CURLE_UNSUPPORTED_PROTOCOL: - ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager); + ThrowXML(MalformedURLException, XMLExcepts::URL_UnsupportedProto); break; case CURLE_COULDNT_RESOLVE_HOST: case CURLE_COULDNT_RESOLVE_PROXY: - 
ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager); + ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURL.c_str()); break; case CURLE_COULDNT_CONNECT: - ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager); + ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str()); + break; case CURLE_RECV_ERROR: - ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager); + ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURL.c_str()); break; default: - m_log.error("curl NetAccessor encountered error from libcurl (%d)", msg->data.result); - ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager); + fLog.error("error while fetching %s: (%d) %s", fURL.c_str(), msg->data.result, fError); + ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURL.c_str()); break; } - */ + } + + // If nothing is running any longer, bail out + if(*runningHandles == 0) + return false; + + // If there is no further data to read, and we haven't + // read any yet on this invocation, call select to wait for data + if (curlResult != CURLM_CALL_MULTI_PERFORM && fBytesRead == 0) + { + fd_set readSet; + fd_set writeSet; + fd_set exceptSet; + int fdcnt=0; + + FD_ZERO(&readSet); + FD_ZERO(&writeSet); + FD_ZERO(&exceptSet); + + // Ask curl for the file descriptors to wait on + curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt); + + // Wait on the file descriptors + timeval tv; + tv.tv_sec = 2; + tv.tv_usec = 0; + select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv); + } + + return curlResult == CURLM_CALL_MULTI_PERFORM; +} + +xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead) +{ + fBytesRead = 0; + fBytesToRead = maxToRead; + fWritePtr = toFill; + + 
for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); ) + { + // First, any buffered data we have available + size_t bufCnt = fBufferHeadPtr - fBufferTailPtr; + bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt; + if (bufCnt > 0) + { + memcpy(fWritePtr, fBufferTailPtr, bufCnt); + fWritePtr += bufCnt; + fBytesRead += bufCnt; + fTotalBytesRead += bufCnt; + fBytesToRead -= bufCnt; + + fBufferTailPtr += bufCnt; + if (fBufferTailPtr == fBufferHeadPtr) + fBufferHeadPtr = fBufferTailPtr = fBuffer; + + //fLog.debug("consuming %d buffered bytes", bufCnt); + + tryAgain = true; + continue; + } - fInputStream = new (fMemoryManager) StreamInputSource::StreamBinInputStream(fUnderlyingStream); + // Ask the curl to do some work + int runningHandles = 0; + tryAgain = readMore(&runningHandles); + + // If nothing is running any longer, bail out + if (runningHandles == 0) + break; } - // Defer to the stream wrapper. - return fInputStream->readBytes(toFill, maxToRead); + return fBytesRead; } diff --git a/xmltooling/util/CurlURLInputStream.h b/xmltooling/util/CurlURLInputStream.h new file mode 100644 index 0000000..e397331 --- /dev/null +++ b/xmltooling/util/CurlURLInputStream.h @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file xmltooling/util/CurlURLInputStream.h + * + * Asynchronous use of curl to fetch data from a URL. + */ + +#if !defined(__xmltooling_curlinstr_h__) && !defined(XMLTOOLING_LITE) +#define __xmltooling_curlinstr_h__ + +#include + +#include +#include + +namespace xmltooling { + + /** + * Adapted from Xerces-C as a more advanced input stream implementation + * for subsequent use in parsing remote documents. + */ + class XMLTOOL_API CurlURLInputStream : public xercesc::BinInputStream + { + public : + /** + * Constructor. + * + * @param url the URL of the resource to fetch + */ + CurlURLInputStream(const char* url); + + /** + * Constructor. + * + * @param url the URL of the resource to fetch + */ + CurlURLInputStream(const XMLCh* url); + + /** + * Constructor taking a DOM element supporting the following content: + * + *
+ *
uri | url
+ *
identifies the remote resource
+ *
verifyHost
+ *
true iff name of host should be matched against TLS/SSL certificate
+ *
TransportOption elements, like so:
+ *
<TransportOption provider="CURL" option="150">0</TransportOption>
+ *
+ * + * @param e DOM to supply configuration + */ + CurlURLInputStream(const xercesc::DOMElement* e); + + ~CurlURLInputStream(); + +#ifdef XMLTOOLING_XERCESC_64BITSAFE + XMLFilePos +#else + unsigned int +#endif + curPos() const { + return fTotalBytesRead; + } + +#ifdef XMLTOOLING_XERCESC_INPUTSTREAM_HAS_CONTENTTYPE + const XMLCh* getContentType() const { + return fContentType; + } +#endif + + xsecsize_t readBytes(XMLByte* const toFill, const xsecsize_t maxToRead); + + private : + CurlURLInputStream(const CurlURLInputStream&); + CurlURLInputStream& operator=(const CurlURLInputStream&); + + // libcurl callbacks for data read/write + static size_t staticWriteCallback(char *buffer, size_t size, size_t nitems, void *outstream); + size_t writeCallback(char *buffer, size_t size, size_t nitems); + + void init(const xercesc::DOMElement* e=NULL); + bool readMore(int *runningHandles); + + logging::Category& fLog; + std::string fURL; + + CURLM* fMulti; + CURL* fEasy; + + unsigned long fTotalBytesRead; + XMLByte* fWritePtr; + xsecsize_t fBytesRead; + xsecsize_t fBytesToRead; + bool fDataAvailable; + + // Overflow buffer for when curl writes more data to us + // than we've asked for. + XMLByte fBuffer[CURL_MAX_WRITE_SIZE]; + XMLByte* fBufferHeadPtr; + XMLByte* fBufferTailPtr; + + XMLCh* fContentType; + + char fError[CURL_ERROR_SIZE]; + }; +}; + +#endif // __xmltooling_curlinstr_h__ diff --git a/xmltooling/util/CurlURLInputStream.hpp b/xmltooling/util/CurlURLInputStream.hpp deleted file mode 100644 index 9b708c1..0000000 --- a/xmltooling/util/CurlURLInputStream.hpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * $Id$ - */ - -#if !defined(XERCESC_INCLUDE_GUARD_CURLURLINPUTSTREAM_HPP) && !defined(XMLTOOLING_LITE) -#define XERCESC_INCLUDE_GUARD_CURLURLINPUTSTREAM_HPP - -#include -#include - -#include - -#include -#include -#include -#include -#include - -namespace xmltooling { - -// -// This class implements the BinInputStream interface specified by the XML -// parser. -// - -class XMLTOOL_API CurlURLInputStream : public xercesc::BinInputStream -{ -public : - CurlURLInputStream(const xercesc::XMLURL& urlSource, const xercesc::XMLNetHTTPInfo* httpInfo=0); - ~CurlURLInputStream(); - -#ifdef XMLTOOLING_XERCESC_64BITSAFE - XMLFilePos -#else - unsigned int -#endif - curPos() const; - xsecsize_t readBytes(XMLByte* const toFill, const xsecsize_t maxToRead); - -#ifdef XMLTOOLING_XERCESC_INPUTSTREAM_HAS_CONTENTTYPE - const XMLCh* getContentType() const { - return NULL; - } -#endif - -private : - // ----------------------------------------------------------------------- - // Unimplemented constructors and operators - // ----------------------------------------------------------------------- - CurlURLInputStream(const CurlURLInputStream&); - CurlURLInputStream& operator=(const CurlURLInputStream&); - - static size_t staticWriteCallback(void* ptr, size_t size, size_t nmemb, void* stream); - - std::stringstream fUnderlyingStream; - xercesc::MemoryManager* fMemoryManager; - xercesc::XMLURL fURLSource; - xercesc::ArrayJanitor fURL; - StreamInputSource::StreamBinInputStream* fInputStream; - logging::Category& m_log; - -}; // CurlURLInputStream - - -inline -#ifdef 
XMLTOOLING_XERCESC_64BITSAFE - XMLFilePos -#else - unsigned int -#endif -CurlURLInputStream::curPos() const -{ - return fInputStream ? fInputStream->curPos() : 0; -} - -}; - -#endif // CURLURLINPUTSTREAM_HPP diff --git a/xmltooling/util/ParserPool.cpp b/xmltooling/util/ParserPool.cpp index 9397978..ee64178 100644 --- a/xmltooling/util/ParserPool.cpp +++ b/xmltooling/util/ParserPool.cpp @@ -23,6 +23,7 @@ #include "internal.h" #include "exceptions.h" #include "logging.h" +#include "util/CurlURLInputStream.h" #include "util/NDC.h" #include "util/ParserPool.h" #include "util/XMLHelper.h" @@ -523,3 +524,49 @@ xsecsize_t StreamInputSource::StreamBinInputStream::readBytes(XMLByte* const toF } return bytes_read; } + +#ifdef XMLTOOLING_LITE + +URLInputSource::URLInputSource(const XMLCh* url, const char* systemId) : InputSource(systemId), m_url(url) +{ +} + +URLInputSource::URLInputSource(const DOMElement* e, const char* systemId) : InputSource(systemId) +{ + static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i); + static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l); + + const XMLCh* attr = e->getAttributeNS(NULL, url); + if (!attr || !*attr) { + attr = e->getAttributeNS(NULL, uri); + if (!attr || !*attr) + throw IOException("No URL supplied via DOM to URLInputSource constructor."); + } + + m_url.setURL(attr); +} + +BinInputStream* URLInputSource::makeStream() const +{ + // Ask the URL to create us an appropriate input stream + return m_url.makeNewStream(); +} + +#else + +URLInputSource::URLInputSource(const XMLCh* url, const char* systemId) + : InputSource(systemId), m_url(url), m_root(NULL) +{ +} + +URLInputSource::URLInputSource(const DOMElement* e, const char* systemId) + : InputSource(systemId), m_root(e) +{ +} + +BinInputStream* URLInputSource::makeStream() const +{ + return m_root ? 
new CurlURLInputStream(m_root) : new CurlURLInputStream(m_url.get()); +} + +#endif diff --git a/xmltooling/util/ParserPool.h b/xmltooling/util/ParserPool.h index ec9b5b5..fefdddc 100644 --- a/xmltooling/util/ParserPool.h +++ b/xmltooling/util/ParserPool.h @@ -33,6 +33,7 @@ #include #include #include +#include #ifndef XMLTOOLING_NO_XMLSEC # include @@ -216,6 +217,51 @@ namespace xmltooling { private: std::istream& m_is; }; + + /** + * A URL-based parser source that supports a more advanced input stream. + */ + class XMLTOOL_API URLInputSource : public xercesc::InputSource + { + MAKE_NONCOPYABLE(URLInputSource); + public: + /** + * Constructor. + * + * @param url source of input + * @param systemId optional system identifier to attach to the source + */ + URLInputSource(const XMLCh* url, const char* systemId=NULL); + + /** + * Constructor taking a DOM element supporting the following content: + * + *
+ *
uri | url
+ *
identifies the remote resource
+ *
verifyHost
+ *
true iff name of host should be matched against TLS/SSL certificate
+ *
TransportOption elements, like so:
+ *
<TransportOption provider="CURL" option="150">0</TransportOption>
+ *
+ * + * @param e DOM to supply configuration + * @param systemId optional system identifier to attach to the source + */ + URLInputSource(const xercesc::DOMElement* e, const char* systemId=NULL); + + /// @cond off + virtual xercesc::BinInputStream* makeStream() const; + /// @endcond + + private: +#ifdef XMLTOOLING_LITE + xercesc::XMLURL m_url; +#else + xmltooling::auto_ptr_char m_url; + const xercesc::DOMElement* m_root; +#endif + }; }; #if defined (_MSC_VER) diff --git a/xmltooling/util/ReloadableXMLFile.cpp b/xmltooling/util/ReloadableXMLFile.cpp index ad43f6f..0aad73b 100644 --- a/xmltooling/util/ReloadableXMLFile.cpp +++ b/xmltooling/util/ReloadableXMLFile.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2001-2007 Internet2 + * Copyright 2001-2009 Internet2 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,6 @@ #include #include -#include #include using namespace xmltooling::logging; @@ -157,8 +156,8 @@ pair ReloadableXMLFile::load(bool backup) m_log.debug("loading configuration from external resource..."); DOMDocument* doc=NULL; - auto_ptr_XMLCh widenit(backup ? m_backing.c_str() : m_source.c_str()); if (m_local || backup) { + auto_ptr_XMLCh widenit(backup ? 
m_backing.c_str() : m_source.c_str()); LocalFileInputSource src(widenit.get()); Wrapper4InputSource dsrc(&src,false); if (m_validate) @@ -167,7 +166,7 @@ pair ReloadableXMLFile::load(bool backup) doc=XMLToolingConfig::getConfig().getParser().parse(dsrc); } else { - URLInputSource src(widenit.get()); + URLInputSource src(m_root); Wrapper4InputSource dsrc(&src,false); if (m_validate) doc=XMLToolingConfig::getConfig().getValidatingParser().parse(dsrc); diff --git a/xmltooling/util/ReloadableXMLFile.h b/xmltooling/util/ReloadableXMLFile.h index cb4c5c3..d8dda9e 100644 --- a/xmltooling/util/ReloadableXMLFile.h +++ b/xmltooling/util/ReloadableXMLFile.h @@ -1,5 +1,5 @@ /* - * Copyright 2001-2007 Internet2 + * Copyright 2001-2009 Internet2 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,6 +53,10 @@ namespace xmltooling { *
use a validating parser
*
reloadChanges
*
enables monitoring of local file for changes
+ *
reloadInterval
+ *
enables periodic refresh of remote file
+ *
backingFilePath
+ *
location for backup of remote resource
* * * @param e DOM to supply configuration diff --git a/xmltooling/xmltooling.vcproj b/xmltooling/xmltooling.vcproj index 1cb272f..b6f8676 100644 --- a/xmltooling/xmltooling.vcproj +++ b/xmltooling/xmltooling.vcproj @@ -392,10 +392,6 @@ Name="util" > - - @@ -734,11 +730,7 @@ Name="util" > - -