-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/**
+ * Licensed to the University Corporation for Advanced Internet
+ * Development, Inc. (UCAID) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for
+ * additional information regarding copyright ownership.
+ *
+ * UCAID licenses this file to you under the Apache License,
+ * Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the
+ * License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
*/
-/*
- * $Id$
+/**
+ * xmltooling/util/CurlURLInputStream.cpp
+ *
+ * Asynchronous use of curl to fetch data from a URL.
*/
#include "internal.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif
-//#include <sys/types.h>
-//#include <sys/time.h>
+#include <xmltooling/util/CurlURLInputStream.h>
+#include <xmltooling/util/ParserPool.h>
+#include <xmltooling/util/XMLHelper.h>
+#include <openssl/ssl.h>
#include <xercesc/util/XercesDefs.hpp>
#include <xercesc/util/XMLNetAccessor.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/TranscodingException.hpp>
#include <xercesc/util/PlatformUtils.hpp>
-#include <xmltooling/util/CurlURLInputStream.hpp>
-
using namespace xmltooling;
+using namespace xercesc;
+using namespace std;
+
+namespace {
+ static const XMLCh _CURL[] = UNICODE_LITERAL_4(C,U,R,L);
+ static const XMLCh _OpenSSL[] = UNICODE_LITERAL_7(O,p,e,n,S,S,L);
+ static const XMLCh _option[] = UNICODE_LITERAL_6(o,p,t,i,o,n);
+ static const XMLCh _provider[] = UNICODE_LITERAL_8(p,r,o,v,i,d,e,r);
+ static const XMLCh TransportOption[] = UNICODE_LITERAL_15(T,r,a,n,s,p,o,r,t,O,p,t,i,o,n);
+ static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
+ static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l);
+ static const XMLCh verifyHost[] = UNICODE_LITERAL_10(v,e,r,i,f,y,H,o,s,t);
+
+ // callback to invoke a caller-defined SSL callback
+ CURLcode ssl_ctx_callback(CURL* curl, SSL_CTX* ssl_ctx, void* userptr)
+ {
+ CurlURLInputStream* str = reinterpret_cast<CurlURLInputStream*>(userptr);
+
+ // Default flags manually disable SSLv2 so we're not dependent on libcurl to do it.
+ // Also disable the ticket option where implemented, since this breaks a variety
+ // of servers. Newer libcurl also does this for us.
+#ifdef SSL_OP_NO_TICKET
+ SSL_CTX_set_options(ssl_ctx, str->getOpenSSLOps()|SSL_OP_NO_TICKET);
+#else
+ SSL_CTX_set_options(ssl_ctx, str->getOpenSSLOps());
+#endif
+
+ return CURLE_OK;
+ }
+
+ size_t curl_header_hook(void* ptr, size_t size, size_t nmemb, void* stream)
+ {
+ // only handle single-byte data
+ if (size!=1 || nmemb<5 || !stream)
+ return nmemb;
+ string* cacheTag = reinterpret_cast<string*>(stream);
+ const char* hdr = reinterpret_cast<char*>(ptr);
+ if (strncmp(hdr, "ETag:", 5) == 0) {
+ hdr += 5;
+ size_t remaining = nmemb - 5;
+ // skip leading spaces
+ while (remaining > 0) {
+ if (*hdr == ' ') {
+ ++hdr;
+ --remaining;
+ continue;
+ }
+ break;
+ }
+ // append until whitespace
+ cacheTag->erase();
+ while (remaining > 0) {
+ if (!isspace(*hdr)) {
+ (*cacheTag) += *hdr++;
+ --remaining;
+ continue;
+ }
+ break;
+ }
+
+ if (!cacheTag->empty())
+ *cacheTag = "If-None-Match: " + *cacheTag;
+ }
+ else if (cacheTag->empty() && strncmp(hdr, "Last-Modified:", 14) == 0) {
+ hdr += 14;
+ size_t remaining = nmemb - 14;
+ // skip leading spaces
+ while (remaining > 0) {
+ if (*hdr == ' ') {
+ ++hdr;
+ --remaining;
+ continue;
+ }
+ break;
+ }
+ // append until whitespace
+ while (remaining > 0) {
+ if (!isspace(*hdr)) {
+ (*cacheTag) += *hdr++;
+ --remaining;
+ continue;
+ }
+ break;
+ }
+
+ if (!cacheTag->empty())
+ *cacheTag = "If-Modified-Since: " + *cacheTag;
+ }
+
+ return nmemb;
+ }
+}
+CurlURLInputStream::CurlURLInputStream(const char* url, string* cacheTag)
+ : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
+ , fCacheTag(cacheTag)
+ , fURL(url ? url : "")
+ , fOpenSSLOps(SSL_OP_ALL|SSL_OP_NO_SSLv2)
+ , fMulti(0)
+ , fEasy(0)
+ , fHeaders(0)
+ , fTotalBytesRead(0)
+ , fWritePtr(0)
+ , fBytesRead(0)
+ , fBytesToRead(0)
+ , fDataAvailable(false)
+ , fBuffer(0)
+ , fBufferHeadPtr(0)
+ , fBufferTailPtr(0)
+ , fBufferSize(0)
+ , fContentType(0)
+ , fStatusCode(200)
+{
+ if (fURL.empty())
+ throw IOException("No URL supplied to CurlURLInputStream constructor.");
+ init();
+}
+
+CurlURLInputStream::CurlURLInputStream(const XMLCh* url, string* cacheTag)
+ : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
+ , fCacheTag(cacheTag)
+ , fOpenSSLOps(SSL_OP_ALL|SSL_OP_NO_SSLv2)
+ , fMulti(0)
+ , fEasy(0)
+ , fHeaders(0)
+ , fTotalBytesRead(0)
+ , fWritePtr(0)
+ , fBytesRead(0)
+ , fBytesToRead(0)
+ , fDataAvailable(false)
+ , fBuffer(0)
+ , fBufferHeadPtr(0)
+ , fBufferTailPtr(0)
+ , fBufferSize(0)
+ , fContentType(0)
+ , fStatusCode(200)
+{
+ if (url) {
+ auto_ptr_char temp(url);
+ fURL = temp.get();
+ }
+ if (fURL.empty())
+ throw IOException("No URL supplied to CurlURLInputStream constructor.");
+ init();
+}
-CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)
- : fMulti(0)
- , fEasy(0)
- , fMemoryManager(urlSource.getMemoryManager())
- , fURLSource(urlSource)
- , fURL(0)
- , fTotalBytesRead(0)
- , fWritePtr(0)
- , fBytesRead(0)
- , fBytesToRead(0)
- , fDataAvailable(false)
- , fBufferHeadPtr(fBuffer)
- , fBufferTailPtr(fBuffer)
- , m_log(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.NetAccessor"))
+CurlURLInputStream::CurlURLInputStream(const DOMElement* e, string* cacheTag)
+ : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
+ , fCacheTag(cacheTag)
+ , fOpenSSLOps(SSL_OP_ALL|SSL_OP_NO_SSLv2)
+ , fMulti(0)
+ , fEasy(0)
+ , fHeaders(0)
+ , fTotalBytesRead(0)
+ , fWritePtr(0)
+ , fBytesRead(0)
+ , fBytesToRead(0)
+ , fDataAvailable(false)
+ , fBuffer(0)
+ , fBufferHeadPtr(0)
+ , fBufferTailPtr(0)
+ , fBufferSize(0)
+ , fContentType(0)
+ , fStatusCode(200)
{
- // Allocate the curl multi handle
- fMulti = curl_multi_init();
-
- // Allocate the curl easy handle
- fEasy = curl_easy_init();
-
- // Get the text of the URL we're going to use
- fURL.reset(XMLString::transcode(fURLSource.getURLText(), fMemoryManager), fMemoryManager);
-
- m_log.debug("libcurl trying to fetch %s", fURL.get());
-
- // Set URL option
- curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get());
- curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function
- curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function
- curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 30);
+ const XMLCh* attr = e->getAttributeNS(nullptr, url);
+ if (!attr || !*attr) {
+ attr = e->getAttributeNS(nullptr, uri);
+ if (!attr || !*attr)
+ throw IOException("No URL supplied via DOM to CurlURLInputStream constructor.");
+ }
+
+ auto_ptr_char temp(attr);
+ fURL = temp.get();
+ init(e);
+}
+
+CurlURLInputStream::~CurlURLInputStream()
+{
+ if (fEasy) {
+ // Remove the easy handle from the multi stack
+ curl_multi_remove_handle(fMulti, fEasy);
+
+ // Cleanup the easy handle
+ curl_easy_cleanup(fEasy);
+ }
+
+ if (fMulti) {
+ // Cleanup the multi handle
+ curl_multi_cleanup(fMulti);
+ }
+
+ if (fHeaders) {
+ curl_slist_free_all(fHeaders);
+ }
+
+ XMLString::release(&fContentType);
+ free(fBuffer);
+}
+
+void CurlURLInputStream::init(const DOMElement* e)
+{
+ // Allocate the curl multi handle
+ fMulti = curl_multi_init();
+
+ // Allocate the curl easy handle
+ fEasy = curl_easy_init();
+
+ if (!fMulti || !fEasy)
+ throw IOException("Failed to allocate libcurl handles.");
+
+ curl_easy_setopt(fEasy, CURLOPT_URL, fURL.c_str());
+
+ // Set up a way to recieve the data
+ curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function
+ curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function
+
+ // Do redirects
+ curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, 1);
+ curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, 6);
+
+ // Default settings.
+ curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 10);
curl_easy_setopt(fEasy, CURLOPT_TIMEOUT, 60);
- curl_easy_setopt(fEasy, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv3);
- curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0);
+ curl_easy_setopt(fEasy, CURLOPT_HTTPAUTH, 0);
+ curl_easy_setopt(fEasy, CURLOPT_USERPWD, nullptr);
+ curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 2);
curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0);
+ curl_easy_setopt(fEasy, CURLOPT_CAINFO, nullptr);
+ curl_easy_setopt(fEasy, CURLOPT_SSL_CIPHER_LIST, "ALL:!aNULL:!LOW:!EXPORT:!SSLv2");
curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1);
curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1);
curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1);
-
- // Add easy handle to the multi stack
- curl_multi_add_handle(fMulti, fEasy);
-}
+ curl_easy_setopt(fEasy, CURLOPT_ENCODING, "");
+ // Install SSL callback.
+ curl_easy_setopt(fEasy, CURLOPT_SSL_CTX_FUNCTION, ssl_ctx_callback);
+ curl_easy_setopt(fEasy, CURLOPT_SSL_CTX_DATA, this);
-CurlURLInputStream::~CurlURLInputStream()
-{
- // Remove the easy handle from the multi stack
- curl_multi_remove_handle(fMulti, fEasy);
-
- // Cleanup the easy handle
- curl_easy_cleanup(fEasy);
-
- // Cleanup the multi handle
- curl_multi_cleanup(fMulti);
+ fError[0] = 0;
+ curl_easy_setopt(fEasy, CURLOPT_ERRORBUFFER, fError);
+
+ // Check for cache tag.
+ if (fCacheTag) {
+ // Outgoing tag.
+ if (!fCacheTag->empty()) {
+ fHeaders = curl_slist_append(fHeaders, fCacheTag->c_str());
+ }
+ // Incoming tag.
+ curl_easy_setopt(fEasy, CURLOPT_HEADERFUNCTION, curl_header_hook);
+ curl_easy_setopt(fEasy, CURLOPT_HEADERDATA, fCacheTag);
+ }
+
+ // Add User-Agent as a header for now. TODO: Add private member to hold the
+ // value for the standard UA option.
+ string ua = string("User-Agent: ") + XMLToolingConfig::getConfig().user_agent +
+ " libcurl/" + LIBCURL_VERSION + ' ' + OPENSSL_VERSION_TEXT;
+ fHeaders = curl_slist_append(fHeaders, ua.c_str());
+
+ // Add User-Agent and cache headers.
+ curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, fHeaders);
+
+ if (e) {
+ const XMLCh* flag = e->getAttributeNS(nullptr, verifyHost);
+ if (flag && (*flag == chLatin_f || *flag == chDigit_0))
+ curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0);
+
+ // Process TransportOption elements.
+ bool success;
+ DOMElement* child = XMLHelper::getLastChildElement(e, TransportOption);
+ while (child) {
+ if (child->hasChildNodes() && XMLString::equals(child->getAttributeNS(nullptr,_provider), _OpenSSL)) {
+ auto_ptr_char option(child->getAttributeNS(nullptr,_option));
+ auto_ptr_char value(child->getFirstChild()->getNodeValue());
+ if (option.get() && value.get() && !strcmp(option.get(), "SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION") &&
+ (*value.get()=='1' || *value.get()=='t')) {
+ // If the new option to enable buggy rengotiation is available, set it.
+ // Otherwise, signal false if this is newer than 0.9.8k, because that
+ // means it's 0.9.8l, which blocks renegotiation, and therefore will
+ // not honor this request. Older versions are buggy, so behave as though
+ // the flag was set anyway, so we signal true.
+#if defined(SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION)
+ fOpenSSLOps |= SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION;
+ success = true;
+#elif (OPENSSL_VERSION_NUMBER > 0x009080bfL)
+ success = false;
+#else
+ success = true;
+#endif
+ }
+ else {
+ success = false;
+ }
+ if (!success)
+ fLog.error("failed to set OpenSSL transport option (%s)", option.get());
+ }
+ else if (child->hasChildNodes() && XMLString::equals(child->getAttributeNS(nullptr,_provider), _CURL)) {
+ auto_ptr_char option(child->getAttributeNS(nullptr,_option));
+ auto_ptr_char value(child->getFirstChild()->getNodeValue());
+ if (option.get() && *option.get() && value.get() && *value.get()) {
+ // For libcurl, the option is an enum and the value type depends on the option.
+ CURLoption opt = static_cast<CURLoption>(strtol(option.get(), nullptr, 10));
+ if (opt < CURLOPTTYPE_OBJECTPOINT)
+ success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), nullptr, 10)) == CURLE_OK);
+#ifdef CURLOPTTYPE_OFF_T
+ else if (opt < CURLOPTTYPE_OFF_T) {
+ fSavedOptions.push_back(value.get());
+ success = (curl_easy_setopt(fEasy, opt, fSavedOptions.back().c_str()) == CURLE_OK);
+ }
+# ifdef HAVE_CURL_OFF_T
+ else if (sizeof(curl_off_t) == sizeof(long))
+ success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), nullptr, 10)) == CURLE_OK);
+# else
+ else if (sizeof(off_t) == sizeof(long))
+ success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), nullptr, 10)) == CURLE_OK);
+# endif
+ else
+ success = false;
+#else
+ else {
+ fSavedOptions.push_back(value.get());
+ success = (curl_easy_setopt(fEasy, opt, fSavedOptions.back().c_str()) == CURLE_OK);
+ }
+#endif
+ if (!success)
+ fLog.error("failed to set CURL transport option (%s)", option.get());
+ }
+ }
+ child = XMLHelper::getPreviousSiblingElement(child, TransportOption);
+ }
+ }
+
+ // Add easy handle to the multi stack
+ curl_multi_add_handle(fMulti, fEasy);
+
+ fLog.debug("libcurl trying to fetch %s", fURL.c_str());
+
+ // Start reading, to get the content type
+ while(fBufferHeadPtr == fBuffer) {
+ int runningHandles = 0;
+ try {
+ readMore(&runningHandles);
+ }
+ catch (XMLException&) {
+ curl_multi_remove_handle(fMulti, fEasy);
+ curl_easy_cleanup(fEasy);
+ fEasy = nullptr;
+ curl_multi_cleanup(fMulti);
+ fMulti = nullptr;
+ throw;
+ }
+ if(runningHandles == 0) break;
+ }
+
+ // Check for a response code.
+ if (curl_easy_getinfo(fEasy, CURLINFO_RESPONSE_CODE, &fStatusCode) == CURLE_OK) {
+ if (fStatusCode >= 300 ) {
+ // Short-circuit usual processing by storing a special XML document in the buffer.
+ ostringstream specialdoc;
+ specialdoc << '<' << URLInputSource::asciiStatusCodeElementName << " xmlns=\"http://www.opensaml.org/xmltooling\">"
+ << fStatusCode
+ << "</" << URLInputSource::asciiStatusCodeElementName << '>';
+ string specialxml = specialdoc.str();
+ fBufferTailPtr = fBuffer = reinterpret_cast<XMLByte*>(malloc(specialxml.length()));
+ if (!fBuffer) {
+ curl_multi_remove_handle(fMulti, fEasy);
+ curl_easy_cleanup(fEasy);
+ fEasy = nullptr;
+ curl_multi_cleanup(fMulti);
+ fMulti = nullptr;
+ throw bad_alloc();
+ }
+ memcpy(fBuffer, specialxml.c_str(), specialxml.length());
+ fBufferHeadPtr = fBuffer + specialxml.length();
+ }
+ }
+ else {
+ fStatusCode = 200; // reset to 200 to ensure no special processing occurs
+ }
+
+ // Find the content type
+ char* contentType8 = nullptr;
+ if(curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8) == CURLE_OK && contentType8)
+ fContentType = XMLString::transcode(contentType8);
}
-size_t
-CurlURLInputStream::staticWriteCallback(char *buffer,
- size_t size,
- size_t nitems,
- void *outstream)
+size_t CurlURLInputStream::staticWriteCallback(char* buffer, size_t size, size_t nitems, void* outstream)
{
- return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
+ return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
}
+size_t CurlURLInputStream::writeCallback(char* buffer, size_t size, size_t nitems)
+{
+ size_t cnt = size * nitems;
+ size_t totalConsumed = 0;
+
+ // Consume as many bytes as possible immediately into the buffer
+ size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
+ memcpy(fWritePtr, buffer, consume);
+ fWritePtr += consume;
+ fBytesRead += consume;
+ fTotalBytesRead += consume;
+ fBytesToRead -= consume;
+ fLog.debug("write callback consuming %u bytes", consume);
-size_t
-CurlURLInputStream::writeCallback(char *buffer,
- size_t size,
- size_t nitems)
-{
- size_t cnt = size * nitems;
- size_t totalConsumed = 0;
-
- // Consume as many bytes as possible immediately into the buffer
- size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
- memcpy(fWritePtr, buffer, consume);
- fWritePtr += consume;
- fBytesRead += consume;
- fTotalBytesRead += consume;
- fBytesToRead -= consume;
-
- //m_log.debug("write callback consuming %d bytes", consume);
-
- // If bytes remain, rebuffer as many as possible into our holding buffer
- buffer += consume;
- totalConsumed += consume;
- cnt -= consume;
- if (cnt > 0)
- {
- size_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer);
- consume = (cnt > bufAvail) ? bufAvail : cnt;
- memcpy(fBufferHeadPtr, buffer, consume);
- fBufferHeadPtr += consume;
- buffer += consume;
- totalConsumed += consume;
- //m_log.debug("write callback rebuffering %d bytes", consume);
- }
-
- // Return the total amount we've consumed. If we don't consume all the bytes
- // then an error will be generated. Since our buffer size is equal to the
- // maximum size that curl will write, this should never happen unless there
- // is a logic error somewhere here.
- return totalConsumed;
-}
+ // If bytes remain, rebuffer as many as possible into our holding buffer
+ buffer += consume;
+ totalConsumed += consume;
+ cnt -= consume;
+ if (cnt > 0)
+ {
+ size_t bufAvail = fBufferSize - (fBufferHeadPtr - fBuffer);
+ if (bufAvail < cnt) {
+ // Enlarge the buffer. TODO: limit max size
+ XMLByte* newbuf = reinterpret_cast<XMLByte*>(realloc(fBuffer, fBufferSize + (cnt - bufAvail)));
+ if (newbuf) {
+ fBufferSize = fBufferSize + (cnt - bufAvail);
+ fLog.debug("enlarged buffer to %u bytes", fBufferSize);
+ fBufferHeadPtr = newbuf + (fBufferHeadPtr - fBuffer);
+ fBuffer = fBufferTailPtr = newbuf;
+ }
+ }
+ memcpy(fBufferHeadPtr, buffer, cnt);
+ fBufferHeadPtr += cnt;
+ buffer += cnt;
+ totalConsumed += cnt;
+ fLog.debug("write callback rebuffering %u bytes", cnt);
+ }
+ // Return the total amount we've consumed. If we don't consume all the bytes
+ // then an error will be generated. Since our buffer size is equal to the
+ // maximum size that curl will write, this should never happen unless there
+ // is a logic error somewhere here.
+ return totalConsumed;
+}
-unsigned int
-CurlURLInputStream::readBytes(XMLByte* const toFill
- , const unsigned int maxToRead)
+bool CurlURLInputStream::readMore(int* runningHandles)
{
- fBytesRead = 0;
- fBytesToRead = maxToRead;
- fWritePtr = toFill;
-
- for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
- {
- // First, any buffered data we have available
- size_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
- bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
- if (bufCnt > 0)
- {
- memcpy(fWritePtr, fBufferTailPtr, bufCnt);
- fWritePtr += bufCnt;
- fBytesRead += bufCnt;
- fTotalBytesRead += bufCnt;
- fBytesToRead -= bufCnt;
-
- fBufferTailPtr += bufCnt;
- if (fBufferTailPtr == fBufferHeadPtr)
- fBufferHeadPtr = fBufferTailPtr = fBuffer;
-
- //m_log.debug("consuming %d buffered bytes", bufCnt);
-
- tryAgain = true;
- continue;
- }
-
- // Ask the curl to do some work
- int runningHandles = 0;
- CURLMcode curlResult = curl_multi_perform(fMulti, &runningHandles);
- //m_log.debug("curl_multi_perform returned %d", curlResult);
- tryAgain = (curlResult == CURLM_CALL_MULTI_PERFORM);
-
- // Process messages from curl
- int msgsInQueue = 0;
- for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; )
- {
- m_log.debug("msg %d, %d from curl", msg->msg, msg->data.result);
-
- if (msg->msg != CURLMSG_DONE)
- continue;
-
- switch (msg->data.result)
- {
- case CURLE_OK:
- // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
- break;
-
- case CURLE_UNSUPPORTED_PROTOCOL:
- ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);
- break;
+ // Ask the curl to do some work
+ CURLMcode curlResult = curl_multi_perform(fMulti, runningHandles);
- case CURLE_COULDNT_RESOLVE_HOST:
- case CURLE_COULDNT_RESOLVE_PROXY:
- ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager);
- break;
-
- case CURLE_COULDNT_CONNECT:
- ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager);
-
- case CURLE_RECV_ERROR:
- ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager);
- break;
+ // Process messages from curl
+ int msgsInQueue = 0;
+ for (CURLMsg* msg = nullptr; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != nullptr; )
+ {
+ fLog.debug("msg %d, %d from curl", msg->msg, msg->data.result);
+
+ if (msg->msg != CURLMSG_DONE)
+ return true;
- default:
- m_log.error("curl NetAccessor encountered error from libcurl (%d)", msg->data.result);
- ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager);
- break;
- }
- }
-
- // If nothing is running any longer, bail out
- if (runningHandles == 0) {
- //m_log.debug("libcurl indicated no running handles");
- break;
+ switch (msg->data.result)
+ {
+ case CURLE_OK:
+ // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
+ break;
+
+ case CURLE_UNSUPPORTED_PROTOCOL:
+ ThrowXML(MalformedURLException, XMLExcepts::URL_UnsupportedProto);
+ break;
+
+ case CURLE_COULDNT_RESOLVE_HOST:
+ case CURLE_COULDNT_RESOLVE_PROXY:
+ ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURL.c_str());
+ break;
+
+ case CURLE_COULDNT_CONNECT:
+ ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str());
+ break;
+
+ case CURLE_OPERATION_TIMEDOUT:
+ ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str());
+ break;
+
+ case CURLE_RECV_ERROR:
+ ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURL.c_str());
+ break;
+
+ default:
+ fLog.error("error while fetching %s: (%d) %s", fURL.c_str(), msg->data.result, fError);
+ ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURL.c_str());
+ break;
}
-
- // If there is no further data to read, and we haven't
- // read any yet on this invocation, call select to wait for data
- if (!tryAgain && fBytesRead == 0)
- {
- fd_set readSet;
- fd_set writeSet;
- fd_set exceptSet;
- int fdcnt=0;
-
- // Ask curl for the file descriptors to wait on
- FD_ZERO(&readSet);
- FD_ZERO(&writeSet);
- FD_ZERO(&exceptSet);
- (void) curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt);
-
- // Wait on the file descriptors
- timeval tv;
- tv.tv_sec = 2;
- tv.tv_usec = 0;
- (void) select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv);
- }
- }
-
- //m_log.debug("returning with %d bytes to parser", fBytesRead);
- return fBytesRead;
+ }
+
+ // If nothing is running any longer, bail out
+ if(*runningHandles == 0)
+ return false;
+
+ // If there is no further data to read, and we haven't
+ // read any yet on this invocation, call select to wait for data
+ if (curlResult != CURLM_CALL_MULTI_PERFORM && fBytesRead == 0)
+ {
+ fd_set readSet;
+ fd_set writeSet;
+ fd_set exceptSet;
+ int fdcnt=0;
+
+ FD_ZERO(&readSet);
+ FD_ZERO(&writeSet);
+ FD_ZERO(&exceptSet);
+
+ // Ask curl for the file descriptors to wait on
+ curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt);
+
+ // Wait on the file descriptors
+ timeval tv;
+ tv.tv_sec = 2;
+ tv.tv_usec = 0;
+ select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv);
+ }
+
+ return curlResult == CURLM_CALL_MULTI_PERFORM;
}
+xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead)
+{
+ fBytesRead = 0;
+ fBytesToRead = maxToRead;
+ fWritePtr = toFill;
+
+ for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
+ {
+ // First, any buffered data we have available
+ size_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
+ bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
+ if (bufCnt > 0)
+ {
+ memcpy(fWritePtr, fBufferTailPtr, bufCnt);
+ fWritePtr += bufCnt;
+ fBytesRead += bufCnt;
+ fTotalBytesRead += bufCnt;
+ fBytesToRead -= bufCnt;
+
+ fBufferTailPtr += bufCnt;
+ if (fBufferTailPtr == fBufferHeadPtr)
+ fBufferHeadPtr = fBufferTailPtr = fBuffer;
+
+ fLog.debug("consuming %d buffered bytes", bufCnt);
+
+ tryAgain = true;
+ continue;
+ }
+
+ // Check for a non-2xx status that means to ignore the curl response.
+ if (fStatusCode >= 300)
+ break;
+
+ // Ask the curl to do some work
+ int runningHandles = 0;
+ tryAgain = readMore(&runningHandles);
+
+ // If nothing is running any longer, bail out
+ if (runningHandles == 0)
+ break;
+ }
+
+ return fBytesRead;
+}