2 * Licensed to the University Corporation for Advanced Internet
3 * Development, Inc. (UCAID) under one or more contributor license
4 * agreements. See the NOTICE file distributed with this work for
5 * additional information regarding copyright ownership.
7 * UCAID licenses this file to you under the Apache License,
8 * Version 2.0 (the "License"); you may not use this file except
9 * in compliance with the License. You may obtain a copy of the
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
17 * either express or implied. See the License for the specific
18 * language governing permissions and limitations under the License.
22 * xmltooling/util/CurlURLInputStream.cpp
24 * Asynchronous use of curl to fetch data from a URL.
29 #include <xmltooling/util/CurlURLInputStream.h>
30 #include <xmltooling/util/ParserPool.h>
31 #include <xmltooling/util/XMLHelper.h>
33 #include <openssl/ssl.h>
34 #include <xercesc/util/XercesDefs.hpp>
35 #include <xercesc/util/XMLNetAccessor.hpp>
36 #include <xercesc/util/XMLString.hpp>
37 #include <xercesc/util/XMLExceptMsgs.hpp>
38 #include <xercesc/util/Janitor.hpp>
39 #include <xercesc/util/XMLUniDefs.hpp>
40 #include <xercesc/util/TransService.hpp>
41 #include <xercesc/util/TranscodingException.hpp>
42 #include <xercesc/util/PlatformUtils.hpp>
44 using namespace xmltooling;
45 using namespace xercesc;
// XMLCh string constants for the DOM names and values matched in this file:
// the provider values "CURL" and "OpenSSL", the "option" and "provider"
// attributes of a TransportOption element, the "url" / "uri" attributes that
// carry the location to fetch, and the "verifyHost" attribute.
49 static const XMLCh _CURL[] = UNICODE_LITERAL_4(C,U,R,L);
50 static const XMLCh _OpenSSL[] = UNICODE_LITERAL_7(O,p,e,n,S,S,L);
51 static const XMLCh _option[] = UNICODE_LITERAL_6(o,p,t,i,o,n);
52 static const XMLCh _provider[] = UNICODE_LITERAL_8(p,r,o,v,i,d,e,r);
53 static const XMLCh TransportOption[] = UNICODE_LITERAL_15(T,r,a,n,s,p,o,r,t,O,p,t,i,o,n);
54 static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
55 static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l);
56 static const XMLCh verifyHost[] = UNICODE_LITERAL_10(v,e,r,i,f,y,H,o,s,t);
58 // callback to invoke a caller-defined SSL callback
// Registered with libcurl through CURLOPT_SSL_CTX_FUNCTION; userptr is the
// CURLOPT_SSL_CTX_DATA value, which init() sets to the owning
// CurlURLInputStream. The visible code applies that stream's OpenSSL option
// mask (getOpenSSLOps()) to the SSL_CTX that libcurl passes in.
59 CURLcode ssl_ctx_callback(CURL* curl, SSL_CTX* ssl_ctx, void* userptr)
// Recover the stream that installed this callback.
61 CurlURLInputStream* str = reinterpret_cast<CurlURLInputStream*>(userptr);
63 // Default flags manually disable SSLv2 so we're not dependent on libcurl to do it.
64 // Also disable the ticket option where implemented, since this breaks a variety
65 // of servers. Newer libcurl also does this for us.
66 #ifdef SSL_OP_NO_TICKET
// Ticket option is known to this OpenSSL: OR it into the stream's mask.
67 SSL_CTX_set_options(ssl_ctx, str->getOpenSSLOps()|SSL_OP_NO_TICKET);
// Otherwise apply the stream's mask unchanged.
69 SSL_CTX_set_options(ssl_ctx, str->getOpenSSLOps());
// libcurl header callback (installed via CURLOPT_HEADERFUNCTION, with the
// caller's cache-tag string as CURLOPT_HEADERDATA). It turns a response
// "ETag:" or "Last-Modified:" header into the matching conditional request
// header and stores it in *stream:
//   ETag: <v>           ->  "If-None-Match: <v>"
//   Last-Modified: <v>  ->  "If-Modified-Since: <v>" (only while the tag is empty)
// ptr is the raw header line of size*nmemb bytes; stream is a std::string*.
75 size_t curl_header_hook(void* ptr, size_t size, size_t nmemb, void* stream)
77 // only handle single-byte data
// Lines shorter than "ETag:" or a missing destination are not processed.
78 if (size!=1 || nmemb<5 || !stream)
80 string* cacheTag = reinterpret_cast<string*>(stream);
81 const char* hdr = reinterpret_cast<char*>(ptr);
// NOTE(review): only nmemb >= 5 is checked above, yet the Last-Modified
// comparison below inspects up to 14 bytes; strncmp stops at the first
// mismatch, but confirm that libcurl's header buffer makes this safe.
82 if (strncmp(hdr, "ETag:", 5) == 0) {
// Bytes left in the line after the "ETag:" prefix.
84 size_t remaining = nmemb - 5;
85 // skip leading spaces
86 while (remaining > 0) {
94 // append until whitespace
96 while (remaining > 0) {
98 (*cacheTag) += *hdr++;
// Prefix only when a value was actually captured.
105 if (!cacheTag->empty())
106 *cacheTag = "If-None-Match: " + *cacheTag;
// Last-Modified is used only when no tag has been stored yet.
108 else if (cacheTag->empty() && strncmp(hdr, "Last-Modified:", 14) == 0) {
// Bytes left in the line after the "Last-Modified:" prefix.
110 size_t remaining = nmemb - 14;
111 // skip leading spaces
112 while (remaining > 0) {
120 // append until whitespace
121 while (remaining > 0) {
// NOTE(review): isspace() is given a plain char; values outside the
// unsigned char range are undefined behaviour, so consider casting
// through unsigned char.
122 if (!isspace(*hdr)) {
123 (*cacheTag) += *hdr++;
130 if (!cacheTag->empty())
131 *cacheTag = "If-Modified-Since: " + *cacheTag;
// Constructs a stream for a URL given as a narrow C string.
// url      - location to fetch; a null pointer is stored as an empty fURL.
// cacheTag - caller-owned string stored in fCacheTag.
// Throws IOException with "No URL supplied..." (the guarding condition is
// not visible here; presumably an empty URL - TODO confirm).
138 CurlURLInputStream::CurlURLInputStream(const char* url, string* cacheTag)
139 : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
140 , fCacheTag(cacheTag)
141 , fURL(url ? url : "")
// Default OpenSSL option mask: SSL_OP_ALL plus SSL_OP_NO_SSLv2.
142 , fOpenSSLOps(SSL_OP_ALL|SSL_OP_NO_SSLv2)
150 , fDataAvailable(false)
159 throw IOException("No URL supplied to CurlURLInputStream constructor.");
// Constructs a stream for a URL given as an XMLCh string.
// url      - location to fetch, wrapped in auto_ptr_char below (presumably
//            to obtain a narrow string for fURL - TODO confirm).
// cacheTag - caller-owned string stored in fCacheTag.
// Throws IOException with "No URL supplied..." (the guarding condition is
// not visible here).
163 CurlURLInputStream::CurlURLInputStream(const XMLCh* url, string* cacheTag)
164 : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
165 , fCacheTag(cacheTag)
// Default OpenSSL option mask: SSL_OP_ALL plus SSL_OP_NO_SSLv2.
166 , fOpenSSLOps(SSL_OP_ALL|SSL_OP_NO_SSLv2)
174 , fDataAvailable(false)
183 auto_ptr_char temp(url);
187 throw IOException("No URL supplied to CurlURLInputStream constructor.");
// Constructs a stream whose URL comes from a DOM element.
// e        - element read for a "url" attribute, falling back to "uri" when
//            "url" is absent or empty.
// cacheTag - caller-owned string stored in fCacheTag.
// Throws IOException with "No URL supplied via DOM..." (presumably when
// neither attribute yields a value; the exact condition is not visible here).
191 CurlURLInputStream::CurlURLInputStream(const DOMElement* e, string* cacheTag)
192 : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
193 , fCacheTag(cacheTag)
// Default OpenSSL option mask: SSL_OP_ALL plus SSL_OP_NO_SSLv2.
194 , fOpenSSLOps(SSL_OP_ALL|SSL_OP_NO_SSLv2)
202 , fDataAvailable(false)
// Prefer the "url" attribute...
210 const XMLCh* attr = e->getAttributeNS(nullptr, url);
211 if (!attr || !*attr) {
// ...and fall back to "uri".
212 attr = e->getAttributeNS(nullptr, uri);
214 throw IOException("No URL supplied via DOM to CurlURLInputStream constructor.");
217 auto_ptr_char temp(attr);
// Releases the libcurl and Xerces resources held by the stream. The easy
// handle is detached from the multi handle before either is cleaned up;
// the header list and the transcoded content type are freed afterwards.
// NOTE(review): any null checks around these calls are not visible here.
222 CurlURLInputStream::~CurlURLInputStream()
225 // Remove the easy handle from the multi stack
226 curl_multi_remove_handle(fMulti, fEasy);
228 // Cleanup the easy handle
229 curl_easy_cleanup(fEasy);
233 // Cleanup the multi handle
234 curl_multi_cleanup(fMulti);
// Free the request header list built with curl_slist_append.
238 curl_slist_free_all(fHeaders);
// Free the content type string produced by XMLString::transcode.
241 XMLString::release(&fContentType);
// Creates and configures the libcurl multi/easy handle pair for fURL, applies
// optional settings read from the DOM element e (the verifyHost attribute and
// TransportOption children), then drives the transfer until data has been
// buffered or no handle is running, and finally records the response status
// and content type.
// Throws IOException if either libcurl handle cannot be allocated.
// NOTE(review): integer option values below are passed as int literals;
// libcurl documents these options as taking long.
245 void CurlURLInputStream::init(const DOMElement* e)
247 // Allocate the curl multi handle
248 fMulti = curl_multi_init();
250 // Allocate the curl easy handle
251 fEasy = curl_easy_init();
253 if (!fMulti || !fEasy)
254 throw IOException("Failed to allocate libcurl handles.");
256 curl_easy_setopt(fEasy, CURLOPT_URL, fURL.c_str());
258 // Set up a way to receive the data
259 curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function
260 curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function
// Follow redirects, at most 6 of them.
263 curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, 1);
264 curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, 6);
// Connection and whole-transfer timeouts, then the default security and
// transport settings. HTTP auth, user/password and CA file are cleared.
267 curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 10);
268 curl_easy_setopt(fEasy, CURLOPT_TIMEOUT, 60);
269 curl_easy_setopt(fEasy, CURLOPT_HTTPAUTH, 0);
270 curl_easy_setopt(fEasy, CURLOPT_USERPWD, nullptr);
271 curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 2);
// NOTE(review): peer certificate verification is switched off here;
// confirm that trust is established by some other mechanism.
272 curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0);
273 curl_easy_setopt(fEasy, CURLOPT_CAINFO, nullptr);
274 curl_easy_setopt(fEasy, CURLOPT_SSL_CIPHER_LIST, "ALL:!aNULL:!LOW:!EXPORT:!SSLv2");
275 curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1);
276 curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1);
277 curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1);
278 curl_easy_setopt(fEasy, CURLOPT_ENCODING, "");
280 // Install SSL callback.
281 curl_easy_setopt(fEasy, CURLOPT_SSL_CTX_FUNCTION, ssl_ctx_callback);
282 curl_easy_setopt(fEasy, CURLOPT_SSL_CTX_DATA, this);
// libcurl writes its error text into fError; readMore() logs it.
285 curl_easy_setopt(fEasy, CURLOPT_ERRORBUFFER, fError);
287 // Check for cache tag.
// A non-empty tag is sent as a request header; the header hook then updates
// the same string from the response headers.
// NOTE(review): fCacheTag is dereferenced here; presumably a null check
// precedes this line - TODO confirm.
290 if (!fCacheTag->empty()) {
291 fHeaders = curl_slist_append(fHeaders, fCacheTag->c_str());
294 curl_easy_setopt(fEasy, CURLOPT_HEADERFUNCTION, curl_header_hook);
295 curl_easy_setopt(fEasy, CURLOPT_HEADERDATA, fCacheTag);
298 // Add User-Agent as a header for now. TODO: Add private member to hold the
299 // value for the standard UA option.
300 string ua = string("User-Agent: ") + XMLToolingConfig::getConfig().user_agent +
301 " libcurl/" + LIBCURL_VERSION + ' ' + OPENSSL_VERSION_TEXT;
302 fHeaders = curl_slist_append(fHeaders, ua.c_str());
304 // Add User-Agent and cache headers.
305 curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, fHeaders);
// A verifyHost attribute starting with 'f' or '0' disables host name checks.
// NOTE(review): e is dereferenced here; presumably guarded by a null check
// that is not visible - TODO confirm.
308 const XMLCh* flag = e->getAttributeNS(nullptr, verifyHost);
309 if (flag && (*flag == chLatin_f || *flag == chDigit_0))
310 curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0);
312 // Process TransportOption elements.
// Iteration starts at the last TransportOption child and walks backwards
// through previous siblings.
314 DOMElement* child = XMLHelper::getLastChildElement(e, TransportOption);
// provider="OpenSSL": only SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION with a
// value starting with '1' or 't' is recognised.
316 if (child->hasChildNodes() && XMLString::equals(child->getAttributeNS(nullptr,_provider), _OpenSSL)) {
317 auto_ptr_char option(child->getAttributeNS(nullptr,_option));
318 auto_ptr_char value(child->getFirstChild()->getNodeValue());
319 if (option.get() && value.get() && !strcmp(option.get(), "SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION") &&
320 (*value.get()=='1' || *value.get()=='t')) {
321 // If the new option to enable buggy renegotiation is available, set it.
322 // Otherwise, signal false if this is newer than 0.9.8k, because that
323 // means it's 0.9.8l, which blocks renegotiation, and therefore will
324 // not honor this request. Older versions are buggy, so behave as though
325 // the flag was set anyway, so we signal true.
326 #if defined(SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION)
327 fOpenSSLOps |= SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION;
329 #elif (OPENSSL_VERSION_NUMBER > 0x009080bfL)
339 fLog.error("failed to set OpenSSL transport option (%s)", option.get());
// provider="CURL": the option attribute is the numeric CURLoption value, and
// how the text value is passed depends on which CURLOPTTYPE_* range it is in.
341 else if (child->hasChildNodes() && XMLString::equals(child->getAttributeNS(nullptr,_provider), _CURL)) {
342 auto_ptr_char option(child->getAttributeNS(nullptr,_option));
343 auto_ptr_char value(child->getFirstChild()->getNodeValue());
344 if (option.get() && *option.get() && value.get() && *value.get()) {
345 // For libcurl, the option is an enum and the value type depends on the option.
346 CURLoption opt = static_cast<CURLoption>(strtol(option.get(), nullptr, 10));
// Below CURLOPTTYPE_OBJECTPOINT: pass the value as a long.
347 if (opt < CURLOPTTYPE_OBJECTPOINT)
348 success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), nullptr, 10)) == CURLE_OK);
349 #ifdef CURLOPTTYPE_OFF_T
// Below CURLOPTTYPE_OFF_T: pass the value as a string. A copy is kept in
// fSavedOptions and its c_str() is what libcurl receives.
350 else if (opt < CURLOPTTYPE_OFF_T) {
351 fSavedOptions.push_back(value.get());
352 success = (curl_easy_setopt(fEasy, opt, fSavedOptions.back().c_str()) == CURLE_OK);
354 # ifdef HAVE_CURL_OFF_T
// Remaining options are passed as a long only when the offset type has the
// same size as long.
355 else if (sizeof(curl_off_t) == sizeof(long))
356 success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), nullptr, 10)) == CURLE_OK);
358 else if (sizeof(off_t) == sizeof(long))
359 success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), nullptr, 10)) == CURLE_OK);
// String-valued path again; presumably the branch used when
// CURLOPTTYPE_OFF_T is not defined - TODO confirm.
365 fSavedOptions.push_back(value.get());
366 success = (curl_easy_setopt(fEasy, opt, fSavedOptions.back().c_str()) == CURLE_OK);
370 fLog.error("failed to set CURL transport option (%s)", option.get());
373 child = XMLHelper::getPreviousSiblingElement(child, TransportOption);
377 // Add easy handle to the multi stack
378 curl_multi_add_handle(fMulti, fEasy);
380 fLog.debug("libcurl trying to fetch %s", fURL.c_str());
382 // Start reading, to get the content type
// Loop until the write callback has advanced fBufferHeadPtr or no handle is
// still running.
383 while(fBufferHeadPtr == fBuffer) {
384 int runningHandles = 0;
386 readMore(&runningHandles);
// On a Xerces exception the handles are torn down here (presumably before
// the exception is rethrown - TODO confirm).
388 catch (XMLException&) {
389 curl_multi_remove_handle(fMulti, fEasy);
390 curl_easy_cleanup(fEasy);
392 curl_multi_cleanup(fMulti);
396 if(runningHandles == 0) break;
399 // Check for a response code.
400 if (curl_easy_getinfo(fEasy, CURLINFO_RESPONSE_CODE, &fStatusCode) == CURLE_OK) {
401 if (fStatusCode >= 300 ) {
402 // Short-circuit usual processing by storing a special XML document in the buffer.
403 ostringstream specialdoc;
404 specialdoc << '<' << URLInputSource::asciiStatusCodeElementName << " xmlns=\"http://www.opensaml.org/xmltooling\">"
406 << "</" << URLInputSource::asciiStatusCodeElementName << '>';
407 string specialxml = specialdoc.str();
// The buffer is replaced by a fresh allocation sized to the document.
// NOTE(review): the previous fBuffer is overwritten without a visible free,
// and the cleanup calls below presumably belong to an allocation-failure
// branch that is not visible - TODO confirm.
408 fBufferTailPtr = fBuffer = reinterpret_cast<XMLByte*>(malloc(specialxml.length()));
410 curl_multi_remove_handle(fMulti, fEasy);
411 curl_easy_cleanup(fEasy);
413 curl_multi_cleanup(fMulti);
// Copy the document (no terminating NUL) and mark it as buffered data.
417 memcpy(fBuffer, specialxml.c_str(), specialxml.length());
418 fBufferHeadPtr = fBuffer + specialxml.length();
422 fStatusCode = 200; // reset to 200 to ensure no special processing occurs
425 // Find the content type
426 char* contentType8 = nullptr;
427 if(curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8) == CURLE_OK && contentType8)
// Transcoded copy; the destructor releases it with XMLString::release.
428 fContentType = XMLString::transcode(contentType8);
// Static trampoline given to libcurl as CURLOPT_WRITEFUNCTION. outstream is
// the CURLOPT_WRITEDATA value (the stream's this pointer, set in init()),
// and the call is forwarded to that instance's writeCallback().
432 size_t CurlURLInputStream::staticWriteCallback(char* buffer, size_t size, size_t nitems, void* outstream)
434 return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
// Receives one chunk of response data from libcurl (size * nitems bytes).
// As much as the pending read request allows (fBytesToRead) is copied
// straight to fWritePtr; the holding buffer is grown with realloc when it
// cannot take the data to be rebuffered. Returns the number of bytes
// accepted (totalConsumed).
// NOTE(review): the "%u" log formats below are given size_t arguments
// (consume, cnt); that mismatches on platforms where size_t is wider than
// unsigned int.
437 size_t CurlURLInputStream::writeCallback(char* buffer, size_t size, size_t nitems)
439 size_t cnt = size * nitems;
440 size_t totalConsumed = 0;
442 // Consume as many bytes as possible immediately into the buffer
443 size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
444 memcpy(fWritePtr, buffer, consume);
445 fWritePtr += consume;
446 fBytesRead += consume;
447 fTotalBytesRead += consume;
448 fBytesToRead -= consume;
450 fLog.debug("write callback consuming %u bytes", consume);
452 // If bytes remain, rebuffer as many as possible into our holding buffer
// NOTE(review): buffer and cnt are presumably advanced/reduced by consume on
// lines not visible here - TODO confirm.
454 totalConsumed += consume;
// Free space between the buffer's write position and its end.
458 size_t bufAvail = fBufferSize - (fBufferHeadPtr - fBuffer);
459 if (bufAvail < cnt) {
460 // Enlarge the buffer. TODO: limit max size
// Grow by exactly the shortfall.
461 XMLByte* newbuf = reinterpret_cast<XMLByte*>(realloc(fBuffer, fBufferSize + (cnt - bufAvail)));
463 fBufferSize = fBufferSize + (cnt - bufAvail);
464 fLog.debug("enlarged buffer to %u bytes", fBufferSize);
// Rebase the head pointer onto the new allocation before fBuffer is replaced.
465 fBufferHeadPtr = newbuf + (fBufferHeadPtr - fBuffer);
466 fBuffer = fBufferTailPtr = newbuf;
// Append the data at the head of the holding buffer.
469 memcpy(fBufferHeadPtr, buffer, cnt);
470 fBufferHeadPtr += cnt;
472 totalConsumed += cnt;
473 fLog.debug("write callback rebuffering %u bytes", cnt);
476 // Return the total amount we've consumed. If we don't consume all the bytes
477 // then an error will be generated. Since our buffer size is equal to the
478 // maximum size that curl will write, this should never happen unless there
479 // is a logic error somewhere here.
480 return totalConsumed;
// Drives the transfer one step. Calls curl_multi_perform, drains libcurl's
// message queue (turning failed transfer results into Xerces exceptions),
// and, when libcurl did not ask to be called again and no bytes were read on
// this invocation, waits in select() on libcurl's file descriptors.
// runningHandles - out: number of handles still active, from curl_multi_perform.
// The final return is true when libcurl reported CURLM_CALL_MULTI_PERFORM.
// NOTE(review): the value returned by the early exit when *runningHandles
// is 0 is not visible here.
483 bool CurlURLInputStream::readMore(int* runningHandles)
485 // Ask the curl to do some work
486 CURLMcode curlResult = curl_multi_perform(fMulti, runningHandles);
488 // Process messages from curl
490 for (CURLMsg* msg = nullptr; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != nullptr; )
492 fLog.debug("msg %d, %d from curl", msg->msg, msg->data.result);
// Only completion messages are tested against the result codes below.
494 if (msg->msg != CURLMSG_DONE)
497 switch (msg->data.result)
500 // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
503 case CURLE_UNSUPPORTED_PROTOCOL:
504 ThrowXML(MalformedURLException, XMLExcepts::URL_UnsupportedProto);
507 case CURLE_COULDNT_RESOLVE_HOST:
508 case CURLE_COULDNT_RESOLVE_PROXY:
509 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURL.c_str());
512 case CURLE_COULDNT_CONNECT:
513 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str());
// Timeouts are reported with the same code as a failed connect.
516 case CURLE_OPERATION_TIMEDOUT:
517 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str());
520 case CURLE_RECV_ERROR:
521 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURL.c_str());
// Other results: log libcurl's error buffer, then raise an internal error.
525 fLog.error("error while fetching %s: (%d) %s", fURL.c_str(), msg->data.result, fError);
526 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURL.c_str());
531 // If nothing is running any longer, bail out
532 if(*runningHandles == 0)
535 // If there is no further data to read, and we haven't
536 // read any yet on this invocation, call select to wait for data
537 if (curlResult != CURLM_CALL_MULTI_PERFORM && fBytesRead == 0)
548 // Ask curl for the file descriptors to wait on
549 curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt);
551 // Wait on the file descriptors
// Blocks until a descriptor is ready or the timeout tv elapses (tv is set
// on lines not visible here).
555 select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv);
558 return curlResult == CURLM_CALL_MULTI_PERFORM;
561 xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead)
564 fBytesToRead = maxToRead;
567 for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
569 // First, any buffered data we have available
570 size_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
571 bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
574 memcpy(fWritePtr, fBufferTailPtr, bufCnt);
576 fBytesRead += bufCnt;
577 fTotalBytesRead += bufCnt;
578 fBytesToRead -= bufCnt;
580 fBufferTailPtr += bufCnt;
581 if (fBufferTailPtr == fBufferHeadPtr)
582 fBufferHeadPtr = fBufferTailPtr = fBuffer;
584 fLog.debug("consuming %d buffered bytes", bufCnt);
590 // Check for a non-2xx status that means to ignore the curl response.
591 if (fStatusCode >= 300)
594 // Ask the curl to do some work
595 int runningHandles = 0;
596 tryAgain = readMore(&runningHandles);
598 // If nothing is running any longer, bail out
599 if (runningHandles == 0)