2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
19 * xmltooling/util/CurlURLInputStream.cpp
21 * Asynchronous use of curl to fetch data from a URL.
26 #include <xmltooling/util/CurlURLInputStream.h>
27 #include <xmltooling/util/ParserPool.h>
28 #include <xmltooling/util/XMLHelper.h>
30 #include <openssl/ssl.h>
31 #include <xercesc/util/XercesDefs.hpp>
32 #include <xercesc/util/XMLNetAccessor.hpp>
33 #include <xercesc/util/XMLString.hpp>
34 #include <xercesc/util/XMLExceptMsgs.hpp>
35 #include <xercesc/util/Janitor.hpp>
36 #include <xercesc/util/XMLUniDefs.hpp>
37 #include <xercesc/util/TransService.hpp>
38 #include <xercesc/util/TranscodingException.hpp>
39 #include <xercesc/util/PlatformUtils.hpp>
41 using namespace xmltooling;
42 using namespace xercesc;
// XMLCh string literals for the attribute/element names and provider identifiers
// that this file looks up on a configuration DOMElement:
//   - url / uri:        attributes read by the DOM-based constructor to find the target URL
//   - verifyHost:       attribute read in init() to optionally turn off host verification
//   - TransportOption:  child element name iterated in init()
//   - provider/option:  attributes of a TransportOption element
//   - CURL / OpenSSL:   the two provider values init() compares against
46 static const XMLCh _CURL[] = UNICODE_LITERAL_4(C,U,R,L);
47 static const XMLCh _OpenSSL[] = UNICODE_LITERAL_7(O,p,e,n,S,S,L);
48 static const XMLCh _option[] = UNICODE_LITERAL_6(o,p,t,i,o,n);
49 static const XMLCh _provider[] = UNICODE_LITERAL_8(p,r,o,v,i,d,e,r);
50 static const XMLCh TransportOption[] = UNICODE_LITERAL_15(T,r,a,n,s,p,o,r,t,O,p,t,i,o,n);
51 static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
52 static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l);
53 static const XMLCh verifyHost[] = UNICODE_LITERAL_10(v,e,r,i,f,y,H,o,s,t);
55 // callback to invoke a caller-defined SSL callback
// Installed by init() through CURLOPT_SSL_CTX_FUNCTION. The userptr argument is the
// value registered with CURLOPT_SSL_CTX_DATA, which init() sets to the owning
// CurlURLInputStream. The callback applies that stream's OpenSSL option mask
// (getOpenSSLOps()) to the SSL_CTX handed over by libcurl.
// NOTE(review): the return statement is not visible in this listing; a
// CURLOPT_SSL_CTX_FUNCTION callback is expected to return CURLE_OK on success.
56 CURLcode ssl_ctx_callback(CURL* curl, SSL_CTX* ssl_ctx, void* userptr)
// Recover the stream object that registered this callback.
58 CurlURLInputStream* str = reinterpret_cast<CurlURLInputStream*>(userptr);
60 // Default flags manually disable SSLv2 so we're not dependent on libcurl to do it.
61 // Also disable the ticket option where implemented, since this breaks a variety
62 // of servers. Newer libcurl also does this for us.
63 #ifdef SSL_OP_NO_TICKET
// SSL_OP_NO_TICKET is available in this OpenSSL: add it to the stream's option mask.
64 SSL_CTX_set_options(ssl_ctx, str->getOpenSSLOps()|SSL_OP_NO_TICKET);
// Variant without the ticket flag (presumably the branch used when the macro is absent).
66 SSL_CTX_set_options(ssl_ctx, str->getOpenSSLOps());
// libcurl header callback, installed by init() via CURLOPT_HEADERFUNCTION with
// CURLOPT_HEADERDATA set to the caller's cache tag string.
//
// For each response header line it looks for "ETag:" or, only while the cache tag is
// still empty, "Last-Modified:". The header value is appended to the cache tag and the
// tag is then rewritten as a ready-to-send conditional request header
// ("If-None-Match: ..." or "If-Modified-Since: ...").
//
// ptr    - header bytes supplied by libcurl (treated as char data)
// size   - element size; only size == 1 is handled
// nmemb  - number of elements, i.e. bytes when size == 1
// stream - pointer to the std::string cache tag; a null pointer is rejected by the guard
//
// NOTE(review): the return statements are not visible in this listing.
72 size_t curl_header_hook(void* ptr, size_t size, size_t nmemb, void* stream)
74 // only handle single-byte data
// Also rejects lines shorter than the 5-character "ETag:" prefix and a missing target string.
75 if (size!=1 || nmemb<5 || !stream)
77 string* cacheTag = reinterpret_cast<string*>(stream);
78 const char* hdr = reinterpret_cast<char*>(ptr);
// NOTE(review): strncmp is case-sensitive, so a header spelled e.g. "etag:" will not
// match here — confirm whether that is intended.
79 if (strncmp(hdr, "ETag:", 5) == 0) {
// Bytes left in the header line after the "ETag:" prefix.
81 size_t remaining = nmemb - 5;
82 // skip leading spaces
83 while (remaining > 0) {
91 // append until whitespace
93 while (remaining > 0) {
95 (*cacheTag) += *hdr++;
// A non-empty value was captured: turn it into the conditional header to send next time.
102 if (!cacheTag->empty())
103 *cacheTag = "If-None-Match: " + *cacheTag;
// Last-Modified is only consulted while no tag has been captured yet.
// NOTE(review): nmemb was only checked against 5 above; the 14-byte comparison and the
// "nmemb - 14" below rely on a successful match implying at least 14 bytes of data.
105 else if (cacheTag->empty() && strncmp(hdr, "Last-Modified:", 14) == 0) {
// Bytes left in the header line after the "Last-Modified:" prefix.
107 size_t remaining = nmemb - 14;
108 // skip leading spaces
109 while (remaining > 0) {
117 // append until whitespace
// NOTE(review): HTTP date values contain spaces; if this loop really stops at the first
// whitespace character, only the leading token of the date would be captured — verify.
118 while (remaining > 0) {
119 if (!isspace(*hdr)) {
120 (*cacheTag) += *hdr++;
// A non-empty value was captured: turn it into the conditional header to send next time.
127 if (!cacheTag->empty())
128 *cacheTag = "If-Modified-Since: " + *cacheTag;
// Constructs a stream for a URL given as a narrow C string.
//
// url      - target URL; a null pointer is stored as an empty string
// cacheTag - optional caller-owned string carrying a conditional-request header
//            (stored as-is in fCacheTag)
//
// Throws IOException("No URL supplied ...").
// NOTE(review): the condition guarding the throw and the call that starts the transfer
// are not visible in this listing; presumably the throw fires when fURL is empty.
135 CurlURLInputStream::CurlURLInputStream(const char* url, string* cacheTag)
136 : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
137 , fCacheTag(cacheTag)
// Default OpenSSL option mask: all bug workarounds enabled, SSLv2 disabled.
138 , fOpenSSLOps(SSL_OP_ALL|SSL_OP_NO_SSLv2)
139 , fURL(url ? url : "")
147 , fDataAvailable(false)
// Head and tail both at the start of fBuffer, i.e. the holding buffer starts out empty.
148 , fBufferHeadPtr(fBuffer)
149 , fBufferTailPtr(fBuffer)
154 throw IOException("No URL supplied to CurlURLInputStream constructor.");
// Constructs a stream for a URL given as an XMLCh string.
//
// url      - target URL as XMLCh; converted to a narrow string through auto_ptr_char
// cacheTag - optional caller-owned string carrying a conditional-request header
//            (stored as-is in fCacheTag)
//
// Throws IOException("No URL supplied ...").
// NOTE(review): the lines that copy the converted URL into the object and the condition
// guarding the throw are not visible in this listing.
158 CurlURLInputStream::CurlURLInputStream(const XMLCh* url, string* cacheTag)
159 : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
160 , fCacheTag(cacheTag)
// Default OpenSSL option mask: all bug workarounds enabled, SSLv2 disabled.
161 , fOpenSSLOps(SSL_OP_ALL|SSL_OP_NO_SSLv2)
169 , fDataAvailable(false)
// Head and tail both at the start of fBuffer, i.e. the holding buffer starts out empty.
170 , fBufferHeadPtr(fBuffer)
171 , fBufferTailPtr(fBuffer)
// Narrow-string conversion of the XMLCh URL.
176 auto_ptr_char temp(url);
180 throw IOException("No URL supplied to CurlURLInputStream constructor.");
// Constructs a stream from a configuration DOM element.
//
// e        - element whose "url" attribute names the target; when that attribute is
//            missing or empty the "uri" attribute is consulted instead
// cacheTag - optional caller-owned string carrying a conditional-request header
//            (stored as-is in fCacheTag)
//
// Throws IOException("No URL supplied via DOM ...").
// NOTE(review): the condition directly guarding the throw and the lines that store the
// converted URL are not visible here; presumably it fires when "uri" is also empty.
184 CurlURLInputStream::CurlURLInputStream(const DOMElement* e, string* cacheTag)
185 : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
186 , fCacheTag(cacheTag)
// Default OpenSSL option mask: all bug workarounds enabled, SSLv2 disabled.
187 , fOpenSSLOps(SSL_OP_ALL|SSL_OP_NO_SSLv2)
195 , fDataAvailable(false)
// Head and tail both at the start of fBuffer, i.e. the holding buffer starts out empty.
196 , fBufferHeadPtr(fBuffer)
197 , fBufferTailPtr(fBuffer)
// Preferred attribute name is "url" ...
201 const XMLCh* attr = e->getAttributeNS(NULL, url);
202 if (!attr || !*attr) {
// ... with "uri" as the fallback.
203 attr = e->getAttributeNS(NULL, uri);
205 throw IOException("No URL supplied via DOM to CurlURLInputStream constructor.");
// Narrow-string conversion of the chosen attribute value.
208 auto_ptr_char temp(attr);
// Releases the libcurl resources held by the stream: detaches the easy handle from the
// multi handle, destroys both handles, frees the request header list and releases the
// transcoded content type string.
// NOTE(review): any null checks around these calls are not visible in this listing.
213 CurlURLInputStream::~CurlURLInputStream()
216 // Remove the easy handle from the multi stack
217 curl_multi_remove_handle(fMulti, fEasy);
219 // Cleanup the easy handle
220 curl_easy_cleanup(fEasy);
224 // Cleanup the multi handle
225 curl_multi_cleanup(fMulti);
// Free the curl_slist built in init() for the conditional-request header.
229 curl_slist_free_all(fHeaders);
// fContentType was produced by XMLString::transcode in init(), so it is released
// through XMLString.
232 XMLString::release(&fContentType);
// Sets up the libcurl transfer for fURL and pumps it until the first data (or the end
// of the transfer) so that the status code and content type are known.
//
// Steps, in order:
//   1. allocate the multi and easy handles and apply the fixed default options;
//   2. install the SSL context callback and the error buffer;
//   3. send the caller's cache tag as a request header and hook response headers so the
//      tag can be refreshed;
//   4. apply overrides from the configuration element (verifyHost attribute and
//      TransportOption children for the OpenSSL and CURL providers);
//   5. start the transfer, then record the response status and content type.
//
// e - optional configuration element.
//     NOTE(review): the null checks on e and on fCacheTag are not visible in this listing.
//
// Throws IOException when the libcurl handles cannot be allocated. Exceptions raised by
// readMore() pass through the catch block below, which releases the handles first.
235 void CurlURLInputStream::init(const DOMElement* e)
237 // Allocate the curl multi handle
238 fMulti = curl_multi_init();
240 // Allocate the curl easy handle
241 fEasy = curl_easy_init();
243 if (!fMulti || !fEasy)
244 throw IOException("Failed to allocate libcurl handles.");
246 curl_easy_setopt(fEasy, CURLOPT_URL, fURL.c_str());
248 // Set up a way to receive the data
249 curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function
250 curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function
// Follow redirects, but at most 6 of them.
253 curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, 1);
254 curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, 6);
// Fixed defaults: 10 second connect timeout, 60 second overall timeout, no HTTP
// authentication and no credentials.
257 curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT,10);
258 curl_easy_setopt(fEasy, CURLOPT_TIMEOUT,60);
259 curl_easy_setopt(fEasy, CURLOPT_HTTPAUTH,0);
260 curl_easy_setopt(fEasy, CURLOPT_USERPWD,NULL);
// Host name checking is on by default (it can be switched off via verifyHost below).
261 curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 2);
// NOTE(review): peer certificate verification is switched off at the libcurl level and
// no CA bundle is configured; presumably trust is evaluated elsewhere — confirm.
262 curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0);
263 curl_easy_setopt(fEasy, CURLOPT_CAINFO, NULL);
264 curl_easy_setopt(fEasy, CURLOPT_SSL_CIPHER_LIST, "ALL:!aNULL:!LOW:!EXPORT:!SSLv2");
265 curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1);
266 curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1);
267 curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1);
269 // Install SSL callback.
270 curl_easy_setopt(fEasy, CURLOPT_SSL_CTX_FUNCTION, ssl_ctx_callback);
271 curl_easy_setopt(fEasy, CURLOPT_SSL_CTX_DATA, this);
// libcurl writes a human-readable error message into fError; readMore() logs it.
274 curl_easy_setopt(fEasy, CURLOPT_ERRORBUFFER, fError);
276 // Check for cache tag.
// A non-empty tag is sent verbatim as a request header line.
279 if (!fCacheTag->empty()) {
280 fHeaders = curl_slist_append(fHeaders, fCacheTag->c_str());
281 curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, fHeaders);
// Let curl_header_hook inspect the response headers and write into the caller's tag.
284 curl_easy_setopt(fEasy, CURLOPT_HEADERFUNCTION, curl_header_hook);
285 curl_easy_setopt(fEasy, CURLOPT_HEADERDATA, fCacheTag);
// A verifyHost value starting with 'f' or '0' turns host name verification off.
289 const XMLCh* flag = e->getAttributeNS(NULL, verifyHost);
290 if (flag && (*flag == chLatin_f || *flag == chDigit_0))
291 curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0);
293 // Process TransportOption elements.
// Iteration runs from the last TransportOption child backwards through its siblings.
295 DOMElement* child = XMLHelper::getLastChildElement(e, TransportOption);
// OpenSSL provider: the option name is an attribute, the value is the first child node.
297 if (child->hasChildNodes() && XMLString::equals(child->getAttributeNS(NULL,_provider), _OpenSSL)) {
298 auto_ptr_char option(child->getAttributeNS(NULL,_option));
299 auto_ptr_char value(child->getFirstChild()->getNodeValue());
// Only SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION with a value starting with '1' or 't'
// is handled by the visible code.
300 if (option.get() && value.get() && !strcmp(option.get(), "SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION") &&
301 (*value.get()=='1' || *value.get()=='t')) {
302 // If the new option to enable buggy renegotiation is available, set it.
303 // Otherwise, signal false if this is newer than 0.9.8k, because that
304 // means it's 0.9.8l, which blocks renegotiation, and therefore will
305 // not honor this request. Older versions are buggy, so behave as though
306 // the flag was set anyway, so we signal true.
307 #if defined(SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION)
// The flag is picked up later by ssl_ctx_callback through getOpenSSLOps().
308 fOpenSSLOps |= SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION;
310 #elif (OPENSSL_VERSION_NUMBER > 0x009080bfL)
320 fLog.error("failed to set OpenSSL transport option (%s)", option.get());
// CURL provider: option is a numeric CURLoption, value is interpreted per option type.
322 else if (child->hasChildNodes() && XMLString::equals(child->getAttributeNS(NULL,_provider), _CURL)) {
323 auto_ptr_char option(child->getAttributeNS(NULL,_option));
324 auto_ptr_char value(child->getFirstChild()->getNodeValue());
325 if (option.get() && *option.get() && value.get() && *value.get()) {
326 // For libcurl, the option is an enum and the value type depends on the option.
327 CURLoption opt = static_cast<CURLoption>(strtol(option.get(), NULL, 10));
// Option numbers below CURLOPTTYPE_OBJECTPOINT take a long argument.
328 if (opt < CURLOPTTYPE_OBJECTPOINT)
329 success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), NULL, 10)) == CURLE_OK);
330 #ifdef CURLOPTTYPE_OFF_T
// Option numbers below CURLOPTTYPE_OFF_T are passed as a string pointer. The value is
// copied into fSavedOptions first, presumably so the pointer outlives this loop iteration.
// NOTE(review): if fSavedOptions is a std::vector, a later push_back can reallocate and
// invalidate pointers already handed to libcurl — confirm the container type.
331 else if (opt < CURLOPTTYPE_OFF_T) {
332 fSavedOptions.push_back(value.get());
333 success = (curl_easy_setopt(fEasy, opt, fSavedOptions.back().c_str()) == CURLE_OK);
335 # ifdef HAVE_CURL_OFF_T
// Offset-typed options are only set via strtol when the offset type is as wide as long.
336 else if (sizeof(curl_off_t) == sizeof(long))
337 success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), NULL, 10)) == CURLE_OK);
339 else if (sizeof(off_t) == sizeof(long))
340 success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), NULL, 10)) == CURLE_OK);
// String-valued path for builds where CURLOPTTYPE_OFF_T is not defined
// (NOTE(review): the enclosing preprocessor branch is not visible in this listing).
346 fSavedOptions.push_back(value.get());
347 success = (curl_easy_setopt(fEasy, opt, fSavedOptions.back().c_str()) == CURLE_OK);
351 fLog.error("failed to set CURL transport option (%s)", option.get());
// Step to the previous TransportOption sibling.
354 child = XMLHelper::getPreviousSiblingElement(child, TransportOption);
358 // Add easy handle to the multi stack
359 curl_multi_add_handle(fMulti, fEasy);
361 fLog.debug("libcurl trying to fetch %s", fURL.c_str());
363 // Start reading, to get the content type
// Loop until the write callback has put something into fBuffer (head pointer moved)
// or no transfer is running any more.
364 while(fBufferHeadPtr == fBuffer) {
365 int runningHandles = 0;
367 readMore(&runningHandles);
// On a transfer error, tear down both handles here.
// NOTE(review): the remainder of this handler (pointer resets, rethrow) is not visible.
369 catch (XMLException&) {
370 curl_multi_remove_handle(fMulti, fEasy);
371 curl_easy_cleanup(fEasy);
373 curl_multi_cleanup(fMulti);
377 if(runningHandles == 0) break;
380 // Check for a response code.
381 if (curl_easy_getinfo(fEasy, CURLINFO_RESPONSE_CODE, &fStatusCode) == CURLE_OK) {
382 if (fStatusCode >= 300 ) {
383 // Short-circuit usual processing by storing a special XML document in the buffer.
384 ostringstream specialdoc;
385 specialdoc << '<' << URLInputSource::asciiStatusCodeElementName << " xmlns=\"http://www.opensaml.org/xmltooling\">"
387 << "</" << URLInputSource::asciiStatusCodeElementName << '>';
388 string specialxml = specialdoc.str();
// NOTE(review): the copy is not bounds-checked; it assumes the generated document
// fits in fBuffer.
389 memcpy(fBuffer, specialxml.c_str(), specialxml.length());
390 fBufferHeadPtr += specialxml.length();
394 fStatusCode = 200; // reset to 200 to ensure no special processing occurs
397 // Find the content type
398 char* contentType8 = NULL;
// The transcoded copy is owned by this object and released in the destructor.
399 if(curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8) == CURLE_OK && contentType8)
400 fContentType = XMLString::transcode(contentType8);
// Static trampoline registered with libcurl as CURLOPT_WRITEFUNCTION. The outstream
// argument is the CURLOPT_WRITEDATA value, which init() sets to the stream object, so
// the call is forwarded to that object's writeCallback() and its result is returned.
404 size_t CurlURLInputStream::staticWriteCallback(char* buffer, size_t size, size_t nitems, void* outstream)
406 return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
// Receives a chunk of response data from libcurl.
//
// As much as the current read request still wants (fBytesToRead) is copied straight to
// the caller's destination (fWritePtr). Anything left over is stored in the internal
// holding buffer (fBuffer) for a later read.
//
// buffer - data delivered by libcurl
// size   - element size
// nitems - element count; size * nitems is the number of bytes delivered
//
// Returns the number of bytes accepted. As the comment before the return explains,
// returning less than was delivered makes libcurl report an error.
// NOTE(review): the statements that advance buffer/cnt between the two copies and the
// condition guarding the rebuffer path are not visible in this listing.
409 size_t CurlURLInputStream::writeCallback(char* buffer, size_t size, size_t nitems)
411 size_t cnt = size * nitems;
412 size_t totalConsumed = 0;
414 // Consume as many bytes as possible immediately into the buffer
// Clamp to what the pending read request can still accept.
415 size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
416 memcpy(fWritePtr, buffer, consume);
417 fWritePtr += consume;
418 fBytesRead += consume;
419 fTotalBytesRead += consume;
420 fBytesToRead -= consume;
422 //fLog.debug("write callback consuming %d bytes", consume);
424 // If bytes remain, rebuffer as many as possible into our holding buffer
426 totalConsumed += consume;
// Free space between the current head pointer and the end of fBuffer.
430 size_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer);
431 consume = (cnt > bufAvail) ? bufAvail : cnt;
432 memcpy(fBufferHeadPtr, buffer, consume);
433 fBufferHeadPtr += consume;
435 totalConsumed += consume;
436 //fLog.debug("write callback rebuffering %d bytes", consume);
439 // Return the total amount we've consumed. If we don't consume all the bytes
440 // then an error will be generated. Since our buffer size is equal to the
441 // maximum size that curl will write, this should never happen unless there
442 // is a logic error somewhere here.
443 return totalConsumed;
// Drives the transfer one step.
//
// Calls curl_multi_perform, then drains libcurl's message queue. A completed transfer
// that ended with an error is turned into a Xerces exception (MalformedURLException
// for an unsupported protocol, NetAccessorException otherwise). If curl has no
// immediate work and nothing was read during this invocation, the function waits on
// the transfer's file descriptors with select().
//
// runningHandles - out parameter filled by curl_multi_perform with the number of
//                  transfers still in progress
//
// Returns true when curl_multi_perform asked to be called again right away
// (CURLM_CALL_MULTI_PERFORM).
// NOTE(review): several declarations (msgsInQueue, the fd_sets, fdcnt, tv), the switch
// labels for the success/default cases and the early returns are not visible here.
446 bool CurlURLInputStream::readMore(int* runningHandles)
448 // Ask the curl to do some work
449 CURLMcode curlResult = curl_multi_perform(fMulti, runningHandles);
451 // Process messages from curl
453 for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; )
455 //fLog.debug("msg %d, %d from curl", msg->msg, msg->data.result);
// Only completion messages carry a transfer result worth inspecting.
457 if (msg->msg != CURLMSG_DONE)
// Map the libcurl result code onto the matching Xerces exception.
460 switch (msg->data.result)
463 // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
466 case CURLE_UNSUPPORTED_PROTOCOL:
467 ThrowXML(MalformedURLException, XMLExcepts::URL_UnsupportedProto);
470 case CURLE_COULDNT_RESOLVE_HOST:
471 case CURLE_COULDNT_RESOLVE_PROXY:
472 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURL.c_str());
475 case CURLE_COULDNT_CONNECT:
476 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str());
// Timeouts are reported with the same message code as a failed connect.
479 case CURLE_OPERATION_TIMEDOUT:
480 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str());
483 case CURLE_RECV_ERROR:
484 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURL.c_str());
// Any other failure: log the numeric code plus the text libcurl left in fError
// (registered as CURLOPT_ERRORBUFFER in init), then raise a generic internal error.
488 fLog.error("error while fetching %s: (%d) %s", fURL.c_str(), msg->data.result, fError);
489 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURL.c_str());
494 // If nothing is running any longer, bail out
495 if(*runningHandles == 0)
498 // If there is no further data to read, and we haven't
499 // read any yet on this invocation, call select to wait for data
500 if (curlResult != CURLM_CALL_MULTI_PERFORM && fBytesRead == 0)
511 // Ask curl for the file descriptors to wait on
512 curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt);
514 // Wait on the file descriptors
// NOTE(review): the return values of curl_multi_fdset and select are ignored in the
// visible code; how the timeout tv is set up is not visible.
518 select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv);
521 return curlResult == CURLM_CALL_MULTI_PERFORM;
524 xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead)
527 fBytesToRead = maxToRead;
530 for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
532 // First, any buffered data we have available
533 size_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
534 bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
537 memcpy(fWritePtr, fBufferTailPtr, bufCnt);
539 fBytesRead += bufCnt;
540 fTotalBytesRead += bufCnt;
541 fBytesToRead -= bufCnt;
543 fBufferTailPtr += bufCnt;
544 if (fBufferTailPtr == fBufferHeadPtr)
545 fBufferHeadPtr = fBufferTailPtr = fBuffer;
547 //fLog.debug("consuming %d buffered bytes", bufCnt);
553 // Check for a non-2xx status that means to ignore the curl response.
554 if (fStatusCode >= 300)
557 // Ask the curl to do some work
558 int runningHandles = 0;
559 tryAgain = readMore(&runningHandles);
561 // If nothing is running any longer, bail out
562 if (runningHandles == 0)