2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
19 * xmltooling/util/CurlURLInputStream.cpp
21 * Asynchronous use of curl to fetch data from a URL.
26 #include <xmltooling/util/CurlURLInputStream.h>
27 #include <xmltooling/util/XMLHelper.h>
29 #include <xercesc/util/XercesDefs.hpp>
30 #include <xercesc/util/XMLNetAccessor.hpp>
31 #include <xercesc/util/XMLString.hpp>
32 #include <xercesc/util/XMLExceptMsgs.hpp>
33 #include <xercesc/util/Janitor.hpp>
34 #include <xercesc/util/XMLUniDefs.hpp>
35 #include <xercesc/util/TransService.hpp>
36 #include <xercesc/util/TranscodingException.hpp>
37 #include <xercesc/util/PlatformUtils.hpp>
39 using namespace xmltooling;
40 using namespace xercesc;
// Xerces XMLCh string constants naming the DOM attributes and elements this
// file looks up: the "url"/"uri" and "verifyHost" attributes of the
// configuring element, and the "provider"/"option" attributes of its
// TransportOption children (a child is only applied when its provider
// attribute equals "CURL").
43 static const XMLCh _CURL[] = UNICODE_LITERAL_4(C,U,R,L);
44 static const XMLCh _option[] = UNICODE_LITERAL_6(o,p,t,i,o,n);
45 static const XMLCh _provider[] = UNICODE_LITERAL_8(p,r,o,v,i,d,e,r);
46 static const XMLCh TransportOption[] = UNICODE_LITERAL_15(T,r,a,n,s,p,o,r,t,O,p,t,i,o,n);
47 static const XMLCh uri[] = UNICODE_LITERAL_3(u,r,i);
48 static const XMLCh url[] = UNICODE_LITERAL_3(u,r,l);
49 static const XMLCh verifyHost[] = UNICODE_LITERAL_10(v,e,r,i,f,y,H,o,s,t);
// Constructs a stream for a URL supplied as a narrow character string.
//
// The initializers shown bind the logger to the
// XMLTOOLING_LOGCAT".libcurl.InputStream" category, clear fDataAvailable, and
// point both the head and the tail of the holding buffer at the start of
// fBuffer, so the holding buffer starts out with no buffered bytes
// (head - tail == 0).
52 CurlURLInputStream::CurlURLInputStream(const char* url)
53 : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
61 , fDataAvailable(false)
62 , fBufferHeadPtr(fBuffer)
63 , fBufferTailPtr(fBuffer)
// Constructs a stream for a URL supplied as an XMLCh string.
//
// Member setup mirrors the narrow-string constructor: logger category,
// fDataAvailable cleared, and an empty holding buffer (head == tail == fBuffer).
// Note that the parameter name shadows the file-scope `url` constant here.
69 CurlURLInputStream::CurlURLInputStream(const XMLCh* url)
70 : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
77 , fDataAvailable(false)
78 , fBufferHeadPtr(fBuffer)
79 , fBufferTailPtr(fBuffer)
// Wrap the XMLCh URL so it can be read back as a char string via temp.get().
82 auto_ptr_char temp(url);
// Constructs a stream whose URL is taken from a DOM element.
//
// The URL is read from the element's "url" attribute; when that attribute is
// missing or empty, the "uri" attribute is consulted instead.
// Throws IOException ("No URL supplied via DOM ...") when no URL is supplied.
// Member setup mirrors the other constructors: logger category,
// fDataAvailable cleared, and an empty holding buffer.
87 CurlURLInputStream::CurlURLInputStream(const DOMElement* e)
88 : fLog(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.InputStream"))
95 , fDataAvailable(false)
96 , fBufferHeadPtr(fBuffer)
97 , fBufferTailPtr(fBuffer)
// Here `url` and `uri` are the file-scope XMLCh attribute-name constants.
100 const XMLCh* attr = e->getAttributeNS(NULL, url);
101 if (!attr || !*attr) {
// Primary attribute absent or empty: fall back to the alternate spelling.
102 attr = e->getAttributeNS(NULL, uri);
104 throw IOException("No URL supplied via DOM to CurlURLInputStream constructor.");
// Wrap the XMLCh attribute value so it can be read back as a char string.
107 auto_ptr_char temp(attr);
// Releases everything the stream owns: detaches the easy handle from the
// multi handle, destroys the easy handle, destroys the multi handle, and
// finally frees the transcoded content-type string.
// The easy handle is removed from the multi handle before either is cleaned up.
112 CurlURLInputStream::~CurlURLInputStream()
115 // Remove the easy handle from the multi stack
116 curl_multi_remove_handle(fMulti, fEasy);
118 // Cleanup the easy handle
119 curl_easy_cleanup(fEasy);
123 // Cleanup the multi handle
124 curl_multi_cleanup(fMulti);
// fContentType is produced by XMLString::transcode() in init(), so it is
// handed back to XMLString for release.
127 XMLString::release(&fContentType);
// Prepares the libcurl transfer for fURL and starts it.
//
// Steps, in order:
//   1. allocate the multi and easy handles (IOException if either is null);
//   2. apply the built-in defaults: write callback, redirects, timeouts,
//      TLS settings and the error buffer;
//   3. apply overrides read from the DOM element `e`: the verifyHost
//      attribute, then every TransportOption child whose provider is "CURL";
//   4. attach the easy handle to the multi handle and pump readMore() until
//      bytes land in the holding buffer or no transfer is running any more;
//   5. record the Content-Type reported by libcurl in fContentType.
//
// Parameter e: DOM element supplying the per-stream overrides.
// Throws IOException when the handles cannot be allocated, or when readMore()
// raises an XMLException during the initial fetch (both handles are released
// first and the exception message is carried over).
130 void CurlURLInputStream::init(const DOMElement* e)
132 // Allocate the curl multi handle
133 fMulti = curl_multi_init();
135 // Allocate the curl easy handle
136 fEasy = curl_easy_init();
138 if (!fMulti || !fEasy)
139 throw IOException("Failed to allocate libcurl handles.");
141 curl_easy_setopt(fEasy, CURLOPT_URL, fURL.c_str());
143 // Set up a way to receive the data
144 curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function
145 curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function
// Follow HTTP redirects, but give up after 6 hops.
148 curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, 1);
149 curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, 6);
// Built-in defaults; TransportOption children processed below may override them.
152 curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT,15);
153 curl_easy_setopt(fEasy, CURLOPT_TIMEOUT,30);
154 curl_easy_setopt(fEasy, CURLOPT_HTTPAUTH,0);
155 curl_easy_setopt(fEasy, CURLOPT_USERPWD,NULL);
// Host name verification is on by default (2 = certificate must match the host).
156 curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 2);
// NOTE(review): peer certificate verification is switched off by default here;
// confirm that trust in the peer is established by some other means.
157 curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0);
158 curl_easy_setopt(fEasy, CURLOPT_SSL_CIPHER_LIST, "ALL:!aNULL:!LOW:!EXPORT:!SSLv2");
159 curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1);
160 curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1);
// Treat HTTP error statuses as transfer failures.
161 curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1);
// libcurl writes its human-readable error text into fError; readMore() logs it.
164 curl_easy_setopt(fEasy, CURLOPT_ERRORBUFFER, fError);
// A verifyHost value starting with 'f' or '0' (i.e. "false" / "0") turns host
// name verification off. The default applied above is already 2, so passing 2
// again would make the attribute a no-op; 0 is the libcurl value that
// disables the check.
167 const XMLCh* flag = e->getAttributeNS(NULL, verifyHost);
168 if (flag && (*flag == chLatin_f || *flag == chDigit_0))
169 curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0);
171 // Process TransportOption elements.
// Children are walked from the last TransportOption back to the first.
173 DOMElement* child = XMLHelper::getLastChildElement(e, TransportOption);
// Only children that carry content and are addressed to the "CURL" provider apply.
175 if (child->hasChildNodes() && XMLString::equals(child->getAttributeNS(NULL,_provider), _CURL)) {
// option = numeric CURLoption id, value = text content of the element.
176 auto_ptr_char option(child->getAttributeNS(NULL,_option));
177 auto_ptr_char value(child->getFirstChild()->getNodeValue());
178 if (option.get() && *option.get() && value.get() && *value.get()) {
179 // For libcurl, the option is an enum and the value type depends on the option.
180 CURLoption opt = static_cast<CURLoption>(strtol(option.get(), NULL, 10));
// Ids below CURLOPTTYPE_OBJECTPOINT take a long argument.
181 if (opt < CURLOPTTYPE_OBJECTPOINT)
182 success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), NULL, 10)) == CURLE_OK);
183 #ifdef CURLOPTTYPE_OFF_T
// Ids between OBJECTPOINT and OFF_T take a pointer; the string is passed as-is.
184 else if (opt < CURLOPTTYPE_OFF_T)
185 success = (curl_easy_setopt(fEasy, opt, value.get()) == CURLE_OK);
// curl_off_t options are only passed as a long when the two types have the same size.
186 else if (sizeof(curl_off_t) == sizeof(long))
187 success = (curl_easy_setopt(fEasy, opt, strtol(value.get(), NULL, 10)) == CURLE_OK);
192 success = (curl_easy_setopt(fEasy, opt, value.get()) == CURLE_OK);
// A rejected option is logged; it does not abort initialization.
195 fLog.error("failed to set transport option (%s)", option.get());
198 child = XMLHelper::getPreviousSiblingElement(child, TransportOption);
202 // Add easy handle to the multi stack
203 curl_multi_add_handle(fMulti, fEasy);
205 fLog.debug("libcurl trying to fetch %s", fURL.c_str());
207 // Start reading, to get the content type
// Loop until the write callback has parked bytes in the holding buffer
// (fBufferHeadPtr moved off fBuffer) or the transfer has stopped running.
208 while(fBufferHeadPtr == fBuffer) {
209 int runningHandles = 0;
211 readMore(&runningHandles);
// On a transfer error release both handles here, then surface the Xerces
// exception as an IOException carrying the same message.
213 catch (XMLException& ex) {
214 curl_multi_remove_handle(fMulti, fEasy);
215 curl_easy_cleanup(fEasy);
217 curl_multi_cleanup(fMulti);
219 auto_ptr_char msg(ex.getMessage());
220 throw IOException(msg.get());
222 if(runningHandles == 0) break;
225 // Find the content type
226 char* contentType8 = NULL;
227 curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8);
// Keep an XMLCh copy of the content type; the destructor releases it.
229 fContentType = XMLString::transcode(contentType8);
// C-style trampoline registered with libcurl as CURLOPT_WRITEFUNCTION.
// `outstream` is the pointer registered via CURLOPT_WRITEDATA, i.e. the
// CurlURLInputStream instance; the call is forwarded to its writeCallback()
// and that method's return value is handed back to libcurl unchanged.
233 size_t CurlURLInputStream::staticWriteCallback(char* buffer, size_t size, size_t nitems, void* outstream)
235 return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
// Receives a chunk of downloaded data from libcurl.
//
// Parameters:
//   buffer - start of the delivered bytes
//   size   - size of one item
//   nitems - number of items; size * nitems bytes are delivered in total
// Returns the number of bytes accepted.
//
// Delivery happens in two stages: first as many bytes as the pending read
// still wants (fBytesToRead) are copied straight to fWritePtr, then bytes
// are copied into the holding buffer fBuffer at fBufferHeadPtr, limited by
// the space left in that buffer.
238 size_t CurlURLInputStream::writeCallback(char* buffer, size_t size, size_t nitems)
240 size_t cnt = size * nitems;
241 size_t totalConsumed = 0;
243 // Consume as many bytes as possible immediately into the buffer
// Never copy more than the pending read asked for.
244 size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
245 memcpy(fWritePtr, buffer, consume);
// Advance the destination cursor and keep the per-read and lifetime counters in step.
246 fWritePtr += consume;
247 fBytesRead += consume;
248 fTotalBytesRead += consume;
249 fBytesToRead -= consume;
251 //fLog.debug("write callback consuming %d bytes", consume);
253 // If bytes remain, rebuffer as many as possible into our holding buffer
255 totalConsumed += consume;
// Space left in fBuffer between the current head and the end of the array.
259 size_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer);
260 consume = (cnt > bufAvail) ? bufAvail : cnt;
261 memcpy(fBufferHeadPtr, buffer, consume);
262 fBufferHeadPtr += consume;
264 totalConsumed += consume;
265 //fLog.debug("write callback rebuffering %d bytes", consume);
268 // Return the total amount we've consumed. If we don't consume all the bytes
269 // then an error will be generated. Since our buffer size is equal to the
270 // maximum size that curl will write, this should never happen unless there
271 // is a logic error somewhere here.
272 return totalConsumed;
// Drives the libcurl multi handle one step.
//
// Parameter runningHandles: out-parameter filled by curl_multi_perform() with
//   the number of transfers still running.
// Returns true when curl_multi_perform() reported CURLM_CALL_MULTI_PERFORM,
//   i.e. libcurl wants to be called again right away.
// Throws (through the Xerces ThrowXML macros) MalformedURLException for an
//   unsupported protocol and NetAccessorException for resolution, connect,
//   receive and all other transfer failures.
275 bool CurlURLInputStream::readMore(int* runningHandles)
277 // Ask the curl to do some work
278 CURLMcode curlResult = curl_multi_perform(fMulti, runningHandles);
280 // Process messages from curl
// Drain every message currently queued on the multi handle.
282 for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; )
284 //fLog.debug("msg %d, %d from curl", msg->msg, msg->data.result);
// Only completion messages are of interest here.
286 if (msg->msg != CURLMSG_DONE)
// Translate the transfer's result code into the matching Xerces exception.
289 switch (msg->data.result)
292 // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
295 case CURLE_UNSUPPORTED_PROTOCOL:
296 ThrowXML(MalformedURLException, XMLExcepts::URL_UnsupportedProto);
299 case CURLE_COULDNT_RESOLVE_HOST:
300 case CURLE_COULDNT_RESOLVE_PROXY:
301 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURL.c_str());
304 case CURLE_COULDNT_CONNECT:
305 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURL.c_str());
308 case CURLE_RECV_ERROR:
309 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURL.c_str());
// Any other failure: log libcurl's own error text (fError) before throwing.
313 fLog.error("error while fetching %s: (%d) %s", fURL.c_str(), msg->data.result, fError);
314 ThrowXML1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURL.c_str());
319 // If nothing is running any longer, bail out
320 if(*runningHandles == 0)
323 // If there is no further data to read, and we haven't
324 // read any yet on this invocation, call select to wait for data
325 if (curlResult != CURLM_CALL_MULTI_PERFORM && fBytesRead == 0)
336 // Ask curl for the file descriptors to wait on
337 curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt);
339 // Wait on the file descriptors
// select() takes the highest descriptor plus one; tv bounds the wait.
343 select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv);
// Tell the caller whether libcurl asked to be invoked again immediately.
346 return curlResult == CURLM_CALL_MULTI_PERFORM;
349 xsecsize_t CurlURLInputStream::readBytes(XMLByte* const toFill, const xsecsize_t maxToRead)
352 fBytesToRead = maxToRead;
355 for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
357 // First, any buffered data we have available
358 size_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
359 bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
362 memcpy(fWritePtr, fBufferTailPtr, bufCnt);
364 fBytesRead += bufCnt;
365 fTotalBytesRead += bufCnt;
366 fBytesToRead -= bufCnt;
368 fBufferTailPtr += bufCnt;
369 if (fBufferTailPtr == fBufferHeadPtr)
370 fBufferHeadPtr = fBufferTailPtr = fBuffer;
372 //fLog.debug("consuming %d buffered bytes", bufCnt);
378 // Ask the curl to do some work
379 int runningHandles = 0;
380 tryAgain = readMore(&runningHandles);
382 // If nothing is running any longer, bail out
383 if (runningHandles == 0)