Add some logging.
[shibboleth/xmltooling.git] / xmltooling / util / CurlURLInputStream.cpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  * 
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  * 
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 /*
19  * $Id$
20  */
21
22 #include "internal.h"
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <errno.h>
28 #ifdef HAVE_UNISTD_H
29 # include <unistd.h>
30 #endif
31 //#include <sys/types.h>
32 //#include <sys/time.h>
33
34 #include <xercesc/util/XercesDefs.hpp>
35 #include <xercesc/util/XMLNetAccessor.hpp>
36 #include <xercesc/util/XMLString.hpp>
37 #include <xercesc/util/XMLExceptMsgs.hpp>
38 #include <xercesc/util/Janitor.hpp>
39 #include <xercesc/util/XMLUniDefs.hpp>
40 #include <xercesc/util/TransService.hpp>
41 #include <xercesc/util/TranscodingException.hpp>
42 #include <xercesc/util/PlatformUtils.hpp>
43
44 #include <xmltooling/util/CurlURLInputStream.hpp>
45
46 using namespace xmltooling;
47
48
49 CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)
50       : fMulti(0)
51       , fEasy(0)
52       , fMemoryManager(urlSource.getMemoryManager())
53       , fURLSource(urlSource)
54       , fURL(0)
55       , fTotalBytesRead(0)
56       , fWritePtr(0)
57       , fBytesRead(0)
58       , fBytesToRead(0)
59       , fDataAvailable(false)
60       , fBufferHeadPtr(fBuffer)
61       , fBufferTailPtr(fBuffer)
62       , m_log(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.NetAccessor"))
63 {
64         // Allocate the curl multi handle
65         fMulti = curl_multi_init();
66         
67         // Allocate the curl easy handle
68         fEasy = curl_easy_init();
69         
70         // Get the text of the URL we're going to use
71         fURL.reset(XMLString::transcode(fURLSource.getURLText(), fMemoryManager), fMemoryManager);
72
73         m_log.debug("libcurl trying to fetch %s", fURL.get());
74
75         // Set URL option
76         curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get());
77         curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this);                                               // Pass this pointer to write function
78         curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback);    // Our static write function
79     curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 30);
80     curl_easy_setopt(fEasy, CURLOPT_TIMEOUT, 60);
81     curl_easy_setopt(fEasy, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv3);
82     curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0);
83     curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0);
84     curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1);
85     curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1);
86     curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1);
87         
88         // Add easy handle to the multi stack
89         curl_multi_add_handle(fMulti, fEasy);
90 }
91
92
93 CurlURLInputStream::~CurlURLInputStream()
94 {
95         // Remove the easy handle from the multi stack
96         curl_multi_remove_handle(fMulti, fEasy);
97         
98         // Cleanup the easy handle
99         curl_easy_cleanup(fEasy);
100         
101         // Cleanup the multi handle
102         curl_multi_cleanup(fMulti);
103 }
104
105
106 size_t
107 CurlURLInputStream::staticWriteCallback(char *buffer,
108                                       size_t size,
109                                       size_t nitems,
110                                       void *outstream)
111 {
112         return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
113 }
114
115
116
117 size_t
118 CurlURLInputStream::writeCallback(char *buffer,
119                                       size_t size,
120                                       size_t nitems)
121 {
122         size_t cnt = size * nitems;
123         size_t totalConsumed = 0;
124                 
125         // Consume as many bytes as possible immediately into the buffer
126         size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
127         memcpy(fWritePtr, buffer, consume);
128         fWritePtr               += consume;
129         fBytesRead              += consume;
130         fTotalBytesRead += consume;
131         fBytesToRead    -= consume;
132
133         //m_log.debug("write callback consuming %d bytes", consume);
134
135         // If bytes remain, rebuffer as many as possible into our holding buffer
136         buffer                  += consume;
137         totalConsumed   += consume;
138         cnt                             -= consume;
139         if (cnt > 0)
140         {
141                 size_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer);
142                 consume = (cnt > bufAvail) ? bufAvail : cnt;
143                 memcpy(fBufferHeadPtr, buffer, consume);
144                 fBufferHeadPtr  += consume;
145                 buffer                  += consume;
146                 totalConsumed   += consume;
147                 //m_log.debug("write callback rebuffering %d bytes", consume);
148         }
149         
150         // Return the total amount we've consumed. If we don't consume all the bytes
151         // then an error will be generated. Since our buffer size is equal to the
152         // maximum size that curl will write, this should never happen unless there
153         // is a logic error somewhere here.
154         return totalConsumed;
155 }
156
157
158 unsigned int
159 CurlURLInputStream::readBytes(XMLByte* const          toFill
160                                      , const unsigned int maxToRead)
161 {
162         fBytesRead = 0;
163         fBytesToRead = maxToRead;
164         fWritePtr = toFill;
165         
166         for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
167         {
168                 // First, any buffered data we have available
169                 size_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
170                 bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
171                 if (bufCnt > 0)
172                 {
173                         memcpy(fWritePtr, fBufferTailPtr, bufCnt);
174                         fWritePtr               += bufCnt;
175                         fBytesRead              += bufCnt;
176                         fTotalBytesRead += bufCnt;
177                         fBytesToRead    -= bufCnt;
178                         
179                         fBufferTailPtr  += bufCnt;
180                         if (fBufferTailPtr == fBufferHeadPtr)
181                                 fBufferHeadPtr = fBufferTailPtr = fBuffer;
182                                 
183                         //m_log.debug("consuming %d buffered bytes", bufCnt);
184
185                         tryAgain = true;
186                         continue;
187                 }
188         
189                 // Ask the curl to do some work
190                 int runningHandles = 0;
191                 CURLMcode curlResult = curl_multi_perform(fMulti, &runningHandles);
192         //m_log.debug("curl_multi_perform returned %d", curlResult);
193                 tryAgain = (curlResult == CURLM_CALL_MULTI_PERFORM);
194                 
195                 // Process messages from curl
196                 int msgsInQueue = 0;
197                 for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; )
198                 {
199                         m_log.debug("msg %d, %d from curl", msg->msg, msg->data.result);
200
201                         if (msg->msg != CURLMSG_DONE)
202                                 continue;
203                                 
204                         switch (msg->data.result)
205                         {
206                         case CURLE_OK:
207                                 // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
208                                 break;
209                                 
210                         case CURLE_UNSUPPORTED_PROTOCOL:
211                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);
212                 break;
213
214             case CURLE_COULDNT_RESOLVE_HOST:
215             case CURLE_COULDNT_RESOLVE_PROXY:
216                 ThrowXMLwithMemMgr1(NetAccessorException,  XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager);
217                 break;
218                 
219             case CURLE_COULDNT_CONNECT:
220                 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager);
221                 
222             case CURLE_RECV_ERROR:
223                 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager);
224                 break;
225
226             default:
227                 m_log.error("curl NetAccessor encountered error from libcurl (%d)", msg->data.result);
228                 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager);
229                                 break;
230                         }
231                 }
232                 
233                 // If nothing is running any longer, bail out
234         if (runningHandles == 0) {
235             //m_log.debug("libcurl indicated no running handles");
236                         break;
237         }
238                 
239                 // If there is no further data to read, and we haven't
240                 // read any yet on this invocation, call select to wait for data
241                 if (!tryAgain && fBytesRead == 0)
242                 {
243                         fd_set readSet;
244                         fd_set writeSet;
245                         fd_set exceptSet;
246                         int fdcnt=0;
247                         
248                         // Ask curl for the file descriptors to wait on
249             FD_ZERO(&readSet);
250             FD_ZERO(&writeSet);
251             FD_ZERO(&exceptSet);
252                         (void) curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt);
253                         
254                         // Wait on the file descriptors
255                         timeval tv;
256                         tv.tv_sec  = 2;
257                         tv.tv_usec = 0;
258                         (void) select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv);
259                 }
260         }
261         
262     //m_log.debug("returning with %d bytes to parser", fBytesRead);
263         return fBytesRead;
264 }
265