Multi-line svn commit, see body.
[shibboleth/cpp-xmltooling.git] / xmltooling / util / CurlURLInputStream.cpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  * 
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  * 
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 /*
19  * $Id$
20  */
21
22 #include "internal.h"
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <errno.h>
28 #ifdef HAVE_UNISTD_H
29 # include <unistd.h>
30 #endif
31 //#include <sys/types.h>
32 //#include <sys/time.h>
33
34 #include <xercesc/util/XercesDefs.hpp>
35 #include <xercesc/util/XMLNetAccessor.hpp>
36 #include <xercesc/util/XMLString.hpp>
37 #include <xercesc/util/XMLExceptMsgs.hpp>
38 #include <xercesc/util/Janitor.hpp>
39 #include <xercesc/util/XMLUniDefs.hpp>
40 #include <xercesc/util/TransService.hpp>
41 #include <xercesc/util/TranscodingException.hpp>
42 #include <xercesc/util/PlatformUtils.hpp>
43
44 #include <xmltooling/util/CurlURLInputStream.hpp>
45
46 using namespace xmltooling;
47
48
49 CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)
50       : fMulti(0)
51       , fEasy(0)
52       , fMemoryManager(urlSource.getMemoryManager())
53       , fURLSource(urlSource)
54       , fURL(0)
55       , fTotalBytesRead(0)
56       , fWritePtr(0)
57       , fBytesRead(0)
58       , fBytesToRead(0)
59       , fDataAvailable(false)
60       , fBufferHeadPtr(fBuffer)
61       , fBufferTailPtr(fBuffer)
62 {
63         // Allocate the curl multi handle
64         fMulti = curl_multi_init();
65         
66         // Allocate the curl easy handle
67         fEasy = curl_easy_init();
68         
69         // Get the text of the URL we're going to use
70         fURL.reset(XMLString::transcode(fURLSource.getURLText(), fMemoryManager), fMemoryManager);
71
72         //printf("Curl trying to fetch %s\n", fURL.get());
73
74         // Set URL option
75         curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get());
76         curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this);                                               // Pass this pointer to write function
77         curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback);    // Our static write function
78         
79         // Add easy handle to the multi stack
80         curl_multi_add_handle(fMulti, fEasy);
81 }
82
83
84 CurlURLInputStream::~CurlURLInputStream()
85 {
86         // Remove the easy handle from the multi stack
87         curl_multi_remove_handle(fMulti, fEasy);
88         
89         // Cleanup the easy handle
90         curl_easy_cleanup(fEasy);
91         
92         // Cleanup the multi handle
93         curl_multi_cleanup(fMulti);
94 }
95
96
97 size_t
98 CurlURLInputStream::staticWriteCallback(char *buffer,
99                                       size_t size,
100                                       size_t nitems,
101                                       void *outstream)
102 {
103         return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
104 }
105
106
107
108 size_t
109 CurlURLInputStream::writeCallback(char *buffer,
110                                       size_t size,
111                                       size_t nitems)
112 {
113         size_t cnt = size * nitems;
114         size_t totalConsumed = 0;
115                 
116         // Consume as many bytes as possible immediately into the buffer
117         size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
118         memcpy(fWritePtr, buffer, consume);
119         fWritePtr               += consume;
120         fBytesRead              += consume;
121         fTotalBytesRead += consume;
122         fBytesToRead    -= consume;
123
124         //printf("write callback consuming %d bytes\n", consume);
125
126         // If bytes remain, rebuffer as many as possible into our holding buffer
127         buffer                  += consume;
128         totalConsumed   += consume;
129         cnt                             -= consume;
130         if (cnt > 0)
131         {
132                 size_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer);
133                 consume = (cnt > bufAvail) ? bufAvail : cnt;
134                 memcpy(fBufferHeadPtr, buffer, consume);
135                 fBufferHeadPtr  += consume;
136                 buffer                  += consume;
137                 totalConsumed   += consume;
138                 //printf("write callback rebuffering %d bytes\n", consume);
139         }
140         
141         // Return the total amount we've consumed. If we don't consume all the bytes
142         // then an error will be generated. Since our buffer size is equal to the
143         // maximum size that curl will write, this should never happen unless there
144         // is a logic error somewhere here.
145         return totalConsumed;
146 }
147
148
149 unsigned int
150 CurlURLInputStream::readBytes(XMLByte* const          toFill
151                                      , const unsigned int maxToRead)
152 {
153         fBytesRead = 0;
154         fBytesToRead = maxToRead;
155         fWritePtr = toFill;
156         
157         for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
158         {
159                 // First, any buffered data we have available
160                 size_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
161                 bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
162                 if (bufCnt > 0)
163                 {
164                         memcpy(fWritePtr, fBufferTailPtr, bufCnt);
165                         fWritePtr               += bufCnt;
166                         fBytesRead              += bufCnt;
167                         fTotalBytesRead += bufCnt;
168                         fBytesToRead    -= bufCnt;
169                         
170                         fBufferTailPtr  += bufCnt;
171                         if (fBufferTailPtr == fBufferHeadPtr)
172                                 fBufferHeadPtr = fBufferTailPtr = fBuffer;
173                                 
174                         //printf("consuming %d buffered bytes\n", bufCnt);
175
176                         tryAgain = true;
177                         continue;
178                 }
179         
180                 // Ask the curl to do some work
181                 int runningHandles = 0;
182                 CURLMcode curlResult = curl_multi_perform(fMulti, &runningHandles);
183                 tryAgain = (curlResult == CURLM_CALL_MULTI_PERFORM);
184                 
185                 // Process messages from curl
186                 int msgsInQueue = 0;
187                 for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; )
188                 {
189                         //printf("msg %d, %d from curl\n", msg->msg, msg->data.result);
190
191                         if (msg->msg != CURLMSG_DONE)
192                                 continue;
193                                 
194                         switch (msg->data.result)
195                         {
196                         case CURLE_OK:
197                                 // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
198                                 break;
199                                 
200                         case CURLE_UNSUPPORTED_PROTOCOL:
201                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);
202                 break;
203
204             case CURLE_COULDNT_RESOLVE_HOST:
205             case CURLE_COULDNT_RESOLVE_PROXY:
206                 ThrowXMLwithMemMgr1(NetAccessorException,  XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager);
207                 break;
208                 
209             case CURLE_COULDNT_CONNECT:
210                 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager);
211                 
212             case CURLE_RECV_ERROR:
213                 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager);
214                 break;
215
216             default:
217                 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager);
218                                 break;
219                         }
220                 }
221                 
222                 // If nothing is running any longer, bail out
223                 if (runningHandles == 0)
224                         break;
225                 
226                 // If there is no further data to read, and we haven't
227                 // read any yet on this invocation, call select to wait for data
228                 if (!tryAgain && fBytesRead == 0)
229                 {
230                         fd_set readSet[16];
231                         fd_set writeSet[16];
232                         fd_set exceptSet[16];
233                         int fdcnt = 16;
234                         
235                         // As curl for the file descriptors to wait on
236                         (void) curl_multi_fdset(fMulti, readSet, writeSet, exceptSet, &fdcnt);
237                         
238                         // Wait on the file descriptors
239                         timeval tv;
240                         tv.tv_sec  = 2;
241                         tv.tv_usec = 0;
242                         (void) select(fdcnt, readSet, writeSet, exceptSet, &tv);
243                 }
244         }
245         
246         return fBytesRead;
247 }
248