Add internal copy of the Xerces net accessor for libcurl, to get SSL support.
[shibboleth/cpp-xmltooling.git] / xmltooling / util / CurlURLInputStream.cpp
1 /*\r
2  * Licensed to the Apache Software Foundation (ASF) under one or more\r
3  * contributor license agreements.  See the NOTICE file distributed with\r
4  * this work for additional information regarding copyright ownership.\r
5  * The ASF licenses this file to You under the Apache License, Version 2.0\r
6  * (the "License"); you may not use this file except in compliance with\r
7  * the License.  You may obtain a copy of the License at\r
8  * \r
9  *      http://www.apache.org/licenses/LICENSE-2.0\r
10  * \r
11  * Unless required by applicable law or agreed to in writing, software\r
12  * distributed under the License is distributed on an "AS IS" BASIS,\r
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
14  * See the License for the specific language governing permissions and\r
15  * limitations under the License.\r
16  */\r
17 \r
18 /*\r
19  * $Id: CurlURLInputStream.cpp 471747 2006-11-06 14:31:56Z amassari $\r
20  */\r
21 \r
22 #include "internal.h"\r
23 #include "util/CurlURLInputStream.h"\r
24 \r
25 #include <stdio.h>\r
26 #include <stdlib.h>\r
27 #include <string.h>\r
28 #include <errno.h>\r
29 #ifdef HAVE_UNISTD_H\r
30 # include <unistd.h>\r
31 #endif\r
32 #include <sys/types.h>\r
33 \r
34 #include <xercesc/util/XercesDefs.hpp>\r
35 #include <xercesc/util/XMLNetAccessor.hpp>\r
36 #include <xercesc/util/XMLString.hpp>\r
37 #include <xercesc/util/XMLExceptMsgs.hpp>\r
38 #include <xercesc/util/Janitor.hpp>\r
39 #include <xercesc/util/XMLUniDefs.hpp>\r
40 #include <xercesc/util/TransService.hpp>\r
41 #include <xercesc/util/TranscodingException.hpp>\r
42 #include <xercesc/util/PlatformUtils.hpp>\r
43 \r
44 \r
45 using namespace xmltooling;\r
46 \r
47 CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)\r
48       : fMulti(0)\r
49       , fEasy(0)\r
50       , fMemoryManager(urlSource.getMemoryManager())\r
51       , fURLSource(urlSource)\r
52       , fURL(0)\r
53       , fTotalBytesRead(0)\r
54       , fWritePtr(0)\r
55       , fBytesRead(0)\r
56       , fBytesToRead(0)\r
57       , fDataAvailable(false)\r
58       , fBufferHeadPtr(fBuffer)\r
59       , fBufferTailPtr(fBuffer)\r
60 {\r
61         // Allocate the curl multi handle\r
62         fMulti = curl_multi_init();\r
63         \r
64         // Allocate the curl easy handle\r
65         fEasy = curl_easy_init();\r
66         \r
67         // Get the text of the URL we're going to use\r
68         fURL.reset(XMLString::transcode(fURLSource.getURLText(), fMemoryManager), fMemoryManager);\r
69 \r
70         //printf("Curl trying to fetch %s\n", fURL.get());\r
71 \r
72         // Set URL option\r
73         curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get());\r
74         curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this);                                               // Pass this pointer to write function\r
75         curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback);    // Our static write function\r
76         \r
77         // Add easy handle to the multi stack\r
78         curl_multi_add_handle(fMulti, fEasy);\r
79 }\r
80 \r
81 \r
82 CurlURLInputStream::~CurlURLInputStream()\r
83 {\r
84         // Remove the easy handle from the multi stack\r
85         curl_multi_remove_handle(fMulti, fEasy);\r
86         \r
87         // Cleanup the easy handle\r
88         curl_easy_cleanup(fEasy);\r
89         \r
90         // Cleanup the multi handle\r
91         curl_multi_cleanup(fMulti);\r
92 }\r
93 \r
94 \r
95 size_t\r
96 CurlURLInputStream::staticWriteCallback(char *buffer,\r
97                                       size_t size,\r
98                                       size_t nitems,\r
99                                       void *outstream)\r
100 {\r
101         return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);\r
102 }\r
103 \r
104 \r
105 \r
106 size_t\r
107 CurlURLInputStream::writeCallback(char *buffer,\r
108                                       size_t size,\r
109                                       size_t nitems)\r
110 {\r
111         XMLSize_t cnt = size * nitems;\r
112         XMLSize_t totalConsumed = 0;\r
113                 \r
114         // Consume as many bytes as possible immediately into the buffer\r
115         XMLSize_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;\r
116         memcpy(fWritePtr, buffer, consume);\r
117         fWritePtr               += consume;\r
118         fBytesRead              += consume;\r
119         fTotalBytesRead += consume;\r
120         fBytesToRead    -= consume;\r
121 \r
122         //printf("write callback consuming %d bytes\n", consume);\r
123 \r
124         // If bytes remain, rebuffer as many as possible into our holding buffer\r
125         buffer                  += consume;\r
126         totalConsumed   += consume;\r
127         cnt                             -= consume;\r
128         if (cnt > 0)\r
129         {\r
130                 XMLSize_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer);\r
131                 consume = (cnt > bufAvail) ? bufAvail : cnt;\r
132                 memcpy(fBufferHeadPtr, buffer, consume);\r
133                 fBufferHeadPtr  += consume;\r
134                 buffer                  += consume;\r
135                 totalConsumed   += consume;\r
136                 //printf("write callback rebuffering %d bytes\n", consume);\r
137         }\r
138         \r
139         // Return the total amount we've consumed. If we don't consume all the bytes\r
140         // then an error will be generated. Since our buffer size is equal to the\r
141         // maximum size that curl will write, this should never happen unless there\r
142         // is a logic error somewhere here.\r
143         return totalConsumed;\r
144 }\r
145 \r
146 \r
147 \r
148 unsigned int\r
149 CurlURLInputStream::readBytes(XMLByte* const    toFill\r
150                                      , const unsigned int    maxToRead)\r
151 {\r
152         fBytesRead = 0;\r
153         fBytesToRead = maxToRead;\r
154         fWritePtr = toFill;\r
155         \r
156         for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )\r
157         {\r
158                 // First, any buffered data we have available\r
159                 XMLSize_t bufCnt = fBufferHeadPtr - fBufferTailPtr;\r
160                 bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;\r
161                 if (bufCnt > 0)\r
162                 {\r
163                         memcpy(fWritePtr, fBufferTailPtr, bufCnt);\r
164                         fWritePtr               += bufCnt;\r
165                         fBytesRead              += bufCnt;\r
166                         fTotalBytesRead += bufCnt;\r
167                         fBytesToRead    -= bufCnt;\r
168                         \r
169                         fBufferTailPtr  += bufCnt;\r
170                         if (fBufferTailPtr == fBufferHeadPtr)\r
171                                 fBufferHeadPtr = fBufferTailPtr = fBuffer;\r
172                                 \r
173                         //printf("consuming %d buffered bytes\n", bufCnt);\r
174 \r
175                         tryAgain = true;\r
176                         continue;\r
177                 }\r
178         \r
179                 // Ask the curl to do some work\r
180                 int runningHandles = 0;\r
181                 CURLMcode curlResult = curl_multi_perform(fMulti, &runningHandles);\r
182                 tryAgain = (curlResult == CURLM_CALL_MULTI_PERFORM);\r
183                 \r
184                 // Process messages from curl\r
185                 int msgsInQueue = 0;\r
186                 for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; )\r
187                 {\r
188                         //printf("msg %d, %d from curl\n", msg->msg, msg->data.result);\r
189 \r
190                         if (msg->msg != CURLMSG_DONE)\r
191                                 continue;\r
192                                 \r
193                         switch (msg->data.result)\r
194                         {\r
195                         case CURLE_OK:\r
196                                 // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...\r
197                                 break;\r
198                                 \r
199                         case CURLE_UNSUPPORTED_PROTOCOL:\r
200                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);\r
201                 break;\r
202 \r
203             case CURLE_COULDNT_RESOLVE_HOST:\r
204             case CURLE_COULDNT_RESOLVE_PROXY:\r
205                 ThrowXMLwithMemMgr1(NetAccessorException,  XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager);\r
206                 break;\r
207                 \r
208             case CURLE_COULDNT_CONNECT:\r
209                 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager);\r
210                 \r
211             case CURLE_RECV_ERROR:\r
212                 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager);\r
213                 break;\r
214 \r
215             default:\r
216                 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager);\r
217                                 break;\r
218                         }\r
219                 }\r
220                 \r
221                 // If nothing is running any longer, bail out\r
222                 if (runningHandles == 0)\r
223                         break;\r
224                 \r
225                 // If there is no further data to read, and we haven't\r
226                 // read any yet on this invocation, call select to wait for data\r
227                 if (!tryAgain && fBytesRead == 0)\r
228                 {\r
229                         fd_set readSet[16];\r
230                         fd_set writeSet[16];\r
231                         fd_set exceptSet[16];\r
232                         int fdcnt = 16;\r
233                         \r
234                         // As curl for the file descriptors to wait on\r
235                         (void) curl_multi_fdset(fMulti, readSet, writeSet, exceptSet, &fdcnt);\r
236                         \r
237                         // Wait on the file descriptors\r
238                         timeval tv;\r
239                         tv.tv_sec  = 2;\r
240                         tv.tv_usec = 0;\r
241                         (void) select(fdcnt, readSet, writeSet, exceptSet, &tv);\r
242                 }\r
243         }\r
244         \r
245         return fBytesRead;\r
246 }\r