https://bugs.internet2.edu/jira/browse/CPPXT-8
authorcantor <cantor@de75baf8-a10c-0410-a50a-987c0e22f00f>
Wed, 2 Jul 2008 03:13:34 +0000 (03:13 +0000)
committercantor <cantor@de75baf8-a10c-0410-a50a-987c0e22f00f>
Wed, 2 Jul 2008 03:13:34 +0000 (03:13 +0000)
git-svn-id: https://svn.middleware.georgetown.edu/cpp-xmltooling/branches/REL_1@506 de75baf8-a10c-0410-a50a-987c0e22f00f

xmltooling/util/CurlURLInputStream.cpp
xmltooling/util/CurlURLInputStream.hpp

index ccf66a4..532d318 100644 (file)
@@ -5,9 +5,9 @@
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
- * 
+ *
  *      http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 
 #include "internal.h"
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif
-//#include <sys/types.h>
-//#include <sys/time.h>
+#include <curl/curl.h>
 
 #include <xercesc/util/XercesDefs.hpp>
 #include <xercesc/util/XMLNetAccessor.hpp>
@@ -47,218 +39,98 @@ using namespace xmltooling;
 
 
 CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)
-      : fMulti(0)
-      , fEasy(0)
-      , fMemoryManager(urlSource.getMemoryManager())
+      : fMemoryManager(urlSource.getMemoryManager())
       , fURLSource(urlSource)
       , fURL(0)
-      , fTotalBytesRead(0)
-      , fWritePtr(0)
-      , fBytesRead(0)
-      , fBytesToRead(0)
-      , fDataAvailable(false)
-      , fBufferHeadPtr(fBuffer)
-      , fBufferTailPtr(fBuffer)
+      , fInputStream(NULL)
       , m_log(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.NetAccessor"))
 {
-       // Allocate the curl multi handle
-       fMulti = curl_multi_init();
-       
-       // Allocate the curl easy handle
-       fEasy = curl_easy_init();
-       
        // Get the text of the URL we're going to use
        fURL.reset(XMLString::transcode(fURLSource.getURLText(), fMemoryManager), fMemoryManager);
-
-       m_log.debug("libcurl trying to fetch %s", fURL.get());
-
-       // Set URL option
-       curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get());
-       curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this);                                               // Pass this pointer to write function
-       curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback);    // Our static write function
-    curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 30);
-    curl_easy_setopt(fEasy, CURLOPT_TIMEOUT, 60);
-    curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0);
-    curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0);
-    curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1);
-    curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1);
-    curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1);
-       
-       // Add easy handle to the multi stack
-       curl_multi_add_handle(fMulti, fEasy);
 }
 
 
 CurlURLInputStream::~CurlURLInputStream()
 {
-       // Remove the easy handle from the multi stack
-       curl_multi_remove_handle(fMulti, fEasy);
-       
-       // Cleanup the easy handle
-       curl_easy_cleanup(fEasy);
-       
-       // Cleanup the multi handle
-       curl_multi_cleanup(fMulti);
+    delete fInputStream;
 }
 
 
-size_t
-CurlURLInputStream::staticWriteCallback(char *buffer,
-                                      size_t size,
-                                      size_t nitems,
-                                      void *outstream)
+size_t CurlURLInputStream::staticWriteCallback(void* ptr, size_t size, size_t nmemb, void* stream)
 {
-       return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
+    size_t len = size*nmemb;
+    reinterpret_cast<std::stringstream*>(stream)->write(reinterpret_cast<const char*>(ptr),len);
+    return len;
 }
 
 
-
-size_t
-CurlURLInputStream::writeCallback(char *buffer,
-                                      size_t size,
-                                      size_t nitems)
+unsigned int CurlURLInputStream::readBytes(XMLByte* const toFill, const unsigned int maxToRead)
 {
-       size_t cnt = size * nitems;
-       size_t totalConsumed = 0;
-               
-       // Consume as many bytes as possible immediately into the buffer
-       size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
-       memcpy(fWritePtr, buffer, consume);
-       fWritePtr               += consume;
-       fBytesRead              += consume;
-       fTotalBytesRead += consume;
-       fBytesToRead    -= consume;
-
-       //m_log.debug("write callback consuming %d bytes", consume);
+    if (!fInputStream) {
+        // Allocate the curl easy handle.
+        CURL* fEasy = curl_easy_init();
+        if (!fEasy)
+            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, "unable to allocate libcurl handle", fMemoryManager);
+
+        m_log.debug("libcurl trying to fetch %s", fURL.get());
+
+        // Set URL option
+        curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get());
+        curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, &fUnderlyingStream);
+        curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback);
+        curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 30);
+        curl_easy_setopt(fEasy, CURLOPT_TIMEOUT, 60);
+        curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0);
+        curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0);
+        curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1);
+        curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1);
+        curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1);
+
+        char curl_errorbuf[CURL_ERROR_SIZE];
+        curl_errorbuf[0]=0;
+        curl_easy_setopt(fEasy,CURLOPT_ERRORBUFFER,curl_errorbuf);
+
+        // Fetch the data.
+        if (curl_easy_perform(fEasy) != CURLE_OK) {
+            curl_easy_cleanup(fEasy);
+            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, curl_errorbuf, fMemoryManager);
+        }
 
-       // If bytes remain, rebuffer as many as possible into our holding buffer
-       buffer                  += consume;
-       totalConsumed   += consume;
-       cnt                             -= consume;
-       if (cnt > 0)
-       {
-               size_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer);
-               consume = (cnt > bufAvail) ? bufAvail : cnt;
-               memcpy(fBufferHeadPtr, buffer, consume);
-               fBufferHeadPtr  += consume;
-               buffer                  += consume;
-               totalConsumed   += consume;
-               //m_log.debug("write callback rebuffering %d bytes", consume);
-       }
-       
-       // Return the total amount we've consumed. If we don't consume all the bytes
-       // then an error will be generated. Since our buffer size is equal to the
-       // maximum size that curl will write, this should never happen unless there
-       // is a logic error somewhere here.
-       return totalConsumed;
-}
+        curl_easy_cleanup(fEasy);
 
+        /*
+        switch (msg->data.result)
+        {
+        case CURLE_OK:
+            // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
+            break;
 
-unsigned int
-CurlURLInputStream::readBytes(XMLByte* const          toFill
-                                     , const unsigned int maxToRead)
-{
-       fBytesRead = 0;
-       fBytesToRead = maxToRead;
-       fWritePtr = toFill;
-       
-       for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
-       {
-               // First, any buffered data we have available
-               size_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
-               bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
-               if (bufCnt > 0)
-               {
-                       memcpy(fWritePtr, fBufferTailPtr, bufCnt);
-                       fWritePtr               += bufCnt;
-                       fBytesRead              += bufCnt;
-                       fTotalBytesRead += bufCnt;
-                       fBytesToRead    -= bufCnt;
-                       
-                       fBufferTailPtr  += bufCnt;
-                       if (fBufferTailPtr == fBufferHeadPtr)
-                               fBufferHeadPtr = fBufferTailPtr = fBuffer;
-                               
-                       //m_log.debug("consuming %d buffered bytes", bufCnt);
+        case CURLE_UNSUPPORTED_PROTOCOL:
+            ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);
+            break;
 
-                       tryAgain = true;
-                       continue;
-               }
-       
-               // Ask the curl to do some work
-               int runningHandles = 0;
-               CURLMcode curlResult = curl_multi_perform(fMulti, &runningHandles);
-        //m_log.debug("curl_multi_perform returned %d", curlResult);
-               tryAgain = (curlResult == CURLM_CALL_MULTI_PERFORM);
-               
-               // Process messages from curl
-               int msgsInQueue = 0;
-               for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; )
-               {
-                       m_log.debug("msg %d, %d from curl", msg->msg, msg->data.result);
+        case CURLE_COULDNT_RESOLVE_HOST:
+        case CURLE_COULDNT_RESOLVE_PROXY:
+            ThrowXMLwithMemMgr1(NetAccessorException,  XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager);
+            break;
 
-                       if (msg->msg != CURLMSG_DONE)
-                               continue;
-                               
-                       switch (msg->data.result)
-                       {
-                       case CURLE_OK:
-                               // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
-                               break;
-                               
-                       case CURLE_UNSUPPORTED_PROTOCOL:
-                ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);
-                break;
+        case CURLE_COULDNT_CONNECT:
+            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager);
 
-            case CURLE_COULDNT_RESOLVE_HOST:
-            case CURLE_COULDNT_RESOLVE_PROXY:
-                ThrowXMLwithMemMgr1(NetAccessorException,  XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager);
-                break;
-                
-            case CURLE_COULDNT_CONNECT:
-                ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager);
-               
-            case CURLE_RECV_ERROR:
-                ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager);
-                break;
+        case CURLE_RECV_ERROR:
+            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager);
+            break;
 
-            default:
-                m_log.error("curl NetAccessor encountered error from libcurl (%d)", msg->data.result);
-                ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager);
-                               break;
-                       }
-               }
-               
-               // If nothing is running any longer, bail out
-        if (runningHandles == 0) {
-            //m_log.debug("libcurl indicated no running handles");
-                       break;
+        default:
+            m_log.error("curl NetAccessor encountered error from libcurl (%d)", msg->data.result);
+            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager);
+            break;
         }
-               
-               // If there is no further data to read, and we haven't
-               // read any yet on this invocation, call select to wait for data
-               if (!tryAgain && fBytesRead == 0)
-               {
-                       fd_set readSet;
-                       fd_set writeSet;
-                       fd_set exceptSet;
-                       int fdcnt=0;
-                       
-                       // Ask curl for the file descriptors to wait on
-            FD_ZERO(&readSet);
-            FD_ZERO(&writeSet);
-            FD_ZERO(&exceptSet);
-                       (void) curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt);
-                       
-                       // Wait on the file descriptors
-                       timeval tv;
-                       tv.tv_sec  = 2;
-                       tv.tv_usec = 0;
-                       (void) select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv);
-               }
-       }
-       
-    //m_log.debug("returning with %d bytes to parser", fBytesRead);
-       return fBytesRead;
-}
+        */
 
+        fInputStream = new (fMemoryManager) StreamInputSource::StreamBinInputStream(fUnderlyingStream);
+    }
+
+    // Defer to the stream wrapper.
+    return fInputStream->readBytes(toFill, maxToRead);
+}
index 991f775..cd6106a 100644 (file)
@@ -5,9 +5,9 @@
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
- * 
+ *
  *      http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #define XERCESC_INCLUDE_GUARD_CURLURLINPUTSTREAM_HPP
 
 #include <xmltooling/logging.h>
+#include <xmltooling/util/ParserPool.h>
 
-#include <curl/curl.h>
-#include <curl/multi.h>
-#include <curl/easy.h>
+#include <sstream>
 
 #include <xercesc/util/XMLURL.hpp>
 #include <xercesc/util/XMLExceptMsgs.hpp>
@@ -48,12 +47,7 @@ public :
     ~CurlURLInputStream();
 
     unsigned int curPos() const;
-    unsigned int readBytes
-    (
-                XMLByte* const  toFill
-        , const unsigned int       maxToRead
-    );
-
+    unsigned int readBytes(XMLByte* const toFill, const unsigned int maxToRead);
 
 private :
     // -----------------------------------------------------------------------
@@ -61,65 +55,24 @@ private :
     // -----------------------------------------------------------------------
     CurlURLInputStream(const CurlURLInputStream&);
     CurlURLInputStream& operator=(const CurlURLInputStream&);
-    
-    static size_t staticWriteCallback(char *buffer,
-                                      size_t size,
-                                      size_t nitems,
-                                      void *outstream);
-    size_t writeCallback(                        char *buffer,
-                                      size_t size,
-                                      size_t nitems);
 
+    static size_t staticWriteCallback(void* ptr, size_t size, size_t nmemb, void* stream);
 
-    // -----------------------------------------------------------------------
-    //  Private data members
-    //
-    //  fSocket
-    //      The socket representing the connection to the remote file.
-    //  fBytesProcessed
-    //      Its a rolling count of the number of bytes processed off this
-    //      input stream.
-    //  fBuffer
-    //      Holds the http header, plus the first part of the actual
-    //      data.  Filled at the time the stream is opened, data goes
-    //      out to user in response to readBytes().
-    //  fBufferPos, fBufferEnd
-    //      Pointers into fBuffer, showing start and end+1 of content
-    //      that readBytes must return.
-    // -----------------------------------------------------------------------
-       
-    CURLM*                             fMulti;
-    CURL*                              fEasy;
-    
+    std::stringstream   fUnderlyingStream;
     MemoryManager*      fMemoryManager;
-    
     XMLURL                             fURLSource;
     ArrayJanitor<char> fURL;
-    
-    unsigned long       fTotalBytesRead;
-    XMLByte*                   fWritePtr;
-    unsigned long              fBytesRead;
-    unsigned long              fBytesToRead;
-    bool                               fDataAvailable;
-    
-    // Overflow buffer for when curl writes more data to us
-    // than we've asked for.
-    XMLByte                            fBuffer[CURL_MAX_WRITE_SIZE];
-    XMLByte*                   fBufferHeadPtr;
-    XMLByte*                   fBufferTailPtr;
-
+    StreamInputSource::StreamBinInputStream* fInputStream;
     logging::Category&  m_log;
-    
+
 }; // CurlURLInputStream
 
 
-inline unsigned int
-CurlURLInputStream::curPos() const
+inline unsigned int CurlURLInputStream::curPos() const
 {
-    return fTotalBytesRead;
+    return fInputStream ? fInputStream->curPos() : 0;
 }
 
 };
 
 #endif // CURLURLINPUTSTREAM_HPP
-