From: cantor Date: Wed, 2 Jul 2008 03:13:34 +0000 (+0000) Subject: https://bugs.internet2.edu/jira/browse/CPPXT-8 X-Git-Tag: 1.4.1~323 X-Git-Url: http://www.project-moonshot.org/gitweb/?p=shibboleth%2Fxmltooling.git;a=commitdiff_plain;h=69ac0b0fdad43c448e1087dd17d2b84d810dce8c https://bugs.internet2.edu/jira/browse/CPPXT-8 git-svn-id: https://svn.middleware.georgetown.edu/cpp-xmltooling/branches/REL_1@506 de75baf8-a10c-0410-a50a-987c0e22f00f --- diff --git a/xmltooling/util/CurlURLInputStream.cpp b/xmltooling/util/CurlURLInputStream.cpp index ccf66a4..532d318 100644 --- a/xmltooling/util/CurlURLInputStream.cpp +++ b/xmltooling/util/CurlURLInputStream.cpp @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,15 +21,7 @@ #include "internal.h" -#include -#include -#include -#include -#ifdef HAVE_UNISTD_H -# include -#endif -//#include -//#include +#include #include #include @@ -47,218 +39,98 @@ using namespace xmltooling; CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/) - : fMulti(0) - , fEasy(0) - , fMemoryManager(urlSource.getMemoryManager()) + : fMemoryManager(urlSource.getMemoryManager()) , fURLSource(urlSource) , fURL(0) - , fTotalBytesRead(0) - , fWritePtr(0) - , fBytesRead(0) - , fBytesToRead(0) - , fDataAvailable(false) - , fBufferHeadPtr(fBuffer) - , fBufferTailPtr(fBuffer) + , fInputStream(NULL) , m_log(logging::Category::getInstance(XMLTOOLING_LOGCAT".libcurl.NetAccessor")) { - // Allocate the curl multi handle - fMulti = curl_multi_init(); - - // Allocate the curl easy handle - fEasy = curl_easy_init(); - // Get the text of the URL we're going to use fURL.reset(XMLString::transcode(fURLSource.getURLText(), fMemoryManager), fMemoryManager); - - m_log.debug("libcurl trying to fetch %s", fURL.get()); - - // Set URL option - curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get()); - curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function - curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function - curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 30); - curl_easy_setopt(fEasy, CURLOPT_TIMEOUT, 60); - curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0); - curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0); - curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1); - curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1); - curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1); - - // Add easy handle to the multi stack - curl_multi_add_handle(fMulti, fEasy); } CurlURLInputStream::~CurlURLInputStream() { - // Remove the easy handle from the multi stack - curl_multi_remove_handle(fMulti, fEasy); - - // Cleanup the easy handle - curl_easy_cleanup(fEasy); - - // Cleanup the multi handle - curl_multi_cleanup(fMulti); + delete fInputStream; } -size_t -CurlURLInputStream::staticWriteCallback(char *buffer, - size_t size, - size_t nitems, - void *outstream) +size_t CurlURLInputStream::staticWriteCallback(void* ptr, size_t size, size_t nmemb, void* stream) { - return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems); + size_t len = size*nmemb; + reinterpret_cast(stream)->write(reinterpret_cast(ptr),len); + return len; } - -size_t -CurlURLInputStream::writeCallback(char *buffer, - size_t size, - size_t nitems) +unsigned int CurlURLInputStream::readBytes(XMLByte* const toFill, const unsigned int maxToRead) { - size_t cnt = size * nitems; - size_t totalConsumed = 0; - - // Consume as many bytes as possible immediately into the buffer - size_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt; - memcpy(fWritePtr, buffer, consume); - fWritePtr += consume; - fBytesRead += consume; - fTotalBytesRead += consume; - fBytesToRead -= consume; - - //m_log.debug("write callback consuming %d bytes", consume); + if (!fInputStream) { + // Allocate the curl easy handle. + CURL* fEasy = curl_easy_init(); + if (!fEasy) + ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, "unable to allocate libcurl handle", fMemoryManager); + + m_log.debug("libcurl trying to fetch %s", fURL.get()); + + // Set URL option + curl_easy_setopt(fEasy, CURLOPT_URL, fURL.get()); + curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, &fUnderlyingStream); + curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); + curl_easy_setopt(fEasy, CURLOPT_CONNECTTIMEOUT, 30); + curl_easy_setopt(fEasy, CURLOPT_TIMEOUT, 60); + curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYHOST, 0); + curl_easy_setopt(fEasy, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(fEasy, CURLOPT_NOPROGRESS, 1); + curl_easy_setopt(fEasy, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(fEasy, CURLOPT_FAILONERROR, 1); + + char curl_errorbuf[CURL_ERROR_SIZE]; + curl_errorbuf[0]=0; + curl_easy_setopt(fEasy,CURLOPT_ERRORBUFFER,curl_errorbuf); + + // Fetch the data. + if (curl_easy_perform(fEasy) != CURLE_OK) { + curl_easy_cleanup(fEasy); + ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, curl_errorbuf, fMemoryManager); + } - // If bytes remain, rebuffer as many as possible into our holding buffer - buffer += consume; - totalConsumed += consume; - cnt -= consume; - if (cnt > 0) - { - size_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer); - consume = (cnt > bufAvail) ? bufAvail : cnt; - memcpy(fBufferHeadPtr, buffer, consume); - fBufferHeadPtr += consume; - buffer += consume; - totalConsumed += consume; - //m_log.debug("write callback rebuffering %d bytes", consume); - } - - // Return the total amount we've consumed. If we don't consume all the bytes - // then an error will be generated. Since our buffer size is equal to the - // maximum size that curl will write, this should never happen unless there - // is a logic error somewhere here. - return totalConsumed; -} + curl_easy_cleanup(fEasy); + /* + switch (msg->data.result) + { + case CURLE_OK: + // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below... + break; -unsigned int -CurlURLInputStream::readBytes(XMLByte* const toFill - , const unsigned int maxToRead) -{ - fBytesRead = 0; - fBytesToRead = maxToRead; - fWritePtr = toFill; - - for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); ) - { - // First, any buffered data we have available - size_t bufCnt = fBufferHeadPtr - fBufferTailPtr; - bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt; - if (bufCnt > 0) - { - memcpy(fWritePtr, fBufferTailPtr, bufCnt); - fWritePtr += bufCnt; - fBytesRead += bufCnt; - fTotalBytesRead += bufCnt; - fBytesToRead -= bufCnt; - - fBufferTailPtr += bufCnt; - if (fBufferTailPtr == fBufferHeadPtr) - fBufferHeadPtr = fBufferTailPtr = fBuffer; - - //m_log.debug("consuming %d buffered bytes", bufCnt); + case CURLE_UNSUPPORTED_PROTOCOL: + ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager); + break; - tryAgain = true; - continue; - } - - // Ask the curl to do some work - int runningHandles = 0; - CURLMcode curlResult = curl_multi_perform(fMulti, &runningHandles); - //m_log.debug("curl_multi_perform returned %d", curlResult); - tryAgain = (curlResult == CURLM_CALL_MULTI_PERFORM); - - // Process messages from curl - int msgsInQueue = 0; - for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; ) - { - m_log.debug("msg %d, %d from curl", msg->msg, msg->data.result); + case CURLE_COULDNT_RESOLVE_HOST: + case CURLE_COULDNT_RESOLVE_PROXY: + ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager); + break; - if (msg->msg != CURLMSG_DONE) - continue; - - switch (msg->data.result) - { - case CURLE_OK: - // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below... - break; - - case CURLE_UNSUPPORTED_PROTOCOL: - ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager); - break; + case CURLE_COULDNT_CONNECT: + ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager); - case CURLE_COULDNT_RESOLVE_HOST: - case CURLE_COULDNT_RESOLVE_PROXY: - ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager); - break; - - case CURLE_COULDNT_CONNECT: - ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager); - - case CURLE_RECV_ERROR: - ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager); - break; + case CURLE_RECV_ERROR: + ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager); + break; - default: - m_log.error("curl NetAccessor encountered error from libcurl (%d)", msg->data.result); - ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager); - break; - } - } - - // If nothing is running any longer, bail out - if (runningHandles == 0) { - //m_log.debug("libcurl indicated no running handles"); - break; + default: + m_log.error("curl NetAccessor encountered error from libcurl (%d)", msg->data.result); + ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager); + break; } - - // If there is no further data to read, and we haven't - // read any yet on this invocation, call select to wait for data - if (!tryAgain && fBytesRead == 0) - { - fd_set readSet; - fd_set writeSet; - fd_set exceptSet; - int fdcnt=0; - - // Ask curl for the file descriptors to wait on - FD_ZERO(&readSet); - FD_ZERO(&writeSet); - FD_ZERO(&exceptSet); - (void) curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt); - - // Wait on the file descriptors - timeval tv; - tv.tv_sec = 2; - tv.tv_usec = 0; - (void) select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv); - } - } - - //m_log.debug("returning with %d bytes to parser", fBytesRead); - return fBytesRead; -} + */ + fInputStream = new (fMemoryManager) StreamInputSource::StreamBinInputStream(fUnderlyingStream); + } + + // Defer to the stream wrapper. + return fInputStream->readBytes(toFill, maxToRead); +} diff --git a/xmltooling/util/CurlURLInputStream.hpp b/xmltooling/util/CurlURLInputStream.hpp index 991f775..cd6106a 100644 --- a/xmltooling/util/CurlURLInputStream.hpp +++ b/xmltooling/util/CurlURLInputStream.hpp @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,10 +23,9 @@ #define XERCESC_INCLUDE_GUARD_CURLURLINPUTSTREAM_HPP #include +#include -#include -#include -#include +#include #include #include @@ -48,12 +47,7 @@ public : ~CurlURLInputStream(); unsigned int curPos() const; - unsigned int readBytes - ( - XMLByte* const toFill - , const unsigned int maxToRead - ); - + unsigned int readBytes(XMLByte* const toFill, const unsigned int maxToRead); private : // ----------------------------------------------------------------------- @@ -61,65 +55,24 @@ private : // ----------------------------------------------------------------------- CurlURLInputStream(const CurlURLInputStream&); CurlURLInputStream& operator=(const CurlURLInputStream&); - - static size_t staticWriteCallback(char *buffer, - size_t size, - size_t nitems, - void *outstream); - size_t writeCallback( char *buffer, - size_t size, - size_t nitems); + static size_t staticWriteCallback(void* ptr, size_t size, size_t nmemb, void* stream); - // ----------------------------------------------------------------------- - // Private data members - // - // fSocket - // The socket representing the connection to the remote file. - // fBytesProcessed - // Its a rolling count of the number of bytes processed off this - // input stream. - // fBuffer - // Holds the http header, plus the first part of the actual - // data. Filled at the time the stream is opened, data goes - // out to user in response to readBytes(). - // fBufferPos, fBufferEnd - // Pointers into fBuffer, showing start and end+1 of content - // that readBytes must return. - // ----------------------------------------------------------------------- - - CURLM* fMulti; - CURL* fEasy; - + std::stringstream fUnderlyingStream; MemoryManager* fMemoryManager; - XMLURL fURLSource; ArrayJanitor fURL; - - unsigned long fTotalBytesRead; - XMLByte* fWritePtr; - unsigned long fBytesRead; - unsigned long fBytesToRead; - bool fDataAvailable; - - // Overflow buffer for when curl writes more data to us - // than we've asked for. - XMLByte fBuffer[CURL_MAX_WRITE_SIZE]; - XMLByte* fBufferHeadPtr; - XMLByte* fBufferTailPtr; - + StreamInputSource::StreamBinInputStream* fInputStream; logging::Category& m_log; - + }; // CurlURLInputStream -inline unsigned int -CurlURLInputStream::curPos() const +inline unsigned int CurlURLInputStream::curPos() const { - return fTotalBytesRead; + return fInputStream ? fInputStream->curPos() : 0; } }; #endif // CURLURLINPUTSTREAM_HPP -