diff --git a/eventmachine_httpserver.gemspec b/eventmachine_httpserver.gemspec index dbbae00..ec761c6 100644 --- a/eventmachine_httpserver.gemspec +++ b/eventmachine_httpserver.gemspec @@ -2,7 +2,7 @@ Gem::Specification.new do |s| s.name = %q{eventmachine_httpserver} - s.version = "0.2.1" + s.version = "0.3.0" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= s.authors = ["Francis Cianfrocca"] @@ -12,16 +12,15 @@ Gem::Specification.new do |s| s.email = %q{garbagecat10@gmail.com} s.extensions = ["ext/extconf.rb"] s.extra_rdoc_files = ["docs/COPYING", "docs/README", "docs/RELEASE_NOTES"] - s.files = ["README", "Rakefile", "docs/COPYING", "docs/README", "docs/RELEASE_NOTES", "eventmachine_httpserver.gemspec", "eventmachine_httpserver.gemspec.tmpl", "ext/extconf.rb", "ext/http.cpp", "ext/http.h", "ext/rubyhttp.cpp", "lib/evma_httpserver.rb", "lib/evma_httpserver/response.rb", "test/test_app.rb", "test/test_delegated.rb", "test/test_response.rb"] + s.files = ["README.md", "Rakefile", "docs/COPYING", "docs/README", "docs/RELEASE_NOTES", "eventmachine_httpserver.gemspec", "eventmachine_httpserver.gemspec.tmpl", "ext/extconf.rb", "ext/http.cpp", "ext/http.h", "ext/rubyhttp.cpp", "lib/evma_httpserver.rb", "lib/evma_httpserver/response.rb", "test/test_app.rb", "test/test_delegated.rb", "test/test_response.rb"] s.homepage = %q{https://github.com/eventmachine/evma_httpserver} s.rdoc_options = ["--title", "EventMachine_HttpServer", "--main", "docs/README", "--line-numbers"] s.require_paths = ["lib"] s.required_ruby_version = Gem::Requirement.new("> 0.0.0") - s.rubygems_version = %q{1.3.7} - s.summary = %q{EventMachine HTTP Server} + s.rubygems_version = "1.8.11" + s.summary = "EventMachine HTTP Server" if s.respond_to? :specification_version then - current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION s.specification_version = 1 if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then diff --git a/eventmachine_httpserver.gemspec.tmpl b/eventmachine_httpserver.gemspec.tmpl index 07adb07..38461ec 100644 --- a/eventmachine_httpserver.gemspec.tmpl +++ b/eventmachine_httpserver.gemspec.tmpl @@ -1,6 +1,6 @@ Gem::Specification.new do |s| s.name = %q{eventmachine_httpserver} - s.version = "0.2.1" + s.version = "0.2.3" s.specification_version = 1 if s.respond_to? :specification_version= diff --git a/ext/http.cpp b/ext/http.cpp index 6ee5d5b..e72c52b 100644 --- a/ext/http.cpp +++ b/ext/http.cpp @@ -45,11 +45,11 @@ using namespace std; #define strcasecmp _stricmp void setenv (const char *str, const char *value, bool replace) { - SetEnvironmentVariable (str, value); + SetEnvironmentVariable (str, value); } void unsetenv (const char *str) { - SetEnvironmentVariable (str, NULL); + SetEnvironmentVariable (str, NULL); } #endif @@ -60,21 +60,23 @@ HttpConnection_t::HttpConnection_t HttpConnection_t::HttpConnection_t() { - ProtocolState = BaseState; - _Content = NULL; - - // By default, we set the standard CGI environment strings. - // (This is primarily beneficial because it lets the caller use Ruby's CGI classes.) - // The caller can switch this off in Ruby code, which greatly improves performance. - bSetEnvironmentStrings = true; - - // This flag was added by Kirk Haines (thanks, Kirk). It preserves the original - // behavior with respect to POST content, which was to accumulate it in a buffer - // allocated and managed in this class. Kirk's mods allow callers to specify that - // POST content be submitted directly to user code piece by piece as we receive it, - // instead of buffering it here. To get the latter behavior, user code must call - // dont_accumulate_post. - bAccumulatePost = true; + ProtocolState = BaseState; + _Content = NULL; + + // By default, we set the standard CGI environment strings. + // (This is primarily beneficial because it lets the caller use Ruby's CGI classes.) + // The caller can switch this off in Ruby code, which greatly improves performance. + bSetEnvironmentStrings = true; + + // This flag was added by Kirk Haines (thanks, Kirk). It preserves the original + // behavior with respect to POST content, which was to accumulate it in a buffer + // allocated and managed in this class. Kirk's mods allow callers to specify that + // POST content be submitted directly to user code piece by piece as we receive it, + // instead of buffering it here. To get the latter behavior, user code must call + // dont_accumulate_post. + bAccumulatePost = true; + + } @@ -84,8 +86,10 @@ HttpConnection_t::~HttpConnection_t HttpConnection_t::~HttpConnection_t() { - if (_Content) - free (_Content); + if (_Content) { + free (_Content); + _Content = NULL; + } } @@ -115,17 +119,18 @@ HttpConnection_t::ProcessRequest ********************************/ void HttpConnection_t::ProcessRequest (const char *method, - const char *cookie, - const char *ifnonematch, - const char *contenttype, - const char *query_string, - const char *path_info, - const char *request_uri, - const char *protocol, - int post_length, - const char *post_content, - const char *hdrblock, - int hdrblocksize) + const char *cookie, + const char *ifnonematch, + const char *contenttype, + const char *query_string, + const char *path_info, + const char *request_uri, + const char *protocol, + int post_length, + const char *post_content, + const char *hdrblock, + int hdrblocksize, + int content_chunked) { cerr << "UNIMPLEMENTED ProcessRequest" << endl; } @@ -137,179 +142,334 @@ HttpConnection_t::ReceivePostData void HttpConnection_t::ReceivePostData (const char *data, int len) { - cerr << "UNIMPLEMENTED ReceivePostData" << endl; + cerr << "UNIMPLEMENTED ReceivePostData" << endl; } /***************************** -HttpConnection_t::ConsumeData +HttpConnection_t:: *****************************/ void HttpConnection_t::ConsumeData (const char *data, int length) { - if (ProtocolState == EndState) - return; - - if ((length > 0) && !data) - throw std::runtime_error ("bad args consuming http data"); - - while (length > 0) { - //----------------------------------- BaseState - // Initialize for a new request. Don't consume any data. - // For anal-retentive security we may want to bzero the header block. - if (ProtocolState == BaseState) { - ProtocolState = PreheaderState; - nLeadingBlanks = 0; - HeaderLinePos = 0; - HeaderBlockPos = 0; - ContentLength = 0; - ContentPos = 0; - bRequestSeen = false; - bContentLengthSeen = false; - if (_Content) { - free ((void*)_Content); - _Content = NULL; - } - RequestMethod = NULL; - #ifdef OS_WIN32 - Cookie.erase(Cookie.begin(),Cookie.end()); - IfNoneMatch.erase(IfNoneMatch.begin(),IfNoneMatch.end()); - ContentType.erase(ContentType.begin(),ContentType.end()); - PathInfo.erase(PathInfo.begin(),PathInfo.end()); - RequestUri.erase(RequestUri.begin(),RequestUri.end()); - QueryString.erase(QueryString.begin(),QueryString.end()); - Protocol.erase(Protocol.begin(),Protocol.end()); - #else - Cookie.clear(); - IfNoneMatch.clear(); - ContentType.clear(); - PathInfo.clear(); - RequestUri.clear(); - QueryString.clear(); - Protocol.clear(); - #endif - - if (bSetEnvironmentStrings) { - unsetenv ("REQUEST_METHOD"); - unsetenv ("HTTP_COOKIE"); - unsetenv ("IF_NONE_MATCH"); - unsetenv ("CONTENT_TYPE"); - unsetenv ("PATH_INFO"); - unsetenv ("REQUEST_URI"); - unsetenv ("QUERY_STRING"); - unsetenv ("PROTOCOL"); - } - } - - //----------------------------------- PreheaderState - // Consume blank lines (but not too many of them) - while ((ProtocolState == PreheaderState) && (length > 0)) { - if ((*data == '\r') || (*data == '\n')) { - data++; - length--; - nLeadingBlanks++; - if (nLeadingBlanks > MaxLeadingBlanks) { - // TODO, log this. - goto fail_connection; - } - } - else - ProtocolState = HeaderState; - } - - //----------------------------------- HeaderState - // Read HTTP headers. - // This processing depends on the fact that the end - // of the data buffer we receive will have a null terminator - // just after the last byte indicated by the length parameter. - // Cf notes in ConnectionDescriptor::Read. - while ((ProtocolState == HeaderState) && (length > 0)) { - if (*data == '\n') { - HeaderLine [HeaderLinePos] = 0; - if (!_InterpretHeaderLine (HeaderLine)) - goto send_error; - if (HeaderLinePos == 0) { - if (ContentLength > 0) { - if (_Content) - free (_Content); - _Content = NULL; - if (bAccumulatePost) { - _Content = (char*) malloc (ContentLength + 1); - if (!_Content) - throw std::runtime_error ("resource exhaustion"); - } - ContentPos = 0; - ProtocolState = ReadingContentState; - } - else - ProtocolState = DispatchState; - } - HeaderLinePos = 0; - data++; - length--; - } - else if (*data == '\r') { - // ignore \r - data++; - length--; - } - else { - const char *nl = strpbrk (data, "\r\n"); - int len = nl ? (nl - data) : length; - if ((size_t)(HeaderLinePos + len) >= sizeof(HeaderLine)) { - // TODO, log this - goto fail_connection; - } - memcpy (HeaderLine + HeaderLinePos, data, len); - data += len; - length -= len; - HeaderLinePos += len; - } - } - - - //----------------------------------- ReadingContentState - // Read POST content. - while ((ProtocolState == ReadingContentState) && (length > 0)) { - int len = ContentLength - ContentPos; - if (len > length) - len = length; - - if (bAccumulatePost) - memcpy (_Content + ContentPos, data, len); - else - ReceivePostData (data, len); - - data += len; - length -= len; - ContentPos += len; - if (ContentPos == ContentLength) { - if (bAccumulatePost) - _Content[ContentPos] = 0; - ProtocolState = DispatchState; - } - } - - - //----------------------------------- DispatchState - if (ProtocolState == DispatchState) { - ProcessRequest (RequestMethod, Cookie.c_str(), IfNoneMatch.c_str(), ContentType.c_str(), QueryString.c_str(), PathInfo.c_str(), RequestUri.c_str(), Protocol.c_str(), ContentLength, _Content, HeaderBlock, HeaderBlockPos); - ProtocolState = BaseState; - } - } - - return; - - fail_connection: - // For protocol errors or security violations- kill the connection dead. - CloseConnection (false); - ProtocolState = EndState; - return; - - send_error: - // for HTTP-level errors that will send back a response to the client. - CloseConnection (true); - ProtocolState = EndState; - return; + + if (ProtocolState == EndState) + return; + + if ((length > 0) && !data) + throw std::runtime_error ("bad args consuming http data"); + + while (length > 0) { + + //----------------------------------- BaseState + // Initialize for a new request. Don't consume any data. + // For anal-retentive security we may want to bzero the header block. + if (ProtocolState == BaseState) { + ProtocolState = PreheaderState; + nLeadingBlanks = 0; + HeaderLinePos = 0; + HeaderBlockPos = 0; + ContentLength = 0; + ContentChunked = 0; + TrailerProcessing = 0; + ContentPos = 0; + Chunk_req_received = 0; + foundsemi = 0; + foundslashr = 0; + chunklen = 0; + bRequestSeen = false; + bContentLengthSeen = false; + if (_Content) { + free ((void*)_Content); + _Content = NULL; + } + RequestMethod = NULL; + #ifdef OS_WIN32 + Cookie.erase(Cookie.begin(),Cookie.end()); + IfNoneMatch.erase(IfNoneMatch.begin(),IfNoneMatch.end()); + ContentType.erase(ContentType.begin(),ContentType.end()); + PathInfo.erase(PathInfo.begin(),PathInfo.end()); + RequestUri.erase(RequestUri.begin(),RequestUri.end()); + QueryString.erase(QueryString.begin(),QueryString.end()); + Protocol.erase(Protocol.begin(),Protocol.end()); + #else + Cookie.clear(); + IfNoneMatch.clear(); + ContentType.clear(); + PathInfo.clear(); + RequestUri.clear(); + QueryString.clear(); + Protocol.clear(); + #endif + + if (bSetEnvironmentStrings) { + unsetenv ("REQUEST_METHOD"); + unsetenv ("HTTP_COOKIE"); + unsetenv ("IF_NONE_MATCH"); + unsetenv ("CONTENT_TYPE"); + unsetenv ("PATH_INFO"); + unsetenv ("REQUEST_URI"); + unsetenv ("QUERY_STRING"); + unsetenv ("PROTOCOL"); + } + } + + //----------------------------------- PreheaderState + // Consume blank lines (but not too many of them) + while ((ProtocolState == PreheaderState) && (length > 0)) { + if ((*data == '\r') || (*data == '\n')) { + data++; + length--; + nLeadingBlanks++; + if (nLeadingBlanks > MaxLeadingBlanks) { + // TODO, log this. + goto fail_connection; + } + } + else + ProtocolState = HeaderState; + + //sleep(30); + } + + //----------------------------------- HeaderState + // Read HTTP headers. + // This processing depends on the fact that the end + // of the data buffer we receive will have a null terminator + // just after the last byte indicated by the length parameter. + // Cf notes in ConnectionDescriptor::Read. + while ((ProtocolState == HeaderState) && (length > 0)) { + if (*data == '\n') { + HeaderLine [HeaderLinePos] = 0; + if (!_InterpretHeaderLine (HeaderLine)) + goto send_error; + if (HeaderLinePos == 0) { + if (ContentLength > 0) { + if (_Content) + free (_Content); + _Content = NULL; + if (bAccumulatePost) { + _Content = (char*) malloc (ContentLength + 1); + if (!_Content) + throw std::runtime_error ("resource exhaustion"); + } + ContentPos = 0; + ProtocolState = ReadingContentState; + } + else if (ContentChunked){ + if (_Content){ + free (_Content); + } + _Content = NULL; + ContentPos = 0; + ProtocolState = ReadingChunkLen; + //clear our header buffer, reset flags + memset(chunklen_s, '\0',10); + foundsemi = 0; + foundslashr = 0; + foundslashn = 0; + chunklen = 0; + ContentChunked = 0; + } + else{ + if(TrailerProcessing){ + ContentLength = ContentPos; + Chunk_req_received = 1; + } + // We will come to here for GET or Chunked POST. + ProtocolState = DispatchState; + } + } + + HeaderLinePos = 0; + data++; + length--; + } + else if (*data == '\r') { + // ignore \r + data++; + length--; + } + else { + const char *nl = strpbrk (data, "\r\n"); + int len = nl ? (nl - data) : length; + + if ((size_t)(HeaderLinePos + len) >= sizeof(HeaderLine)) { + // TODO, log this + goto fail_connection; + } + + memcpy (HeaderLine + HeaderLinePos, data, len); + data += len; + length -= len; + HeaderLinePos += len; + } + } + + + //----------------------------------- ReadingContentState + // Read POST content. + while ((ProtocolState == ReadingContentState) && (length > 0)) { + int len = ContentLength - ContentPos; + if (len > length){ + len = length; + } + if (bAccumulatePost) + memcpy (_Content + ContentPos, data, len); + else + ReceivePostData (data, len); + + data += len; + length -= len; + ContentPos += len; + if (ContentPos == ContentLength) { + if (bAccumulatePost){ + _Content[ContentPos] = 0; + } + ProtocolState = DispatchState; + } + } + + int chunklen_pos = 0; + //----------------------------------- ReadingChunkLen + // Read POST chunked content. + while ((ProtocolState == ReadingChunkLen) && length > 0) + { + if(*data == ';') + { + //The standards say that there can be a semi-colon after the chunk length, plus some data; + foundsemi = 1; + } + else if(*data == '\r') + { + foundslashr =1; + } + else if(*data == '\n') + { + //If we find a slash n without a slash r then this message is malformatted + if(! foundslashr) + { + goto send_error; + } + foundslashn = 1; + } + else + { + if( ! foundsemi ) + { + chunklen_s[chunklen_pos] = *data; + chunklen_pos++; + } + } + + data = data+1; + length--; + if (foundslashn) + { + //convert the length to a long + chunklen = (int)strtol ( chunklen_s, NULL, 16 ); + if (chunklen == 0 ) + { + ProtocolState = HeaderState; + + } + else + { + ProtocolState = ReadingChunkedContent; + } + } + } + int readamount =0; + int readischunk=0; + //----------------------------------- ReadingChunkedContent + // Read POST chunked content. + while ((ProtocolState == ReadingChunkedContent && length > 0)) + { + if(length <= chunklen) + { + readamount = length; + chunklen-=length; + } + else + { + readischunk = 1; + readamount = chunklen; + } + + char *_temp_content = NULL; + //copy to temp variable so we don't overwrite _Content with null if we can't alloc. + _temp_content = (char*) realloc(_Content, ContentPos + readamount + 1); + + if (!_temp_content){ + // free(_Content); + // _Content = NULL; + throw std::runtime_error ("resource exhaustion"); + } + _Content = _temp_content; + + memcpy(_Content + ContentPos, data, readamount); + ContentPos += readamount; + _Content[ContentPos] = '\0'; + data += readamount; + length -= readamount; + + //If we just read the entire chunk then we can expect the next + // two characters to be \r\n + if(readischunk) + { + if(*data != '\r') + { + goto send_error; + } + + data++; + length--; + + if(*data != '\n') + { + goto send_error; + } + data++; + length--; + ProtocolState = ReadingChunkLen; + memset(chunklen_s, '\0',10); //clear our header buffer. + foundsemi = 0; + foundslashr = 0; + chunklen = 0; + foundslashn = 0; + } + } + //----------------------------------- DispatchState + if (ProtocolState == DispatchState) { + ProcessRequest (RequestMethod, + Cookie.c_str(), + IfNoneMatch.c_str(), + ContentType.c_str(), + QueryString.c_str(), + PathInfo.c_str(), + RequestUri.c_str(), + Protocol.c_str(), + ContentLength, + _Content, + HeaderBlock, + HeaderBlockPos, + Chunk_req_received); + ProtocolState = BaseState; + } + } + + return; + + fail_connection: + // For protocol errors or security violations- kill the connection dead. + CloseConnection (false); + ProtocolState = EndState; + return; + + send_error: + // for HTTP-level errors that will send back a response to the client. + CloseConnection (true); + ProtocolState = EndState; + return; } @@ -317,101 +477,124 @@ void HttpConnection_t::ConsumeData (const char *data, int length) /************************************** HttpConnection_t::_InterpretHeaderLine **************************************/ - bool HttpConnection_t::_InterpretHeaderLine (const char *header) { - /* Return T/F to indicate whether we should continue processing - * this request. Return false to indicate that we detected a fatal - * error or other condition which should cause us to drop the - * connection. - * BY DEFINITION, this doesn't define any immediate fatal errors. - * That may need to change, in which case we'll have to return - * an error code rather than T/F, so the caller will know whether - * to drop the connection gracefully or not. - * - * There's something odd and possibly undesirable about how we're - * doing this. We fully process each header (including the request) - * _as we see it,_ and not at the end when all the headers have - * been seen. This saves us the trouble of keeping them all around - * and possibly parsing them twice, but it also means that when - * we emit errors from here (that generate HTTP responses other than - * 200 and therefore close the connection), we do so _immediately_ - * and before looking at the rest of the headers. That might surprise - * and confuse some clients. - * - * Revised 27Sep06, we now store all the headers in one place, on a - * per-request basis, for the purpose of making them available to - * downstream users. At present this involves an undesirable extra - * memory copy. Eventually should rework the main header processing - * so it can be done in place. - */ - - if (!header) // an assert, really. - throw std::runtime_error ("bad arg interpreting headers"); - - if (!bRequestSeen) { - bRequestSeen = true; - return _InterpretRequest (header); - } - - if (!strncasecmp (header, "content-length:", 15)) { - if (bContentLengthSeen) { - // TODO, log this. There are some attacks that depend - // on sending more than one content-length header. - _SendError (RESPONSE_CODE_406); - return false; - } - bContentLengthSeen = true; - const char *s = header + 15; - while (*s && ((*s==' ') || (*s=='\t'))) - s++; - ContentLength = atoi (s); - if (ContentLength > MaxContentLength) { - // TODO, log this. - _SendError (RESPONSE_CODE_406); - return false; - } - } - else if (!strncasecmp (header, "cookie:", 7)) { - const char *s = header + 7; - while (*s && ((*s==' ') || (*s=='\t'))) - s++; - Cookie = s; - if (bSetEnvironmentStrings) - setenv ("HTTP_COOKIE", s, true); - } - else if (!strncasecmp (header, "If-none-match:", 14)) { - const char *s = header + 14; - while (*s && ((*s==' ') || (*s=='\t'))) - s++; - IfNoneMatch = s; - if (bSetEnvironmentStrings) - setenv ("IF_NONE_MATCH", s, true); - } - else if (!strncasecmp (header, "Content-type:", 13)) { - const char *s = header + 13; - while (*s && ((*s==' ') || (*s=='\t'))) - s++; - ContentType = s; - if (bSetEnvironmentStrings) - setenv ("CONTENT_TYPE", s, true); - } - - - // Copy the incoming header into a block - if ((HeaderBlockPos + strlen(header) + 1) < HeaderBlockSize) { - int len = strlen(header); - memcpy (HeaderBlock+HeaderBlockPos, header, len); - HeaderBlockPos += len; - HeaderBlock [HeaderBlockPos++] = 0; - } - else { - // TODO, log this. - _SendError (RESPONSE_CODE_406); - return false; - } - - return true; + /* Return T/F to indicate whether we should continue processing + * this request. Return false to indicate that we detected a fatal + * error or other condition which should cause us to drop the + * connection. + * BY DEFINITION, this doesn't define any immediate fatal errors. + * That may need to change, in which case we'll have to return + * an error code rather than T/F, so the caller will know whether + * to drop the connection gracefully or not. + * + * There's something odd and possibly undesirable about how we're + * doing this. We fully process each header (including the request) + * _as we see it,_ and not at the end when all the headers have + * been seen. This saves us the trouble of keeping them all around + * and possibly parsing them twice, but it also means that when + * we emit errors from here (that generate HTTP responses other than + * 200 and therefore close the connection), we do so _immediately_ + * and before looking at the rest of the headers. That might surprise + * and confuse some clients. + * + * Revised 27Sep06, we now store all the headers in one place, on a + * per-request basis, for the purpose of making them available to + * downstream users. At present this involves an undesirable extra + * memory copy. Eventually should rework the main header processing + * so it can be done in place. + */ + + if (!header) // an assert, really. + throw std::runtime_error ("bad arg interpreting headers"); + + if (!bRequestSeen) { + bRequestSeen = true; + return _InterpretRequest (header); + } + + if (!strncasecmp (header, "content-length:", 15)) { + if (bContentLengthSeen) { + // TODO, log this. There are some attacks that depend + // on sending more than one content-length header. + _SendError (RESPONSE_CODE_406); + return false; + } + bContentLengthSeen = true; + const char *s = header + 15; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + ContentLength = atoi (s); + if (ContentLength > MaxContentLength) { + // TODO, log this. + _SendError (RESPONSE_CODE_406); + return false; + } + } + else if (!strncasecmp (header, "cookie:", 7)) { + const char *s = header + 7; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + Cookie = s; + if (bSetEnvironmentStrings) + setenv ("HTTP_COOKIE", s, true); + } + else if (!strncasecmp (header, "If-none-match:", 14)) { + const char *s = header + 14; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + IfNoneMatch = s; + if (bSetEnvironmentStrings) + setenv ("IF_NONE_MATCH", s, true); + } + else if (!strncasecmp (header, "Content-type:", 13)) { + //If we receive this header after we have processed chunked data + //send an error. + if(TrailerProcessing){ + _SendError (RESPONSE_CODE_406); + return false; + } + const char *s = header + 13; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + ContentType = s; + if (bSetEnvironmentStrings) + setenv ("CONTENT_TYPE", s, true); + } + else if (!strncasecmp (header, "Transfer-Encoding:", 18)) { + if(TrailerProcessing){ + _SendError (RESPONSE_CODE_406); + return false; + } + const char *s = header + 18; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + if (!strncasecmp (s, "chunked", 7)){ + TrailerProcessing = 1; + ContentChunked = 1; + } + + } + else if (!strncasecmp (header, "Trailer:", 8)) { + if(TrailerProcessing){ + _SendError (RESPONSE_CODE_406); + return false; + } + } + // Copy the incoming header into a block + if ((HeaderBlockPos + strlen(header) + 1) < HeaderBlockSize) { + int len = strlen(header); + memcpy (HeaderBlock+HeaderBlockPos, header, len); + HeaderBlockPos += len; + HeaderBlock [HeaderBlockPos++] = 0; + } + else { + // TODO, log this. + _SendError (RESPONSE_CODE_406); + return false; + } + + return true; } @@ -421,108 +604,108 @@ HttpConnection_t::_InterpretRequest bool HttpConnection_t::_InterpretRequest (const char *header) { - /* Return T/F to indicate whether we should continue processing - * this request. Return false to indicate that we detected a fatal - * error or other condition which should cause us to drop the - * connection. - * Interpret the contents of the given line as an HTTP request string. - * WE ASSUME the passed-in header is not null. - * - * In preparation for a CGI-style call, we set the following - * environment strings here (other code will DEPEND ON ALL OF - * THESE BEING SET HERE in case there are no errors): - * REQUEST_METHOD, PATH_INFO, QUERY_STRING. - * - * Oh and by the way, this code sucks. It's reasonably fast - * but not terribly fast, and it's ugly. Refactor someday. - */ - - const char *blank = strchr (header, ' '); - if (!blank) { - _SendError (RESPONSE_CODE_406); - return false; - } - - if (!_DetectVerbAndSetEnvString (header, blank - header)) - return false; - - blank++; - if (*blank != '/') { - _SendError (RESPONSE_CODE_406); - return false; - } - - const char *blank2 = strchr (blank, ' '); - if (!blank2) { - _SendError (RESPONSE_CODE_406); - return false; - } - if (strcasecmp (blank2 + 1, "HTTP/1.0") && strcasecmp (blank2 + 1, "HTTP/1.1")) { - _SendError (RESPONSE_CODE_505); - return false; - } - - string prot (blank2+1); - Protocol = prot.c_str(); - - // Here, the request starts at blank and ends just before blank2. - // Find the query-string (?) and/or fragment (#,;), if either are present. - const char *questionmark = strchr (blank, '?'); - if (questionmark && (questionmark >= blank2)) - questionmark = NULL; - // const char *fragment = strpbrk ((questionmark ? (questionmark+1) : blank), "#;"); - const char *fragment = strpbrk ((questionmark ? (questionmark+1) : blank), "#"); - if (fragment && (fragment >= blank2)) - fragment = NULL; - - if (questionmark) { - string req (blank, questionmark - blank); - PathInfo = req.c_str(); - RequestUri = req.c_str(); - string qs (questionmark+1, fragment ? (fragment - (questionmark+1)) : (blank2 - (questionmark+1))); - QueryString = qs.c_str(); - - if (bSetEnvironmentStrings) { - setenv ("PATH_INFO", req.c_str(), true); - setenv ("REQUEST_URI", req.c_str(), true); - setenv ("QUERY_STRING", qs.c_str(), true); - setenv ("PROTOCOL", prot.c_str(), true); - } - } - else if (fragment) { - string req (blank, fragment - blank); - PathInfo = req.c_str(); - RequestUri = req.c_str(); - #ifdef OS_WIN32 - QueryString.erase(QueryString.begin(),QueryString.end()); - #else - QueryString.clear(); - #endif - if (bSetEnvironmentStrings) { - setenv ("PATH_INFO", req.c_str(), true); - setenv ("REQUEST_URI", req.c_str(), true); - setenv ("QUERY_STRING", "", true); - setenv ("PROTOCOL", prot.c_str(), true); - } - } - else { - string req (blank, blank2 - blank); - PathInfo = req.c_str(); - RequestUri = req.c_str(); - #ifdef OS_WIN32 - QueryString.erase(QueryString.begin(),QueryString.end()); - #else - QueryString.clear(); - #endif - if (bSetEnvironmentStrings) { - setenv ("PATH_INFO", req.c_str(), true); - setenv ("REQUEST_URI", req.c_str(), true); - setenv ("QUERY_STRING", "", true); - setenv ("PROTOCOL", prot.c_str(), true); - } - } - - return true; + /* Return T/F to indicate whether we should continue processing + * this request. Return false to indicate that we detected a fatal + * error or other condition which should cause us to drop the + * connection. + * Interpret the contents of the given line as an HTTP request string. + * WE ASSUME the passed-in header is not null. + * + * In preparation for a CGI-style call, we set the following + * environment strings here (other code will DEPEND ON ALL OF + * THESE BEING SET HERE in case there are no errors): + * REQUEST_METHOD, PATH_INFO, QUERY_STRING. + * + * Oh and by the way, this code sucks. It's reasonably fast + * but not terribly fast, and it's ugly. Refactor someday. + */ + + const char *blank = strchr (header, ' '); + if (!blank) { + _SendError (RESPONSE_CODE_406); + return false; + } + + if (!_DetectVerbAndSetEnvString (header, blank - header)) + return false; + + blank++; + if (*blank != '/') { + _SendError (RESPONSE_CODE_406); + return false; + } + + const char *blank2 = strchr (blank, ' '); + if (!blank2) { + _SendError (RESPONSE_CODE_406); + return false; + } + if (strcasecmp (blank2 + 1, "HTTP/1.0") && strcasecmp (blank2 + 1, "HTTP/1.1")) { + _SendError (RESPONSE_CODE_505); + return false; + } + + string prot (blank2+1); + Protocol = prot.c_str(); + + // Here, the request starts at blank and ends just before blank2. + // Find the query-string (?) and/or fragment (#,;), if either are present. + const char *questionmark = strchr (blank, '?'); + if (questionmark && (questionmark >= blank2)) + questionmark = NULL; + // const char *fragment = strpbrk ((questionmark ? (questionmark+1) : blank), "#;"); + const char *fragment = strpbrk ((questionmark ? (questionmark+1) : blank), "#"); + if (fragment && (fragment >= blank2)) + fragment = NULL; + + if (questionmark) { + string req (blank, questionmark - blank); + PathInfo = req.c_str(); + RequestUri = req.c_str(); + string qs (questionmark+1, fragment ? (fragment - (questionmark+1)) : (blank2 - (questionmark+1))); + QueryString = qs.c_str(); + + if (bSetEnvironmentStrings) { + setenv ("PATH_INFO", req.c_str(), true); + setenv ("REQUEST_URI", req.c_str(), true); + setenv ("QUERY_STRING", qs.c_str(), true); + setenv ("PROTOCOL", prot.c_str(), true); + } + } + else if (fragment) { + string req (blank, fragment - blank); + PathInfo = req.c_str(); + RequestUri = req.c_str(); + #ifdef OS_WIN32 + QueryString.erase(QueryString.begin(),QueryString.end()); + #else + QueryString.clear(); + #endif + if (bSetEnvironmentStrings) { + setenv ("PATH_INFO", req.c_str(), true); + setenv ("REQUEST_URI", req.c_str(), true); + setenv ("QUERY_STRING", "", true); + setenv ("PROTOCOL", prot.c_str(), true); + } + } + else { + string req (blank, blank2 - blank); + PathInfo = req.c_str(); + RequestUri = req.c_str(); + #ifdef OS_WIN32 + QueryString.erase(QueryString.begin(),QueryString.end()); + #else + QueryString.clear(); + #endif + if (bSetEnvironmentStrings) { + setenv ("PATH_INFO", req.c_str(), true); + setenv ("REQUEST_URI", req.c_str(), true); + setenv ("QUERY_STRING", "", true); + setenv ("PROTOCOL", prot.c_str(), true); + } + } + + return true; } @@ -532,54 +715,52 @@ HttpConnection_t::_DetectVerbAndSetEnvString bool HttpConnection_t::_DetectVerbAndSetEnvString (const char *request, int verblength) { - /* Helper method for _InterpretRequest. - * WE MUST SET THE ENV STRING "REQUEST_METHOD" HERE - * unless there is an error. - * The hardcoded verbs MUST be static, as we'll carry around pointers to them. - */ - - static const char *verbs[] = { - "GET", - "POST", - "PUT", - "DELETE", - "HEAD", - "OPTIONS" - }; - - int n_verbs = sizeof(verbs) / sizeof(const char*); - - // Warning, this algorithm is vulnerable to head-matches, - // so compare the longer head-matching strings first. - // We could fix this if we included the blank in the search - // string but then we'd have to lop it off in the env string. - // ALSO NOTICE the early return on success. - for (int i=0; i < n_verbs; i++) { - if (!strncasecmp (request, verbs[i], verblength) && (strlen(verbs[i]) == (size_t)verblength)) { - RequestMethod = verbs[i]; - if (bSetEnvironmentStrings) - setenv ("REQUEST_METHOD", verbs[i], 1); - return true; - } - } - - _SendError (RESPONSE_CODE_406); - return false; + /* Helper method for _InterpretRequest. + * WE MUST SET THE ENV STRING "REQUEST_METHOD" HERE + * unless there is an error. + * The hardcoded verbs MUST be static, as we'll carry around pointers to them. + */ + + static const char *verbs[] = { + "GET", + "POST", + "PUT", + "DELETE", + "HEAD", + "OPTIONS" + }; + + int n_verbs = sizeof(verbs) / sizeof(const char*); + + // Warning, this algorithm is vulnerable to head-matches, + // so compare the longer head-matching strings first. + // We could fix this if we included the blank in the search + // string but then we'd have to lop it off in the env string. + // ALSO NOTICE the early return on success. + for (int i=0; i < n_verbs; i++) { + if (!strncasecmp (request, verbs[i], verblength) && (strlen(verbs[i]) == (size_t)verblength)) { + RequestMethod = verbs[i]; + if (bSetEnvironmentStrings) + setenv ("REQUEST_METHOD", verbs[i], 1); + return true; + } + } + + _SendError (RESPONSE_CODE_406); + return false; } - - /**************************** HttpConnection_t::_SendError ****************************/ void HttpConnection_t::_SendError (const char *header) { - stringstream ss; - ss << "HTTP/1.1 " << header << "\r\n"; - ss << "Connection: close\r\n"; - ss << "Content-Type: text/plain\r\n"; - ss << "\r\n"; + stringstream ss; + ss << "HTTP/1.1 " << header << "\r\n"; + ss << "Connection: close\r\n"; + ss << "Content-Type: text/plain\r\n"; + ss << "\r\n"; - SendData (ss.str().c_str(), ss.str().length()); + SendData (ss.str().c_str(), ss.str().length()); } diff --git a/ext/http.h b/ext/http.h index e3ce03f..1998250 100644 --- a/ext/http.h +++ b/ext/http.h @@ -36,82 +36,93 @@ class HttpConnection_t class HttpConnection_t { - public: - HttpConnection_t(); - virtual ~HttpConnection_t(); - - void ConsumeData (const char*, int); - - virtual void SendData (const char*, int); - virtual void CloseConnection (bool after_writing); - virtual void ProcessRequest (const char *method, - const char *cookie, - const char *ifnonematch, - const char *content_type, - const char *query_string, - const char *path_info, - const char *request_uri, - const char *protocol, - int postlength, - const char *postdata, - const char* hdrblock, - int hdrblksize); - - virtual void ReceivePostData(const char *data, int len); - virtual void SetNoEnvironmentStrings() {bSetEnvironmentStrings = false;} - virtual void SetDontAccumulatePost() {bAccumulatePost = false;} + public: + HttpConnection_t(); + virtual ~HttpConnection_t(); + + void ConsumeData (const char*, int); + + virtual void SendData (const char*, int); + virtual void CloseConnection (bool after_writing); + virtual void ProcessRequest (const char *method, + const char *cookie, + const char *ifnonematch, + const char *content_type, + const char *query_string, + const char *path_info, + const char *request_uri, + const char *protocol, + int postlength, + const char *postdata, + const char* hdrblock, + int hdrblksize, + int content_chunked); + + virtual void ReceivePostData(const char *data, int len); + virtual void SetNoEnvironmentStrings() {bSetEnvironmentStrings = false;} + virtual void SetDontAccumulatePost() {bAccumulatePost = false;} private: - enum { - BaseState, - PreheaderState, - HeaderState, - ReadingContentState, - DispatchState, - EndState - } ProtocolState; - - enum { - MaxLeadingBlanks = 12, - MaxHeaderLineLength = 8 * 1024, - MaxContentLength = 20 * 1024 * 1024, - HeaderBlockSize = 16 * 1024 - }; - int nLeadingBlanks; - - char HeaderLine [MaxHeaderLineLength]; - int HeaderLinePos; - - char HeaderBlock [HeaderBlockSize]; - int HeaderBlockPos; - - int ContentLength; - int ContentPos; - char *_Content; - - bool bSetEnvironmentStrings; - bool bAccumulatePost; - bool bRequestSeen; - bool bContentLengthSeen; - - const char *RequestMethod; - std::string Cookie; - std::string IfNoneMatch; - std::string ContentType; - std::string PathInfo; - std::string RequestUri; - std::string QueryString; - std::string Protocol; - - private: - bool _InterpretHeaderLine (const char*); - bool _InterpretRequest (const char*); - bool _DetectVerbAndSetEnvString (const char*, int); - void _SendError (const char*); + enum { + BaseState, + PreheaderState, + HeaderState, + ReadingContentState, + ReadingChunkLen, + ReadingChunkedContent, + DispatchState, + EndState + } ProtocolState; + + enum { + MaxLeadingBlanks = 12, + MaxHeaderLineLength = 8 * 1024, + MaxContentLength = 20 * 1024 * 1024, + HeaderBlockSize = 16 * 1024 + }; + int nLeadingBlanks; + + char HeaderLine [MaxHeaderLineLength]; + int HeaderLinePos; + + char HeaderBlock [HeaderBlockSize]; + int HeaderBlockPos; + + int ContentLength; + int ContentChunked; + int TrailerProcessing; + int ContentPos; + char *_Content; + int Chunk_req_received; + + char chunklen_s[10]; + int foundsemi; + int foundslashr; + int chunklen; + int foundslashn; + + bool bSetEnvironmentStrings; + bool bAccumulatePost; + bool bRequestSeen; + bool bContentLengthSeen; +// ofstream programlog; + + const char *RequestMethod; + std::string Cookie; + std::string IfNoneMatch; + std::string ContentType; + std::string PathInfo; + std::string RequestUri; + std::string QueryString; + std::string Protocol; + + private: + int _GetChunkLength(const char *, int *, int *); + bool _InterpretHeaderLine (const char*); + bool _InterpretRequest (const char*); + bool _DetectVerbAndSetEnvString (const char*, int); + void _SendError (const char*); }; #endif // __HttpPersonality__H_ - - - diff --git a/ext/rubyhttp.cpp b/ext/rubyhttp.cpp index c2b8f72..09382c0 100644 --- a/ext/rubyhttp.cpp +++ b/ext/rubyhttp.cpp @@ -56,7 +56,8 @@ class RubyHttpConnection_t: public HttpConnection_t int postlength, const char *postdata, const char *hdrblock, - int hdrblocksize); + int hdrblocksize, + int content_chunked); virtual void ReceivePostData (const char *data, int len); private: @@ -114,7 +115,8 @@ void RubyHttpConnection_t::ProcessRequest (const char *request_method, int post_length, const char *post_content, const char *hdr_block, - int hdr_block_size) + int hdr_block_size, + int content_chunked) { VALUE post = Qnil; VALUE headers = Qnil; @@ -126,7 +128,8 @@ void RubyHttpConnection_t::ProcessRequest (const char *request_method, VALUE query_string_val = Qnil; VALUE request_uri_val = Qnil; VALUE protocol_val = Qnil; - + VALUE chunked = Qfalse; + if ((post_length > 0) && post_content) post = rb_str_new (post_content, post_length); @@ -150,8 +153,10 @@ void RubyHttpConnection_t::ProcessRequest (const char *request_method, if (request_uri && *request_uri) request_uri_val = rb_str_new (request_uri, strlen (request_uri)); if (protocol && *protocol) - protocol_val = rb_str_new (protocol, strlen (protocol)); - + protocol_val = rb_str_new (protocol, strlen (protocol)); + if(content_chunked){ + chunked = Qtrue; + } rb_ivar_set (Myself, rb_intern ("@http_request_method"), req_method); rb_ivar_set (Myself, rb_intern ("@http_cookie"), cookie_val); rb_ivar_set (Myself, rb_intern ("@http_if_none_match"), ifnonematch_val); @@ -162,6 +167,7 @@ void RubyHttpConnection_t::ProcessRequest (const char *request_method, rb_ivar_set (Myself, rb_intern ("@http_post_content"), post); rb_ivar_set (Myself, rb_intern ("@http_headers"), headers); rb_ivar_set (Myself, rb_intern ("@http_protocol"), protocol_val); + rb_ivar_set (Myself, rb_intern ("@http_chunked"), chunked); rb_funcall (Myself, rb_intern ("process_http_request"), 0); } @@ -222,6 +228,7 @@ t_receive_data static VALUE t_receive_data (VALUE self, VALUE data) { + int length = NUM2INT (rb_funcall (data, rb_intern ("length"), 0)); RubyHttpConnection_t *hc = t_get_http_connection (self); if (hc) diff --git a/test/test_app.rb b/test/test_app.rb index 63e1490..8f07569 100644 --- a/test/test_app.rb +++ b/test/test_app.rb @@ -158,10 +158,10 @@ def test_headers # a costly operation, but we should provide an optional method that # does the parsing so it doesn't need to be done by users. conn.instance_eval do - @assertions = proc do + @assertions = proc { received_header_string = @http_headers received_header_ary = @http_headers.split(/\0/).map {|line| line.split(/:\s*/, 2) } - end + } end end @@ -198,7 +198,8 @@ def test_post content_type = "text/plain" received_post_content = "" received_content_type = "" - + received_chunk="" + EventMachine.run do EventMachine.start_server(TestHost, TestPort, MyTestServer) do |conn| # In each accepted connection, set up a procedure that will copy @@ -209,6 +210,7 @@ def test_post @assertions = proc do received_post_content = @http_post_content received_content_type = ENV["CONTENT_TYPE"] + received_chunk = @http_chunked end end end @@ -232,8 +234,200 @@ def test_post EventMachine.defer cb, eb end - assert_equal( received_post_content, post_content ) + assert_equal( post_content, received_post_content) assert_equal( received_content_type, content_type ) + assert_equal( false,received_chunk ) end + + def test_chunked + received_header_string = nil + chunked_content1 = "1234567890" + chunked_content2 = "abcdefgh" + chunked_content3 = "This will be a lot longer than the previous two chunks." + content_type = "text/plain" + etag = "12345" + received_post_content = "" + received_content_type = "" + + request_parms = {} + + EventMachine.run do + EventMachine.start_server(TestHost, TestPort, MyTestServer) do |conn| + # In each accepted connection, set up a procedure that will copy + # the request parameters into a local variable visible here, so + # we can assert the values later. + # The @http_post_content variable is set automatically. + conn.instance_eval do + @assertions = proc do + parms = %w( PATH_INFO QUERY_STRING HTTP_COOKIE IF_NONE_MATCH + CONTENT_TYPE REQUEST_METHOD REQUEST_URI ) + parms.each {|parm| + # request_parms is bound to a local variable visible in this context. + request_parms[parm] = ENV[parm] + } + received_post_content = @http_post_content + + received_content_type = ENV["CONTENT_TYPE"] + end + end + end + EventMachine.add_timer(1) {raise "timed out"} # make sure the test completes + cb = proc do + tcp = TCPSocket.new TestHost, TestPort + data = [ + "POST / HTTP/1.1\r\n", + "Content-type: #{content_type}\r\n", + "Transfer-Encoding: chunked\r\n", + "\r\n", + "#{chunked_content1.length.to_s(16)};somedata\r\n", + "#{chunked_content1}\r\n", + "#{chunked_content2.length.to_s(16)}\r\n", + "#{chunked_content2}\r\n", + "#{chunked_content3.length.to_s(16)}\r\n", + "#{chunked_content3}\r\n", + "0\r\n", + "SOAPaction: \r\n", + "If-none-match: #{etag}\r\n", + "\r\n" + ].join + tcp.write(data) + received_response = tcp.read + end + eb = proc do + EventMachine.stop + end + EventMachine.defer cb, eb + end + + assert_equal( chunked_content1+chunked_content2+chunked_content3, received_post_content) + assert_equal( received_content_type, content_type ) + assert_equal( etag, request_parms["IF_NONE_MATCH"] ) + + end + + + def test_chunked_soap + received_header_string = nil + chunked_content1 = "dsflsjdlfdsllfjsdlfjldsflsdjfljlsdjlfkjlsdlflsdjlfjsdjlfjlskdf" + content_type = "text/xml; charset=UTF-8" + received_chunk="" + received_post_content = "" + received_content_type = "" + + request_parms = {} + + EventMachine.run do + EventMachine.start_server(TestHost, TestPort, MyTestServer) do |conn| + # In each accepted connection, set up a procedure that will copy + # the request parameters into a local variable visible here, so + # we can assert the values later. + # The @http_post_content variable is set automatically. + conn.instance_eval do + @assertions = proc do + parms = %w( PATH_INFO QUERY_STRING HTTP_COOKIE IF_NONE_MATCH + CONTENT_TYPE REQUEST_METHOD REQUEST_URI ) + parms.each {|parm| + # request_parms is bound to a local variable visible in this context. + request_parms[parm] = ENV[parm] + } + received_post_content = @http_post_content + + received_chunk = @http_chunked + received_content_type = ENV["CONTENT_TYPE"] + end + end + end + EventMachine.add_timer(1) {raise "timed out"} # make sure the test completes + + cb = proc do + tcp = TCPSocket.new TestHost, TestPort + data = [ + "POST /ProvisioningGateway/services/SPMLSubscriber10Service HTTP/1.1\r\n", + "Content-type: #{content_type}\r\n", + "SOAPAction: \"urn:gw:SPML:2:0/modifyRequest\"\r\n", + "User-Agent: Axis2\r\n", + "Transfer-Encoding: chunked\r\n", + "\r\n", + "#{chunked_content1.length.to_s(16)}\r\n", + "#{chunked_content1}\r\n", + "0\r\n", + "\r\n" + ].join + # puts "Message: \n#{data}" + tcp.write(data) + received_response = tcp.read + end + eb = proc do + EventMachine.stop + end + EventMachine.defer cb, eb + end + + assert_equal( chunked_content1, received_post_content) + assert_equal( true,received_chunk ) + assert_equal( received_content_type, content_type ) + + end + + def test_chunked_single + received_header_string = nil + chunked_content1 = "1234567890" + content_type = "text/plain" + etag = "12345" + received_post_content = "" + received_content_type = "" + + request_parms = {} + + EventMachine.run do + EventMachine.start_server(TestHost, TestPort, MyTestServer) do |conn| + # In each accepted connection, set up a procedure that will copy + # the request parameters into a local variable visible here, so + # we can assert the values later. + # The @http_post_content variable is set automatically. + conn.instance_eval do + @assertions = proc do + parms = %w( PATH_INFO QUERY_STRING HTTP_COOKIE IF_NONE_MATCH + CONTENT_TYPE REQUEST_METHOD REQUEST_URI ) + parms.each {|parm| + # request_parms is bound to a local variable visible in this context. + request_parms[parm] = ENV[parm] + } + received_post_content = @http_post_content + + received_content_type = ENV["CONTENT_TYPE"] + end + end + end + EventMachine.add_timer(1) {raise "timed out"} # make sure the test completes + + cb = proc do + tcp = TCPSocket.new TestHost, TestPort + data = [ + "POST / HTTP/1.1\r\n", + "Content-type: #{content_type}\r\n", + "Transfer-Encoding: chunked\r\n", + "\r\n", + "#{chunked_content1.length.to_s(16)};somedata\r\n", + "#{chunked_content1}\r\n", + "0\r\n", + "SOAPaction: \r\n", + "If-none-match: #{etag}\r\n", + "\r\n" + ].join + tcp.write(data) + received_response = tcp.read + end + eb = proc do + EventMachine.stop + end + EventMachine.defer cb, eb + end + + assert_equal( chunked_content1, received_post_content) + assert_equal( received_content_type, content_type ) + assert_equal( etag, request_parms["IF_NONE_MATCH"] ) + + end end