From 24d37d71ea253dfe70c803af3874e72572bbff22 Mon Sep 17 00:00:00 2001 From: Greg Hill Date: Wed, 22 Feb 2012 22:47:24 +0000 Subject: [PATCH 1/5] Update files to add chunked encoding functionality --- eventmachine_httpserver.gemspec | 20 ++-- ext/http.cpp | 180 ++++++++++++++++++++++++++-- ext/http.h | 8 +- ext/rubyhttp.cpp | 17 ++- test/test_app.rb | 202 +++++++++++++++++++++++++++++++- 5 files changed, 393 insertions(+), 34 deletions(-) diff --git a/eventmachine_httpserver.gemspec b/eventmachine_httpserver.gemspec index 3ed8dc4..04adaf1 100644 --- a/eventmachine_httpserver.gemspec +++ b/eventmachine_httpserver.gemspec @@ -1,27 +1,25 @@ # -*- encoding: utf-8 -*- Gem::Specification.new do |s| - s.name = %q{eventmachine_httpserver} - s.version = "0.2.1" + s.name = "eventmachine_httpserver" + s.version = "0.2.2" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= - s.authors = ["Francis Cianfrocca"] - s.cert_chain = nil - s.date = %q{2007-03-16} - s.description = %q{} - s.email = %q{garbagecat10@gmail.com} + s.authors = ["Gregory Hill"] + s.date = "2012-02-20" + s.description = "" + s.email = "garbagecat10@gmail.com" s.extensions = ["ext/extconf.rb"] s.extra_rdoc_files = ["docs/COPYING", "docs/README", "docs/RELEASE_NOTES"] s.files = ["README", "Rakefile", "docs/COPYING", "docs/README", "docs/RELEASE_NOTES", "eventmachine_httpserver.gemspec", "eventmachine_httpserver.gemspec.tmpl", "ext/extconf.rb", "ext/http.cpp", "ext/http.h", "ext/rubyhttp.cpp", "lib/evma_httpserver.rb", "lib/evma_httpserver/response.rb", "test/test_app.rb", "test/test_delegated.rb", "test/test_response.rb"] - s.homepage = %q{https://github.com/eventmachine/evma_httpserver} + s.homepage = "https://github.com/eventmachine/evma_httpserver" s.rdoc_options = ["--title", "EventMachine_HttpServer", "--main", "docs/README", "--line-numbers"] s.require_paths = ["lib"] s.required_ruby_version = Gem::Requirement.new("> 0.0.0") - s.rubygems_version = 
%q{1.3.7} - s.summary = %q{EventMachine HTTP Server} + s.rubygems_version = "1.8.11" + s.summary = "EventMachine HTTP Server" if s.respond_to? :specification_version then - current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION s.specification_version = 1 if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then diff --git a/ext/http.cpp b/ext/http.cpp index e426ae9..f141533 100644 --- a/ext/http.cpp +++ b/ext/http.cpp @@ -75,6 +75,8 @@ HttpConnection_t::HttpConnection_t() // instead of buffering it here. To get the latter behavior, user code must call // dont_accumulate_post. bAccumulatePost = true; + + } @@ -84,8 +86,10 @@ HttpConnection_t::~HttpConnection_t HttpConnection_t::~HttpConnection_t() { - if (_Content) + if (_Content) { free (_Content); + _Content = NULL; + } } @@ -125,7 +129,8 @@ void HttpConnection_t::ProcessRequest (const char *method, int post_length, const char *post_content, const char *hdrblock, - int hdrblocksize) + int hdrblocksize, + int content_chunked) { cerr << "UNIMPLEMENTED ProcessRequest" << endl; } @@ -146,6 +151,7 @@ HttpConnection_t::ConsumeData void HttpConnection_t::ConsumeData (const char *data, int length) { + if (ProtocolState == EndState) return; @@ -153,6 +159,7 @@ void HttpConnection_t::ConsumeData (const char *data, int length) throw std::runtime_error ("bad args consuming http data"); while (length > 0) { + //----------------------------------- BaseState // Initialize for a new request. Don't consume any data. // For anal-retentive security we may want to bzero the header block. 
@@ -162,7 +169,10 @@ void HttpConnection_t::ConsumeData (const char *data, int length) HeaderLinePos = 0; HeaderBlockPos = 0; ContentLength = 0; + ContentChunked = 0; + TrailerProcessing = 0; ContentPos = 0; + Chunk_req_received = 0; bRequestSeen = false; bContentLengthSeen = false; if (_Content) { @@ -214,6 +224,8 @@ void HttpConnection_t::ConsumeData (const char *data, int length) } else ProtocolState = HeaderState; + + // sleep(30); } //----------------------------------- HeaderState @@ -240,9 +252,25 @@ void HttpConnection_t::ConsumeData (const char *data, int length) ContentPos = 0; ProtocolState = ReadingContentState; } - else + else if (ContentChunked){ + if (_Content){ + free (_Content); + } + _Content = NULL; + ContentPos = 0; + ProtocolState = ReadingChunkedContent; + ContentChunked = 0; + } + else{ + if(TrailerProcessing){ + ContentLength = ContentPos; + Chunk_req_received = 1; + } + // We will come to here for GET or Chunked POST. ProtocolState = DispatchState; + } } + HeaderLinePos = 0; data++; length--; @@ -255,10 +283,12 @@ void HttpConnection_t::ConsumeData (const char *data, int length) else { const char *nl = strpbrk (data, "\r\n"); int len = nl ? (nl - data) : length; + if ((size_t)(HeaderLinePos + len) >= sizeof(HeaderLine)) { // TODO, log this goto fail_connection; } + memcpy (HeaderLine + HeaderLinePos, data, len); data += len; length -= len; @@ -271,9 +301,9 @@ void HttpConnection_t::ConsumeData (const char *data, int length) // Read POST content. 
while ((ProtocolState == ReadingContentState) && (length > 0)) { int len = ContentLength - ContentPos; - if (len > length) + if (len > length){ len = length; - + } if (bAccumulatePost) memcpy (_Content + ContentPos, data, len); else @@ -283,16 +313,64 @@ void HttpConnection_t::ConsumeData (const char *data, int length) length -= len; ContentPos += len; if (ContentPos == ContentLength) { - if (bAccumulatePost) + if (bAccumulatePost){ _Content[ContentPos] = 0; - ProtocolState = DispatchState; + } + ProtocolState = DispatchState; } } - + //----------------------------------- ReadingChunkedContent + // Read POST chunked content. + while ((ProtocolState == ReadingChunkedContent && length > 0)) { + char *_temp_content = NULL; + int chunkbytelen = 0; + int chunklen = _GetChunkLength(data,&length,&chunkbytelen); //Need pass length address so can modify the same copy. + data += chunkbytelen; + length -= chunkbytelen; + while(chunklen > 0) + { + //copy to temp variable so we don't overwrite _Content with null if we can't alloc. + _temp_content = (char*) realloc(_Content, ContentPos + chunklen + 1); + + if (!_temp_content){ + // free(_Content); + // _Content = NULL; + throw std::runtime_error ("resource exhaustion"); + } + _Content = _temp_content; + + memcpy(_Content + ContentPos, data, chunklen); + ContentPos += chunklen; + _Content[ContentPos] = '\0'; + data += chunklen + 2; + length -= chunklen + 2; + + //Get next chunk length + chunklen = _GetChunkLength(data,&length,&chunkbytelen); + data += chunkbytelen; + length -= chunkbytelen; + } + //Process the next trailer header lines. 
+ ProtocolState = HeaderState; + + } //----------------------------------- DispatchState if (ProtocolState == DispatchState) { - ProcessRequest (RequestMethod, Cookie.c_str(), IfNoneMatch.c_str(), ContentType.c_str(), QueryString.c_str(), PathInfo.c_str(), RequestUri.c_str(), Protocol.c_str(), ContentLength, _Content, HeaderBlock, HeaderBlockPos); + ProcessRequest (RequestMethod, + Cookie.c_str(), + IfNoneMatch.c_str(), + ContentType.c_str(), + QueryString.c_str(), + PathInfo.c_str(), + RequestUri.c_str(), + Protocol.c_str(), + ContentLength, + _Content, + HeaderBlock, + HeaderBlockPos, + Chunk_req_received); + ProtocolState = BaseState; } } @@ -317,7 +395,6 @@ void HttpConnection_t::ConsumeData (const char *data, int length) /************************************** HttpConnection_t::_InterpretHeaderLine **************************************/ - bool HttpConnection_t::_InterpretHeaderLine (const char *header) { /* Return T/F to indicate whether we should continue processing @@ -389,6 +466,12 @@ bool HttpConnection_t::_InterpretHeaderLine (const char *header) setenv ("IF_NONE_MATCH", s, true); } else if (!strncasecmp (header, "Content-type:", 13)) { + //If we receive this header after we have processed chunked data + //send an error. 
+ if(TrailerProcessing){ + _SendError (RESPONSE_CODE_406); + return false; + } const char *s = header + 13; while (*s && ((*s==' ') || (*s=='\t'))) s++; @@ -396,8 +479,26 @@ bool HttpConnection_t::_InterpretHeaderLine (const char *header) if (bSetEnvironmentStrings) setenv ("CONTENT_TYPE", s, true); } - - + else if (!strncasecmp (header, "Transfer-Encoding:", 18)) { + if(TrailerProcessing){ + _SendError (RESPONSE_CODE_406); + return false; + } + const char *s = header + 18; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + if (!strncasecmp (s, "chunked", 7)){ + TrailerProcessing = 1; + ContentChunked = 1; + } + + } + else if (!strncasecmp (header, "Trailer:", 8)) { + if(TrailerProcessing){ + _SendError (RESPONSE_CODE_406); + return false; + } + } // Copy the incoming header into a block if ((HeaderBlockPos + strlen(header) + 1) < HeaderBlockSize) { int len = strlen(header); @@ -582,3 +683,58 @@ void HttpConnection_t::_SendError (const char *header) SendData (ss.str().c_str(), ss.str().length()); } + +/**************************** +HttpConnection_t::_GetChunkLength +****************************/ + +int HttpConnection_t::_GetChunkLength (const char *data, int *length_p, int *chunkbytelen) +{ + char chunklen_s[10] = {0}; + + const char *nl; + int len; + + + //The standards say that there can be a semi-colon after the chunk length, plus some data; + const char * pch = strpbrk (data,";"); + + //if there is a semicolon then we have to extract the bytes upto that colon + //After that we get the position to the end of the line. + if (pch != NULL){ + len = pch ? (pch - data) : *length_p; + //copy the byte stream length into chunklen_s + memcpy (chunklen_s, data, len); + chunklen_s[len] = '\0'; + nl = strpbrk (data, "\r\n"); + len = nl ? (nl - data) : *length_p; + + //now we move the data pointed to the end of the line. 
+ if (len > *length_p){ + len = *length_p; + } + + } + //if there is no semi colon then we have extract the bytes up to the end of the line and + //assume those bytes are the chunk length. + else{ + nl = strpbrk (data, "\r\n"); + len = nl ? (nl - data) : *length_p; + //now we move the data pointed to the end of the line. + if (len > *length_p){ + len = *length_p; + } + memcpy (chunklen_s, data, len); + chunklen_s[len] = '\0'; + } + + // Passing the length back to the calling function. Did try to modify data and length variables + // in this function but for some reason the code completely skipped the line data +=len line...weird. + + //the +2 is for the \r\n characters. + *chunkbytelen = len+2; + + + //convert the length to a long and return. + return (int)strtol ( chunklen_s, NULL, 16 ); +} diff --git a/ext/http.h b/ext/http.h index e3ce03f..65647f6 100644 --- a/ext/http.h +++ b/ext/http.h @@ -55,7 +55,8 @@ class HttpConnection_t int postlength, const char *postdata, const char* hdrblock, - int hdrblksize); + int hdrblksize, + int content_chunked); virtual void ReceivePostData(const char *data, int len); virtual void SetNoEnvironmentStrings() {bSetEnvironmentStrings = false;} @@ -68,6 +69,7 @@ class HttpConnection_t PreheaderState, HeaderState, ReadingContentState, + ReadingChunkedContent, DispatchState, EndState } ProtocolState; @@ -87,8 +89,11 @@ class HttpConnection_t int HeaderBlockPos; int ContentLength; + int ContentChunked; + int TrailerProcessing; int ContentPos; char *_Content; + int Chunk_req_received; bool bSetEnvironmentStrings; bool bAccumulatePost; @@ -105,6 +110,7 @@ class HttpConnection_t std::string Protocol; private: + int _GetChunkLength(const char *, int *, int *); bool _InterpretHeaderLine (const char*); bool _InterpretRequest (const char*); bool _DetectVerbAndSetEnvString (const char*, int); diff --git a/ext/rubyhttp.cpp b/ext/rubyhttp.cpp index c2b8f72..09382c0 100644 --- a/ext/rubyhttp.cpp +++ b/ext/rubyhttp.cpp @@ -56,7 +56,8 @@ class 
RubyHttpConnection_t: public HttpConnection_t int postlength, const char *postdata, const char *hdrblock, - int hdrblocksize); + int hdrblocksize, + int content_chunked); virtual void ReceivePostData (const char *data, int len); private: @@ -114,7 +115,8 @@ void RubyHttpConnection_t::ProcessRequest (const char *request_method, int post_length, const char *post_content, const char *hdr_block, - int hdr_block_size) + int hdr_block_size, + int content_chunked) { VALUE post = Qnil; VALUE headers = Qnil; @@ -126,7 +128,8 @@ void RubyHttpConnection_t::ProcessRequest (const char *request_method, VALUE query_string_val = Qnil; VALUE request_uri_val = Qnil; VALUE protocol_val = Qnil; - + VALUE chunked = Qfalse; + if ((post_length > 0) && post_content) post = rb_str_new (post_content, post_length); @@ -150,8 +153,10 @@ void RubyHttpConnection_t::ProcessRequest (const char *request_method, if (request_uri && *request_uri) request_uri_val = rb_str_new (request_uri, strlen (request_uri)); if (protocol && *protocol) - protocol_val = rb_str_new (protocol, strlen (protocol)); - + protocol_val = rb_str_new (protocol, strlen (protocol)); + if(content_chunked){ + chunked = Qtrue; + } rb_ivar_set (Myself, rb_intern ("@http_request_method"), req_method); rb_ivar_set (Myself, rb_intern ("@http_cookie"), cookie_val); rb_ivar_set (Myself, rb_intern ("@http_if_none_match"), ifnonematch_val); @@ -162,6 +167,7 @@ void RubyHttpConnection_t::ProcessRequest (const char *request_method, rb_ivar_set (Myself, rb_intern ("@http_post_content"), post); rb_ivar_set (Myself, rb_intern ("@http_headers"), headers); rb_ivar_set (Myself, rb_intern ("@http_protocol"), protocol_val); + rb_ivar_set (Myself, rb_intern ("@http_chunked"), chunked); rb_funcall (Myself, rb_intern ("process_http_request"), 0); } @@ -222,6 +228,7 @@ t_receive_data static VALUE t_receive_data (VALUE self, VALUE data) { + int length = NUM2INT (rb_funcall (data, rb_intern ("length"), 0)); RubyHttpConnection_t *hc = 
t_get_http_connection (self); if (hc) diff --git a/test/test_app.rb b/test/test_app.rb index 63e1490..b4126d2 100644 --- a/test/test_app.rb +++ b/test/test_app.rb @@ -158,10 +158,10 @@ def test_headers # a costly operation, but we should provide an optional method that # does the parsing so it doesn't need to be done by users. conn.instance_eval do - @assertions = proc do + @assertions = proc { received_header_string = @http_headers received_header_ary = @http_headers.split(/\0/).map {|line| line.split(/:\s*/, 2) } - end + } end end @@ -198,7 +198,8 @@ def test_post content_type = "text/plain" received_post_content = "" received_content_type = "" - + received_chunk="" + EventMachine.run do EventMachine.start_server(TestHost, TestPort, MyTestServer) do |conn| # In each accepted connection, set up a procedure that will copy @@ -209,6 +210,7 @@ def test_post @assertions = proc do received_post_content = @http_post_content received_content_type = ENV["CONTENT_TYPE"] + received_chunk = @http_chunked end end end @@ -232,8 +234,198 @@ def test_post EventMachine.defer cb, eb end - assert_equal( received_post_content, post_content ) + assert_equal( post_content, received_post_content) assert_equal( received_content_type, content_type ) + assert_equal( false,received_chunk ) end + + def test_chunked + received_header_string = nil + chunked_content1 = "1234567890" + chunked_content2 = "abcdefgh" + chunked_content3 = "This will be a lot longer than the previous two chunks." + content_type = "text/plain" + etag = "12345" + received_post_content = "" + received_content_type = "" + + request_parms = {} -end + EventMachine.run do + EventMachine.start_server(TestHost, TestPort, MyTestServer) do |conn| + # In each accepted connection, set up a procedure that will copy + # the request parameters into a local variable visible here, so + # we can assert the values later. + # The @http_post_content variable is set automatically. 
+ conn.instance_eval do + @assertions = proc do + parms = %w( PATH_INFO QUERY_STRING HTTP_COOKIE IF_NONE_MATCH + CONTENT_TYPE REQUEST_METHOD REQUEST_URI ) + parms.each {|parm| + # request_parms is bound to a local variable visible in this context. + request_parms[parm] = ENV[parm] + } + received_post_content = @http_post_content + + received_content_type = ENV["CONTENT_TYPE"] + end + end + end + EventMachine.add_timer(1) {raise "timed out"} # make sure the test completes + + cb = proc do + tcp = TCPSocket.new TestHost, TestPort + data = [ + "POST / HTTP/1.1\r\n", + "Content-type: #{content_type}\r\n", + "Transfer-Encoding: chunked\r\n", + "\r\n", + "#{chunked_content1.length.to_s(16)};somedata\r\n", + "#{chunked_content1}\r\n", + "#{chunked_content2.length.to_s(16)}\r\n", + "#{chunked_content2}\r\n", + "#{chunked_content3.length.to_s(16)}\r\n", + "#{chunked_content3}\r\n", + "0\r\n", + "SOAPaction: \r\n", + "If-none-match: #{etag}\r\n", + "\r\n" + ].join + tcp.write(data) + received_response = tcp.read + end + eb = proc do + EventMachine.stop + end + EventMachine.defer cb, eb + end + + assert_equal( chunked_content1+chunked_content2+chunked_content3, received_post_content) + assert_equal( received_content_type, content_type ) + assert_equal( etag, request_parms["IF_NONE_MATCH"] ) + + end + + + def test_chunked_soap + received_header_string = nil + chunked_content1 = "SUBSCRIBER_v10Subscriberidentifier" + content_type = "text/plain; charset=UTF-8" + received_chunk="" + received_post_content = "" + received_content_type = "" + + request_parms = {} + + EventMachine.run do + EventMachine.start_server(TestHost, TestPort, MyTestServer) do |conn| + # In each accepted connection, set up a procedure that will copy + # the request parameters into a local variable visible here, so + # we can assert the values later. + # The @http_post_content variable is set automatically. 
+ conn.instance_eval do + @assertions = proc do + parms = %w( PATH_INFO QUERY_STRING HTTP_COOKIE IF_NONE_MATCH + CONTENT_TYPE REQUEST_METHOD REQUEST_URI ) + parms.each {|parm| + # request_parms is bound to a local variable visible in this context. + request_parms[parm] = ENV[parm] + } + received_post_content = @http_post_content + received_chunk = @http_chunked + received_content_type = ENV["CONTENT_TYPE"] + end + end + end + EventMachine.add_timer(1) {raise "timed out"} # make sure the test completes + + cb = proc do + tcp = TCPSocket.new TestHost, TestPort + data = [ + "POST / HTTP/1.1\r\n", + "User-Agent: Axis2\r\n", + "SOAPAction: \"urn:siemens:names:prov:gw:SPML:2:0/searchRequest\"\r\n", + "Content-type: #{content_type}\r\n", + "Transfer-Encoding: chunked\r\n", + "\r\n", + "#{chunked_content1.length.to_s(16)}\r\n", + "#{chunked_content1}\r\n", + "0\r\n", + "\r\n" + ].join + tcp.write(data) + received_response = tcp.read + end + eb = proc do + EventMachine.stop + end + EventMachine.defer cb, eb + end + + assert_equal( chunked_content1, received_post_content) + assert_equal( true,received_chunk ) + assert_equal( received_content_type, content_type ) + + end + + def test_chunked_single + received_header_string = nil + chunked_content1 = "1234567890" + content_type = "text/plain" + etag = "12345" + received_post_content = "" + received_content_type = "" + + request_parms = {} + + EventMachine.run do + EventMachine.start_server(TestHost, TestPort, MyTestServer) do |conn| + # In each accepted connection, set up a procedure that will copy + # the request parameters into a local variable visible here, so + # we can assert the values later. + # The @http_post_content variable is set automatically. + conn.instance_eval do + @assertions = proc do + parms = %w( PATH_INFO QUERY_STRING HTTP_COOKIE IF_NONE_MATCH + CONTENT_TYPE REQUEST_METHOD REQUEST_URI ) + parms.each {|parm| + # request_parms is bound to a local variable visible in this context. 
+ request_parms[parm] = ENV[parm] + } + received_post_content = @http_post_content + + received_content_type = ENV["CONTENT_TYPE"] + end + end + end + EventMachine.add_timer(1) {raise "timed out"} # make sure the test completes + + cb = proc do + tcp = TCPSocket.new TestHost, TestPort + data = [ + "POST / HTTP/1.1\r\n", + "Content-type: #{content_type}\r\n", + "Transfer-Encoding: chunked\r\n", + "\r\n", + "#{chunked_content1.length.to_s(16)};somedata\r\n", + "#{chunked_content1}\r\n", + "0\r\n", + "SOAPaction: \r\n", + "If-none-match: #{etag}\r\n", + "\r\n" + ].join + tcp.write(data) + received_response = tcp.read + end + eb = proc do + EventMachine.stop + end + EventMachine.defer cb, eb + end + + assert_equal( chunked_content1, received_post_content) + assert_equal( received_content_type, content_type ) + assert_equal( etag, request_parms["IF_NONE_MATCH"] ) + + end +end \ No newline at end of file From 4fa8f94aca1f3660c18e7691978e0f623ed67c61 Mon Sep 17 00:00:00 2001 From: Greg Hill Date: Fri, 13 Apr 2012 19:26:46 +0100 Subject: [PATCH 2/5] Latest chunked encoding code to handle packets over MTU boundaries --- eventmachine_httpserver.gemspec | 2 +- ext/http.cpp | 200 ++++++++++++++++++++++++-------------------- ext/http.h | 8 ++ test/test_app.rb | 14 ++- 4 files changed, 133 insertions(+), 91 deletions(-) diff --git a/eventmachine_httpserver.gemspec b/eventmachine_httpserver.gemspec index 04adaf1..647b4fd 100644 --- a/eventmachine_httpserver.gemspec +++ b/eventmachine_httpserver.gemspec @@ -2,7 +2,7 @@ Gem::Specification.new do |s| s.name = "eventmachine_httpserver" - s.version = "0.2.2" + s.version = "0.2.3" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? 
:required_rubygems_version= s.authors = ["Gregory Hill"] diff --git a/ext/http.cpp b/ext/http.cpp index f141533..13b2dd5 100644 --- a/ext/http.cpp +++ b/ext/http.cpp @@ -146,7 +146,7 @@ void HttpConnection_t::ReceivePostData (const char *data, int len) } /***************************** -HttpConnection_t::ConsumeData +HttpConnection_t:: *****************************/ void HttpConnection_t::ConsumeData (const char *data, int length) @@ -173,6 +173,9 @@ void HttpConnection_t::ConsumeData (const char *data, int length) TrailerProcessing = 0; ContentPos = 0; Chunk_req_received = 0; + foundsemi = 0; + foundslashr = 0; + chunklen = 0; bRequestSeen = false; bContentLengthSeen = false; if (_Content) { @@ -225,7 +228,7 @@ void HttpConnection_t::ConsumeData (const char *data, int length) else ProtocolState = HeaderState; - // sleep(30); + //sleep(30); } //----------------------------------- HeaderState @@ -258,7 +261,13 @@ void HttpConnection_t::ConsumeData (const char *data, int length) } _Content = NULL; ContentPos = 0; - ProtocolState = ReadingChunkedContent; + ProtocolState = ReadingChunkLen; + //clear our header buffer, reset flags + memset(chunklen_s, '\0',10); + foundsemi = 0; + foundslashr = 0; + foundslashn = 0; + chunklen = 0; ContentChunked = 0; } else{ @@ -319,40 +328,118 @@ void HttpConnection_t::ConsumeData (const char *data, int length) ProtocolState = DispatchState; } } + + int chunklen_pos = 0; + //----------------------------------- ReadingChunkLen + // Read POST chunked content. + while ((ProtocolState == ReadingChunkLen) && length > 0) + { + if(*data == ';') + { + //The standards say that there can be a semi-colon after the chunk length, plus some data; + foundsemi = 1; + } + else if(*data == '\r') + { + foundslashr =1; + } + else if(*data == '\n') + { + //If we find a slash n without a slash r then this message is malformatted + if(! foundslashr) + { + goto send_error; + } + foundslashn = 1; + } + else + { + if( ! 
foundsemi ) + { + chunklen_s[chunklen_pos] = *data; + chunklen_pos++; + } + } + + data = data+1; + length--; + if (foundslashn) + { + //convert the length to a long + chunklen = (int)strtol ( chunklen_s, NULL, 16 ); + if (chunklen == 0 ) + { + ProtocolState = HeaderState; + + } + else + { + ProtocolState = ReadingChunkedContent; + } + } + } + int readamount =0; + int readischunk=0; //----------------------------------- ReadingChunkedContent // Read POST chunked content. - while ((ProtocolState == ReadingChunkedContent && length > 0)) { - char *_temp_content = NULL; - int chunkbytelen = 0; - int chunklen = _GetChunkLength(data,&length,&chunkbytelen); //Need pass length address so can modify the same copy. - data += chunkbytelen; - length -= chunkbytelen; - while(chunklen > 0) + while ((ProtocolState == ReadingChunkedContent && length > 0)) + { + + if(length <= chunklen) { - //copy to temp variable so we don't overwrite _Content with null if we can't alloc. - _temp_content = (char*) realloc(_Content, ContentPos + chunklen + 1); + readamount = length; + chunklen-=length; - if (!_temp_content){ - // free(_Content); - // _Content = NULL; - throw std::runtime_error ("resource exhaustion"); + } + else + { + readischunk = 1; + readamount = chunklen; + } + + char *_temp_content = NULL; + //copy to temp variable so we don't overwrite _Content with null if we can't alloc. 
+ _temp_content = (char*) realloc(_Content, ContentPos + readamount + 1); + + if (!_temp_content){ + // free(_Content); + // _Content = NULL; + throw std::runtime_error ("resource exhaustion"); + } + _Content = _temp_content; + + memcpy(_Content + ContentPos, data, readamount); + ContentPos += readamount; + _Content[ContentPos] = '\0'; + data += readamount; + length -= readamount; + + //If we just read the entire chunk then we can expect the next + // two characters to be \r\n + if(readischunk) + { + if(*data != '\r') + { + goto send_error; } - _Content = _temp_content; - memcpy(_Content + ContentPos, data, chunklen); - ContentPos += chunklen; - _Content[ContentPos] = '\0'; - data += chunklen + 2; - length -= chunklen + 2; + data++; + length--; - //Get next chunk length - chunklen = _GetChunkLength(data,&length,&chunkbytelen); - data += chunkbytelen; - length -= chunkbytelen; + if(*data != '\n') + { + goto send_error; + } + data++; + length--; + ProtocolState = ReadingChunkLen; + memset(chunklen_s, '\0',10); //clear our header buffer. + foundsemi = 0; + foundslashr = 0; + chunklen = 0; + foundslashn = 0; } - //Process the next trailer header lines. - ProtocolState = HeaderState; - + } //----------------------------------- DispatchState @@ -683,58 +770,3 @@ void HttpConnection_t::_SendError (const char *header) SendData (ss.str().c_str(), ss.str().length()); } - -/**************************** -HttpConnection_t::_GetChunkLength -****************************/ - -int HttpConnection_t::_GetChunkLength (const char *data, int *length_p, int *chunkbytelen) -{ - char chunklen_s[10] = {0}; - - const char *nl; - int len; - - - //The standards say that there can be a semi-colon after the chunk length, plus some data; - const char * pch = strpbrk (data,";"); - - //if there is a semicolon then we have to extract the bytes upto that colon - //After that we get the position to the end of the line. - if (pch != NULL){ - len = pch ? 
(pch - data) : *length_p; - //copy the byte stream length into chunklen_s - memcpy (chunklen_s, data, len); - chunklen_s[len] = '\0'; - nl = strpbrk (data, "\r\n"); - len = nl ? (nl - data) : *length_p; - - //now we move the data pointed to the end of the line. - if (len > *length_p){ - len = *length_p; - } - - } - //if there is no semi colon then we have extract the bytes up to the end of the line and - //assume those bytes are the chunk length. - else{ - nl = strpbrk (data, "\r\n"); - len = nl ? (nl - data) : *length_p; - //now we move the data pointed to the end of the line. - if (len > *length_p){ - len = *length_p; - } - memcpy (chunklen_s, data, len); - chunklen_s[len] = '\0'; - } - - // Passing the length back to the calling function. Did try to modify data and length variables - // in this function but for some reason the code completely skipped the line data +=len line...weird. - - //the +2 is for the \r\n characters. - *chunkbytelen = len+2; - - - //convert the length to a long and return. 
- return (int)strtol ( chunklen_s, NULL, 16 ); -} diff --git a/ext/http.h b/ext/http.h index 65647f6..cd9d987 100644 --- a/ext/http.h +++ b/ext/http.h @@ -69,6 +69,7 @@ class HttpConnection_t PreheaderState, HeaderState, ReadingContentState, + ReadingChunkLen, ReadingChunkedContent, DispatchState, EndState @@ -94,11 +95,18 @@ class HttpConnection_t int ContentPos; char *_Content; int Chunk_req_received; + + char chunklen_s[10]; + int foundsemi; + int foundslashr; + int chunklen; + int foundslashn; bool bSetEnvironmentStrings; bool bAccumulatePost; bool bRequestSeen; bool bContentLengthSeen; +// ofstream programlog; const char *RequestMethod; std::string Cookie; diff --git a/test/test_app.rb b/test/test_app.rb index b4126d2..8f07569 100644 --- a/test/test_app.rb +++ b/test/test_app.rb @@ -309,8 +309,8 @@ def test_chunked def test_chunked_soap received_header_string = nil - chunked_content1 = "SUBSCRIBER_v10Subscriberidentifier" - content_type = "text/plain; charset=UTF-8" + chunked_content1 = "dsflsjdlfdsllfjsdlfjldsflsdjfljlsdjlfkjlsdlflsdjlfjsdjlfjlskdf" + content_type = "text/xml; charset=UTF-8" received_chunk="" received_post_content = "" received_content_type = "" @@ -332,6 +332,7 @@ def test_chunked_soap request_parms[parm] = ENV[parm] } received_post_content = @http_post_content + received_chunk = @http_chunked received_content_type = ENV["CONTENT_TYPE"] end @@ -342,10 +343,10 @@ def test_chunked_soap cb = proc do tcp = TCPSocket.new TestHost, TestPort data = [ - "POST / HTTP/1.1\r\n", - "User-Agent: Axis2\r\n", - "SOAPAction: \"urn:siemens:names:prov:gw:SPML:2:0/searchRequest\"\r\n", + "POST /ProvisioningGateway/services/SPMLSubscriber10Service HTTP/1.1\r\n", "Content-type: #{content_type}\r\n", + "SOAPAction: \"urn:gw:SPML:2:0/modifyRequest\"\r\n", + "User-Agent: Axis2\r\n", "Transfer-Encoding: chunked\r\n", "\r\n", "#{chunked_content1.length.to_s(16)}\r\n", @@ -353,6 +354,7 @@ def test_chunked_soap "0\r\n", "\r\n" ].join + # puts "Message: \n#{data}" 
tcp.write(data) received_response = tcp.read end @@ -428,4 +430,4 @@ def test_chunked_single assert_equal( etag, request_parms["IF_NONE_MATCH"] ) end -end \ No newline at end of file +end From d6119907692a45d8c732d2edae13ecde5b453002 Mon Sep 17 00:00:00 2001 From: Greg Hill Date: Sat, 7 Feb 2015 14:36:50 -0800 Subject: [PATCH 3/5] Replace README with README.md in gemspec --- eventmachine_httpserver.gemspec | 4 ++-- eventmachine_httpserver.gemspec.tmpl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/eventmachine_httpserver.gemspec b/eventmachine_httpserver.gemspec index dbbae00..c9368b9 100644 --- a/eventmachine_httpserver.gemspec +++ b/eventmachine_httpserver.gemspec @@ -2,7 +2,7 @@ Gem::Specification.new do |s| s.name = %q{eventmachine_httpserver} - s.version = "0.2.1" + s.version = "0.2.3" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= s.authors = ["Francis Cianfrocca"] @@ -12,7 +12,7 @@ Gem::Specification.new do |s| s.email = %q{garbagecat10@gmail.com} s.extensions = ["ext/extconf.rb"] s.extra_rdoc_files = ["docs/COPYING", "docs/README", "docs/RELEASE_NOTES"] - s.files = ["README", "Rakefile", "docs/COPYING", "docs/README", "docs/RELEASE_NOTES", "eventmachine_httpserver.gemspec", "eventmachine_httpserver.gemspec.tmpl", "ext/extconf.rb", "ext/http.cpp", "ext/http.h", "ext/rubyhttp.cpp", "lib/evma_httpserver.rb", "lib/evma_httpserver/response.rb", "test/test_app.rb", "test/test_delegated.rb", "test/test_response.rb"] + s.files = ["README.md", "Rakefile", "docs/COPYING", "docs/README", "docs/RELEASE_NOTES", "eventmachine_httpserver.gemspec", "eventmachine_httpserver.gemspec.tmpl", "ext/extconf.rb", "ext/http.cpp", "ext/http.h", "ext/rubyhttp.cpp", "lib/evma_httpserver.rb", "lib/evma_httpserver/response.rb", "test/test_app.rb", "test/test_delegated.rb", "test/test_response.rb"] s.homepage = %q{https://github.com/eventmachine/evma_httpserver} s.rdoc_options = ["--title", 
"EventMachine_HttpServer", "--main", "docs/README", "--line-numbers"] s.require_paths = ["lib"] diff --git a/eventmachine_httpserver.gemspec.tmpl b/eventmachine_httpserver.gemspec.tmpl index 07adb07..38461ec 100644 --- a/eventmachine_httpserver.gemspec.tmpl +++ b/eventmachine_httpserver.gemspec.tmpl @@ -1,6 +1,6 @@ Gem::Specification.new do |s| s.name = %q{eventmachine_httpserver} - s.version = "0.2.1" + s.version = "0.2.3" s.specification_version = 1 if s.respond_to? :specification_version= From 76eca6a16eb8b3405d552295fe69decd0d69c1d1 Mon Sep 17 00:00:00 2001 From: Greg Hill Date: Sat, 7 Feb 2015 14:43:51 -0800 Subject: [PATCH 4/5] Updated Gemspec version --- eventmachine_httpserver.gemspec | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/eventmachine_httpserver.gemspec b/eventmachine_httpserver.gemspec index c21f6e3..ec761c6 100644 --- a/eventmachine_httpserver.gemspec +++ b/eventmachine_httpserver.gemspec @@ -1,9 +1,8 @@ # -*- encoding: utf-8 -*- Gem::Specification.new do |s| -<<<<<<< HEAD s.name = %q{eventmachine_httpserver} - s.version = "0.2.3" + s.version = "0.3.0" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? 
:required_rubygems_version= s.authors = ["Francis Cianfrocca"] From 591070ada0d7e6383abd7664642e16d959fb2fe8 Mon Sep 17 00:00:00 2001 From: Greg Hill Date: Sat, 7 Feb 2015 17:06:06 -0800 Subject: [PATCH 5/5] removes tabs from file --- ext/http.cpp | 961 +++++++++++++++++++++++++-------------------------- ext/http.h | 133 ++++--- 2 files changed, 542 insertions(+), 552 deletions(-) diff --git a/ext/http.cpp b/ext/http.cpp index 20287e0..e72c52b 100644 --- a/ext/http.cpp +++ b/ext/http.cpp @@ -45,11 +45,11 @@ using namespace std; #define strcasecmp _stricmp void setenv (const char *str, const char *value, bool replace) { - SetEnvironmentVariable (str, value); + SetEnvironmentVariable (str, value); } void unsetenv (const char *str) { - SetEnvironmentVariable (str, NULL); + SetEnvironmentVariable (str, NULL); } #endif @@ -60,23 +60,23 @@ HttpConnection_t::HttpConnection_t HttpConnection_t::HttpConnection_t() { - ProtocolState = BaseState; - _Content = NULL; - - // By default, we set the standard CGI environment strings. - // (This is primarily beneficial because it lets the caller use Ruby's CGI classes.) - // The caller can switch this off in Ruby code, which greatly improves performance. - bSetEnvironmentStrings = true; - - // This flag was added by Kirk Haines (thanks, Kirk). It preserves the original - // behavior with respect to POST content, which was to accumulate it in a buffer - // allocated and managed in this class. Kirk's mods allow callers to specify that - // POST content be submitted directly to user code piece by piece as we receive it, - // instead of buffering it here. To get the latter behavior, user code must call - // dont_accumulate_post. - bAccumulatePost = true; - - + ProtocolState = BaseState; + _Content = NULL; + + // By default, we set the standard CGI environment strings. + // (This is primarily beneficial because it lets the caller use Ruby's CGI classes.) + // The caller can switch this off in Ruby code, which greatly improves performance. 
+ bSetEnvironmentStrings = true; + + // This flag was added by Kirk Haines (thanks, Kirk). It preserves the original + // behavior with respect to POST content, which was to accumulate it in a buffer + // allocated and managed in this class. Kirk's mods allow callers to specify that + // POST content be submitted directly to user code piece by piece as we receive it, + // instead of buffering it here. To get the latter behavior, user code must call + // dont_accumulate_post. + bAccumulatePost = true; + + } @@ -86,8 +86,8 @@ HttpConnection_t::~HttpConnection_t HttpConnection_t::~HttpConnection_t() { - if (_Content) { - free (_Content); + if (_Content) { + free (_Content); _Content = NULL; } } @@ -119,17 +119,17 @@ HttpConnection_t::ProcessRequest ********************************/ void HttpConnection_t::ProcessRequest (const char *method, - const char *cookie, - const char *ifnonematch, - const char *contenttype, - const char *query_string, - const char *path_info, - const char *request_uri, - const char *protocol, - int post_length, - const char *post_content, - const char *hdrblock, - int hdrblocksize, + const char *cookie, + const char *ifnonematch, + const char *contenttype, + const char *query_string, + const char *path_info, + const char *request_uri, + const char *protocol, + int post_length, + const char *post_content, + const char *hdrblock, + int hdrblocksize, int content_chunked) { cerr << "UNIMPLEMENTED ProcessRequest" << endl; @@ -142,198 +142,198 @@ HttpConnection_t::ReceivePostData void HttpConnection_t::ReceivePostData (const char *data, int len) { - cerr << "UNIMPLEMENTED ReceivePostData" << endl; + cerr << "UNIMPLEMENTED ReceivePostData" << endl; } /***************************** -HttpConnection_t:: +HttpConnection_t:: *****************************/ void HttpConnection_t::ConsumeData (const char *data, int length) { - if (ProtocolState == EndState) - return; - - if ((length > 0) && !data) - throw std::runtime_error ("bad args consuming http data"); - 
- while (length > 0) { - - //----------------------------------- BaseState - // Initialize for a new request. Don't consume any data. - // For anal-retentive security we may want to bzero the header block. - if (ProtocolState == BaseState) { - ProtocolState = PreheaderState; - nLeadingBlanks = 0; - HeaderLinePos = 0; - HeaderBlockPos = 0; - ContentLength = 0; + if (ProtocolState == EndState) + return; + + if ((length > 0) && !data) + throw std::runtime_error ("bad args consuming http data"); + + while (length > 0) { + + //----------------------------------- BaseState + // Initialize for a new request. Don't consume any data. + // For anal-retentive security we may want to bzero the header block. + if (ProtocolState == BaseState) { + ProtocolState = PreheaderState; + nLeadingBlanks = 0; + HeaderLinePos = 0; + HeaderBlockPos = 0; + ContentLength = 0; ContentChunked = 0; TrailerProcessing = 0; - ContentPos = 0; + ContentPos = 0; Chunk_req_received = 0; foundsemi = 0; foundslashr = 0; chunklen = 0; - bRequestSeen = false; - bContentLengthSeen = false; - if (_Content) { - free ((void*)_Content); - _Content = NULL; - } - RequestMethod = NULL; - #ifdef OS_WIN32 - Cookie.erase(Cookie.begin(),Cookie.end()); - IfNoneMatch.erase(IfNoneMatch.begin(),IfNoneMatch.end()); - ContentType.erase(ContentType.begin(),ContentType.end()); - PathInfo.erase(PathInfo.begin(),PathInfo.end()); - RequestUri.erase(RequestUri.begin(),RequestUri.end()); - QueryString.erase(QueryString.begin(),QueryString.end()); - Protocol.erase(Protocol.begin(),Protocol.end()); - #else - Cookie.clear(); - IfNoneMatch.clear(); - ContentType.clear(); - PathInfo.clear(); - RequestUri.clear(); - QueryString.clear(); - Protocol.clear(); - #endif - - if (bSetEnvironmentStrings) { - unsetenv ("REQUEST_METHOD"); - unsetenv ("HTTP_COOKIE"); - unsetenv ("IF_NONE_MATCH"); - unsetenv ("CONTENT_TYPE"); - unsetenv ("PATH_INFO"); - unsetenv ("REQUEST_URI"); - unsetenv ("QUERY_STRING"); - unsetenv ("PROTOCOL"); - } - } - - 
//----------------------------------- PreheaderState - // Consume blank lines (but not too many of them) - while ((ProtocolState == PreheaderState) && (length > 0)) { - if ((*data == '\r') || (*data == '\n')) { - data++; - length--; - nLeadingBlanks++; - if (nLeadingBlanks > MaxLeadingBlanks) { - // TODO, log this. - goto fail_connection; - } - } - else - ProtocolState = HeaderState; - + bRequestSeen = false; + bContentLengthSeen = false; + if (_Content) { + free ((void*)_Content); + _Content = NULL; + } + RequestMethod = NULL; + #ifdef OS_WIN32 + Cookie.erase(Cookie.begin(),Cookie.end()); + IfNoneMatch.erase(IfNoneMatch.begin(),IfNoneMatch.end()); + ContentType.erase(ContentType.begin(),ContentType.end()); + PathInfo.erase(PathInfo.begin(),PathInfo.end()); + RequestUri.erase(RequestUri.begin(),RequestUri.end()); + QueryString.erase(QueryString.begin(),QueryString.end()); + Protocol.erase(Protocol.begin(),Protocol.end()); + #else + Cookie.clear(); + IfNoneMatch.clear(); + ContentType.clear(); + PathInfo.clear(); + RequestUri.clear(); + QueryString.clear(); + Protocol.clear(); + #endif + + if (bSetEnvironmentStrings) { + unsetenv ("REQUEST_METHOD"); + unsetenv ("HTTP_COOKIE"); + unsetenv ("IF_NONE_MATCH"); + unsetenv ("CONTENT_TYPE"); + unsetenv ("PATH_INFO"); + unsetenv ("REQUEST_URI"); + unsetenv ("QUERY_STRING"); + unsetenv ("PROTOCOL"); + } + } + + //----------------------------------- PreheaderState + // Consume blank lines (but not too many of them) + while ((ProtocolState == PreheaderState) && (length > 0)) { + if ((*data == '\r') || (*data == '\n')) { + data++; + length--; + nLeadingBlanks++; + if (nLeadingBlanks > MaxLeadingBlanks) { + // TODO, log this. + goto fail_connection; + } + } + else + ProtocolState = HeaderState; + //sleep(30); - } - - //----------------------------------- HeaderState - // Read HTTP headers. 
- // This processing depends on the fact that the end - // of the data buffer we receive will have a null terminator - // just after the last byte indicated by the length parameter. - // Cf notes in ConnectionDescriptor::Read. - while ((ProtocolState == HeaderState) && (length > 0)) { - if (*data == '\n') { - HeaderLine [HeaderLinePos] = 0; - if (!_InterpretHeaderLine (HeaderLine)) - goto send_error; - if (HeaderLinePos == 0) { - if (ContentLength > 0) { - if (_Content) - free (_Content); - _Content = NULL; - if (bAccumulatePost) { - _Content = (char*) malloc (ContentLength + 1); - if (!_Content) - throw std::runtime_error ("resource exhaustion"); - } - ContentPos = 0; - ProtocolState = ReadingContentState; - } + } + + //----------------------------------- HeaderState + // Read HTTP headers. + // This processing depends on the fact that the end + // of the data buffer we receive will have a null terminator + // just after the last byte indicated by the length parameter. + // Cf notes in ConnectionDescriptor::Read. 
+ while ((ProtocolState == HeaderState) && (length > 0)) { + if (*data == '\n') { + HeaderLine [HeaderLinePos] = 0; + if (!_InterpretHeaderLine (HeaderLine)) + goto send_error; + if (HeaderLinePos == 0) { + if (ContentLength > 0) { + if (_Content) + free (_Content); + _Content = NULL; + if (bAccumulatePost) { + _Content = (char*) malloc (ContentLength + 1); + if (!_Content) + throw std::runtime_error ("resource exhaustion"); + } + ContentPos = 0; + ProtocolState = ReadingContentState; + } else if (ContentChunked){ - if (_Content){ - free (_Content); - } - _Content = NULL; - ContentPos = 0; - ProtocolState = ReadingChunkLen; - //clear our header buffer, reset flags - memset(chunklen_s, '\0',10); - foundsemi = 0; - foundslashr = 0; - foundslashn = 0; - chunklen = 0; - ContentChunked = 0; - } - else{ + if (_Content){ + free (_Content); + } + _Content = NULL; + ContentPos = 0; + ProtocolState = ReadingChunkLen; + //clear our header buffer, reset flags + memset(chunklen_s, '\0',10); + foundsemi = 0; + foundslashr = 0; + foundslashn = 0; + chunklen = 0; + ContentChunked = 0; + } + else{ if(TrailerProcessing){ ContentLength = ContentPos; Chunk_req_received = 1; } // We will come to here for GET or Chunked POST. - ProtocolState = DispatchState; + ProtocolState = DispatchState; } - } - - HeaderLinePos = 0; - data++; - length--; - } - else if (*data == '\r') { - // ignore \r - data++; - length--; - } - else { - const char *nl = strpbrk (data, "\r\n"); - int len = nl ? (nl - data) : length; - - if ((size_t)(HeaderLinePos + len) >= sizeof(HeaderLine)) { - // TODO, log this - goto fail_connection; - } - - memcpy (HeaderLine + HeaderLinePos, data, len); - data += len; - length -= len; - HeaderLinePos += len; - } - } - - - //----------------------------------- ReadingContentState - // Read POST content. 
- while ((ProtocolState == ReadingContentState) && (length > 0)) { - int len = ContentLength - ContentPos; - if (len > length){ - len = length; + } + + HeaderLinePos = 0; + data++; + length--; + } + else if (*data == '\r') { + // ignore \r + data++; + length--; + } + else { + const char *nl = strpbrk (data, "\r\n"); + int len = nl ? (nl - data) : length; + + if ((size_t)(HeaderLinePos + len) >= sizeof(HeaderLine)) { + // TODO, log this + goto fail_connection; + } + + memcpy (HeaderLine + HeaderLinePos, data, len); + data += len; + length -= len; + HeaderLinePos += len; + } + } + + + //----------------------------------- ReadingContentState + // Read POST content. + while ((ProtocolState == ReadingContentState) && (length > 0)) { + int len = ContentLength - ContentPos; + if (len > length){ + len = length; } - if (bAccumulatePost) - memcpy (_Content + ContentPos, data, len); - else - ReceivePostData (data, len); - - data += len; - length -= len; - ContentPos += len; - if (ContentPos == ContentLength) { - if (bAccumulatePost){ - _Content[ContentPos] = 0; - } + if (bAccumulatePost) + memcpy (_Content + ContentPos, data, len); + else + ReceivePostData (data, len); + + data += len; + length -= len; + ContentPos += len; + if (ContentPos == ContentLength) { + if (bAccumulatePost){ + _Content[ContentPos] = 0; + } ProtocolState = DispatchState; - } - } - + } + } + int chunklen_pos = 0; - //----------------------------------- ReadingChunkLen - // Read POST chunked content. + //----------------------------------- ReadingChunkLen + // Read POST chunked content. 
while ((ProtocolState == ReadingChunkLen) && length > 0) - { + { if(*data == ';') { //The standards say that there can be a semi-colon after the chunk length, plus some data; @@ -359,18 +359,18 @@ void HttpConnection_t::ConsumeData (const char *data, int length) chunklen_s[chunklen_pos] = *data; chunklen_pos++; } - } - + } + data = data+1; length--; if (foundslashn) { //convert the length to a long - chunklen = (int)strtol ( chunklen_s, NULL, 16 ); + chunklen = (int)strtol ( chunklen_s, NULL, 16 ); if (chunklen == 0 ) { ProtocolState = HeaderState; - + } else { @@ -380,18 +380,16 @@ void HttpConnection_t::ConsumeData (const char *data, int length) } int readamount =0; int readischunk=0; - //----------------------------------- ReadingChunkedContent - // Read POST chunked content. + //----------------------------------- ReadingChunkedContent + // Read POST chunked content. while ((ProtocolState == ReadingChunkedContent && length > 0)) { - if(length <= chunklen) { readamount = length; chunklen-=length; - } - else + else { readischunk = 1; readamount = chunklen; @@ -400,32 +398,32 @@ void HttpConnection_t::ConsumeData (const char *data, int length) char *_temp_content = NULL; //copy to temp variable so we don't overwrite _Content with null if we can't alloc. 
_temp_content = (char*) realloc(_Content, ContentPos + readamount + 1); - + if (!_temp_content){ // free(_Content); // _Content = NULL; throw std::runtime_error ("resource exhaustion"); } _Content = _temp_content; - + memcpy(_Content + ContentPos, data, readamount); ContentPos += readamount; _Content[ContentPos] = '\0'; data += readamount; length -= readamount; - //If we just read the entire chunk then we can expect the next + //If we just read the entire chunk then we can expect the next // two characters to be \r\n if(readischunk) { if(*data != '\r') { - goto send_error; + goto send_error; } - + data++; length--; - + if(*data != '\n') { goto send_error; @@ -439,42 +437,39 @@ void HttpConnection_t::ConsumeData (const char *data, int length) chunklen = 0; foundslashn = 0; } - } - - //----------------------------------- DispatchState - if (ProtocolState == DispatchState) { - ProcessRequest (RequestMethod, - Cookie.c_str(), - IfNoneMatch.c_str(), - ContentType.c_str(), - QueryString.c_str(), - PathInfo.c_str(), - RequestUri.c_str(), - Protocol.c_str(), - ContentLength, - _Content, - HeaderBlock, - HeaderBlockPos, + //----------------------------------- DispatchState + if (ProtocolState == DispatchState) { + ProcessRequest (RequestMethod, + Cookie.c_str(), + IfNoneMatch.c_str(), + ContentType.c_str(), + QueryString.c_str(), + PathInfo.c_str(), + RequestUri.c_str(), + Protocol.c_str(), + ContentLength, + _Content, + HeaderBlock, + HeaderBlockPos, Chunk_req_received); - - ProtocolState = BaseState; - } - } + ProtocolState = BaseState; + } + } - return; + return; - fail_connection: - // For protocol errors or security violations- kill the connection dead. - CloseConnection (false); - ProtocolState = EndState; - return; + fail_connection: + // For protocol errors or security violations- kill the connection dead. + CloseConnection (false); + ProtocolState = EndState; + return; - send_error: - // for HTTP-level errors that will send back a response to the client. 
- CloseConnection (true); - ProtocolState = EndState; - return; + send_error: + // for HTTP-level errors that will send back a response to the client. + CloseConnection (true); + ProtocolState = EndState; + return; } @@ -484,122 +479,122 @@ HttpConnection_t::_InterpretHeaderLine **************************************/ bool HttpConnection_t::_InterpretHeaderLine (const char *header) { - /* Return T/F to indicate whether we should continue processing - * this request. Return false to indicate that we detected a fatal - * error or other condition which should cause us to drop the - * connection. - * BY DEFINITION, this doesn't define any immediate fatal errors. - * That may need to change, in which case we'll have to return - * an error code rather than T/F, so the caller will know whether - * to drop the connection gracefully or not. - * - * There's something odd and possibly undesirable about how we're - * doing this. We fully process each header (including the request) - * _as we see it,_ and not at the end when all the headers have - * been seen. This saves us the trouble of keeping them all around - * and possibly parsing them twice, but it also means that when - * we emit errors from here (that generate HTTP responses other than - * 200 and therefore close the connection), we do so _immediately_ - * and before looking at the rest of the headers. That might surprise - * and confuse some clients. - * - * Revised 27Sep06, we now store all the headers in one place, on a - * per-request basis, for the purpose of making them available to - * downstream users. At present this involves an undesirable extra - * memory copy. Eventually should rework the main header processing - * so it can be done in place. - */ - - if (!header) // an assert, really. 
- throw std::runtime_error ("bad arg interpreting headers"); - - if (!bRequestSeen) { - bRequestSeen = true; - return _InterpretRequest (header); - } - - if (!strncasecmp (header, "content-length:", 15)) { - if (bContentLengthSeen) { - // TODO, log this. There are some attacks that depend - // on sending more than one content-length header. - _SendError (RESPONSE_CODE_406); - return false; - } - bContentLengthSeen = true; - const char *s = header + 15; - while (*s && ((*s==' ') || (*s=='\t'))) - s++; - ContentLength = atoi (s); - if (ContentLength > MaxContentLength) { - // TODO, log this. - _SendError (RESPONSE_CODE_406); - return false; - } - } - else if (!strncasecmp (header, "cookie:", 7)) { - const char *s = header + 7; - while (*s && ((*s==' ') || (*s=='\t'))) - s++; - Cookie = s; - if (bSetEnvironmentStrings) - setenv ("HTTP_COOKIE", s, true); - } - else if (!strncasecmp (header, "If-none-match:", 14)) { - const char *s = header + 14; - while (*s && ((*s==' ') || (*s=='\t'))) - s++; - IfNoneMatch = s; - if (bSetEnvironmentStrings) - setenv ("IF_NONE_MATCH", s, true); - } - else if (!strncasecmp (header, "Content-type:", 13)) { + /* Return T/F to indicate whether we should continue processing + * this request. Return false to indicate that we detected a fatal + * error or other condition which should cause us to drop the + * connection. + * BY DEFINITION, this doesn't define any immediate fatal errors. + * That may need to change, in which case we'll have to return + * an error code rather than T/F, so the caller will know whether + * to drop the connection gracefully or not. + * + * There's something odd and possibly undesirable about how we're + * doing this. We fully process each header (including the request) + * _as we see it,_ and not at the end when all the headers have + * been seen. 
This saves us the trouble of keeping them all around + * and possibly parsing them twice, but it also means that when + * we emit errors from here (that generate HTTP responses other than + * 200 and therefore close the connection), we do so _immediately_ + * and before looking at the rest of the headers. That might surprise + * and confuse some clients. + * + * Revised 27Sep06, we now store all the headers in one place, on a + * per-request basis, for the purpose of making them available to + * downstream users. At present this involves an undesirable extra + * memory copy. Eventually should rework the main header processing + * so it can be done in place. + */ + + if (!header) // an assert, really. + throw std::runtime_error ("bad arg interpreting headers"); + + if (!bRequestSeen) { + bRequestSeen = true; + return _InterpretRequest (header); + } + + if (!strncasecmp (header, "content-length:", 15)) { + if (bContentLengthSeen) { + // TODO, log this. There are some attacks that depend + // on sending more than one content-length header. + _SendError (RESPONSE_CODE_406); + return false; + } + bContentLengthSeen = true; + const char *s = header + 15; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + ContentLength = atoi (s); + if (ContentLength > MaxContentLength) { + // TODO, log this. + _SendError (RESPONSE_CODE_406); + return false; + } + } + else if (!strncasecmp (header, "cookie:", 7)) { + const char *s = header + 7; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + Cookie = s; + if (bSetEnvironmentStrings) + setenv ("HTTP_COOKIE", s, true); + } + else if (!strncasecmp (header, "If-none-match:", 14)) { + const char *s = header + 14; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + IfNoneMatch = s; + if (bSetEnvironmentStrings) + setenv ("IF_NONE_MATCH", s, true); + } + else if (!strncasecmp (header, "Content-type:", 13)) { //If we receive this header after we have processed chunked data //send an error. 
if(TrailerProcessing){ _SendError (RESPONSE_CODE_406); return false; } - const char *s = header + 13; - while (*s && ((*s==' ') || (*s=='\t'))) - s++; - ContentType = s; - if (bSetEnvironmentStrings) - setenv ("CONTENT_TYPE", s, true); - } - else if (!strncasecmp (header, "Transfer-Encoding:", 18)) { + const char *s = header + 13; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + ContentType = s; + if (bSetEnvironmentStrings) + setenv ("CONTENT_TYPE", s, true); + } + else if (!strncasecmp (header, "Transfer-Encoding:", 18)) { if(TrailerProcessing){ _SendError (RESPONSE_CODE_406); return false; } - const char *s = header + 18; - while (*s && ((*s==' ') || (*s=='\t'))) - s++; - if (!strncasecmp (s, "chunked", 7)){ + const char *s = header + 18; + while (*s && ((*s==' ') || (*s=='\t'))) + s++; + if (!strncasecmp (s, "chunked", 7)){ TrailerProcessing = 1; ContentChunked = 1; } - - } + + } else if (!strncasecmp (header, "Trailer:", 8)) { if(TrailerProcessing){ _SendError (RESPONSE_CODE_406); return false; } } - // Copy the incoming header into a block - if ((HeaderBlockPos + strlen(header) + 1) < HeaderBlockSize) { - int len = strlen(header); - memcpy (HeaderBlock+HeaderBlockPos, header, len); - HeaderBlockPos += len; - HeaderBlock [HeaderBlockPos++] = 0; - } - else { - // TODO, log this. - _SendError (RESPONSE_CODE_406); - return false; - } - - return true; + // Copy the incoming header into a block + if ((HeaderBlockPos + strlen(header) + 1) < HeaderBlockSize) { + int len = strlen(header); + memcpy (HeaderBlock+HeaderBlockPos, header, len); + HeaderBlockPos += len; + HeaderBlock [HeaderBlockPos++] = 0; + } + else { + // TODO, log this. + _SendError (RESPONSE_CODE_406); + return false; + } + + return true; } @@ -609,108 +604,108 @@ HttpConnection_t::_InterpretRequest bool HttpConnection_t::_InterpretRequest (const char *header) { - /* Return T/F to indicate whether we should continue processing - * this request. 
Return false to indicate that we detected a fatal - * error or other condition which should cause us to drop the - * connection. - * Interpret the contents of the given line as an HTTP request string. - * WE ASSUME the passed-in header is not null. - * - * In preparation for a CGI-style call, we set the following - * environment strings here (other code will DEPEND ON ALL OF - * THESE BEING SET HERE in case there are no errors): - * REQUEST_METHOD, PATH_INFO, QUERY_STRING. - * - * Oh and by the way, this code sucks. It's reasonably fast - * but not terribly fast, and it's ugly. Refactor someday. - */ - - const char *blank = strchr (header, ' '); - if (!blank) { - _SendError (RESPONSE_CODE_406); - return false; - } - - if (!_DetectVerbAndSetEnvString (header, blank - header)) - return false; - - blank++; - if (*blank != '/') { - _SendError (RESPONSE_CODE_406); - return false; - } - - const char *blank2 = strchr (blank, ' '); - if (!blank2) { - _SendError (RESPONSE_CODE_406); - return false; - } - if (strcasecmp (blank2 + 1, "HTTP/1.0") && strcasecmp (blank2 + 1, "HTTP/1.1")) { - _SendError (RESPONSE_CODE_505); - return false; - } - - string prot (blank2+1); - Protocol = prot.c_str(); - - // Here, the request starts at blank and ends just before blank2. - // Find the query-string (?) and/or fragment (#,;), if either are present. - const char *questionmark = strchr (blank, '?'); - if (questionmark && (questionmark >= blank2)) - questionmark = NULL; - // const char *fragment = strpbrk ((questionmark ? (questionmark+1) : blank), "#;"); - const char *fragment = strpbrk ((questionmark ? (questionmark+1) : blank), "#"); - if (fragment && (fragment >= blank2)) - fragment = NULL; - - if (questionmark) { - string req (blank, questionmark - blank); - PathInfo = req.c_str(); - RequestUri = req.c_str(); - string qs (questionmark+1, fragment ? 
(fragment - (questionmark+1)) : (blank2 - (questionmark+1))); - QueryString = qs.c_str(); - - if (bSetEnvironmentStrings) { - setenv ("PATH_INFO", req.c_str(), true); - setenv ("REQUEST_URI", req.c_str(), true); - setenv ("QUERY_STRING", qs.c_str(), true); - setenv ("PROTOCOL", prot.c_str(), true); - } - } - else if (fragment) { - string req (blank, fragment - blank); - PathInfo = req.c_str(); - RequestUri = req.c_str(); - #ifdef OS_WIN32 - QueryString.erase(QueryString.begin(),QueryString.end()); - #else - QueryString.clear(); - #endif - if (bSetEnvironmentStrings) { - setenv ("PATH_INFO", req.c_str(), true); - setenv ("REQUEST_URI", req.c_str(), true); - setenv ("QUERY_STRING", "", true); - setenv ("PROTOCOL", prot.c_str(), true); - } - } - else { - string req (blank, blank2 - blank); - PathInfo = req.c_str(); - RequestUri = req.c_str(); - #ifdef OS_WIN32 - QueryString.erase(QueryString.begin(),QueryString.end()); - #else - QueryString.clear(); - #endif - if (bSetEnvironmentStrings) { - setenv ("PATH_INFO", req.c_str(), true); - setenv ("REQUEST_URI", req.c_str(), true); - setenv ("QUERY_STRING", "", true); - setenv ("PROTOCOL", prot.c_str(), true); - } - } - - return true; + /* Return T/F to indicate whether we should continue processing + * this request. Return false to indicate that we detected a fatal + * error or other condition which should cause us to drop the + * connection. + * Interpret the contents of the given line as an HTTP request string. + * WE ASSUME the passed-in header is not null. + * + * In preparation for a CGI-style call, we set the following + * environment strings here (other code will DEPEND ON ALL OF + * THESE BEING SET HERE in case there are no errors): + * REQUEST_METHOD, PATH_INFO, QUERY_STRING. + * + * Oh and by the way, this code sucks. It's reasonably fast + * but not terribly fast, and it's ugly. Refactor someday. 
+ */ + + const char *blank = strchr (header, ' '); + if (!blank) { + _SendError (RESPONSE_CODE_406); + return false; + } + + if (!_DetectVerbAndSetEnvString (header, blank - header)) + return false; + + blank++; + if (*blank != '/') { + _SendError (RESPONSE_CODE_406); + return false; + } + + const char *blank2 = strchr (blank, ' '); + if (!blank2) { + _SendError (RESPONSE_CODE_406); + return false; + } + if (strcasecmp (blank2 + 1, "HTTP/1.0") && strcasecmp (blank2 + 1, "HTTP/1.1")) { + _SendError (RESPONSE_CODE_505); + return false; + } + + string prot (blank2+1); + Protocol = prot.c_str(); + + // Here, the request starts at blank and ends just before blank2. + // Find the query-string (?) and/or fragment (#,;), if either are present. + const char *questionmark = strchr (blank, '?'); + if (questionmark && (questionmark >= blank2)) + questionmark = NULL; + // const char *fragment = strpbrk ((questionmark ? (questionmark+1) : blank), "#;"); + const char *fragment = strpbrk ((questionmark ? (questionmark+1) : blank), "#"); + if (fragment && (fragment >= blank2)) + fragment = NULL; + + if (questionmark) { + string req (blank, questionmark - blank); + PathInfo = req.c_str(); + RequestUri = req.c_str(); + string qs (questionmark+1, fragment ? 
(fragment - (questionmark+1)) : (blank2 - (questionmark+1))); + QueryString = qs.c_str(); + + if (bSetEnvironmentStrings) { + setenv ("PATH_INFO", req.c_str(), true); + setenv ("REQUEST_URI", req.c_str(), true); + setenv ("QUERY_STRING", qs.c_str(), true); + setenv ("PROTOCOL", prot.c_str(), true); + } + } + else if (fragment) { + string req (blank, fragment - blank); + PathInfo = req.c_str(); + RequestUri = req.c_str(); + #ifdef OS_WIN32 + QueryString.erase(QueryString.begin(),QueryString.end()); + #else + QueryString.clear(); + #endif + if (bSetEnvironmentStrings) { + setenv ("PATH_INFO", req.c_str(), true); + setenv ("REQUEST_URI", req.c_str(), true); + setenv ("QUERY_STRING", "", true); + setenv ("PROTOCOL", prot.c_str(), true); + } + } + else { + string req (blank, blank2 - blank); + PathInfo = req.c_str(); + RequestUri = req.c_str(); + #ifdef OS_WIN32 + QueryString.erase(QueryString.begin(),QueryString.end()); + #else + QueryString.clear(); + #endif + if (bSetEnvironmentStrings) { + setenv ("PATH_INFO", req.c_str(), true); + setenv ("REQUEST_URI", req.c_str(), true); + setenv ("QUERY_STRING", "", true); + setenv ("PROTOCOL", prot.c_str(), true); + } + } + + return true; } @@ -720,42 +715,40 @@ HttpConnection_t::_DetectVerbAndSetEnvString bool HttpConnection_t::_DetectVerbAndSetEnvString (const char *request, int verblength) { - /* Helper method for _InterpretRequest. - * WE MUST SET THE ENV STRING "REQUEST_METHOD" HERE - * unless there is an error. - * The hardcoded verbs MUST be static, as we'll carry around pointers to them. - */ - - static const char *verbs[] = { - "GET", - "POST", - "PUT", - "DELETE", - "HEAD", - "OPTIONS" - }; - - int n_verbs = sizeof(verbs) / sizeof(const char*); - - // Warning, this algorithm is vulnerable to head-matches, - // so compare the longer head-matching strings first. - // We could fix this if we included the blank in the search - // string but then we'd have to lop it off in the env string. 
- // ALSO NOTICE the early return on success. - for (int i=0; i < n_verbs; i++) { - if (!strncasecmp (request, verbs[i], verblength) && (strlen(verbs[i]) == (size_t)verblength)) { - RequestMethod = verbs[i]; - if (bSetEnvironmentStrings) - setenv ("REQUEST_METHOD", verbs[i], 1); - return true; - } - } - - _SendError (RESPONSE_CODE_406); - return false; -} - + /* Helper method for _InterpretRequest. + * WE MUST SET THE ENV STRING "REQUEST_METHOD" HERE + * unless there is an error. + * The hardcoded verbs MUST be static, as we'll carry around pointers to them. + */ + + static const char *verbs[] = { + "GET", + "POST", + "PUT", + "DELETE", + "HEAD", + "OPTIONS" + }; + + int n_verbs = sizeof(verbs) / sizeof(const char*); + + // Warning, this algorithm is vulnerable to head-matches, + // so compare the longer head-matching strings first. + // We could fix this if we included the blank in the search + // string but then we'd have to lop it off in the env string. + // ALSO NOTICE the early return on success. 
+ for (int i=0; i < n_verbs; i++) { + if (!strncasecmp (request, verbs[i], verblength) && (strlen(verbs[i]) == (size_t)verblength)) { + RequestMethod = verbs[i]; + if (bSetEnvironmentStrings) + setenv ("REQUEST_METHOD", verbs[i], 1); + return true; + } + } + _SendError (RESPONSE_CODE_406); + return false; +} /**************************** HttpConnection_t::_SendError @@ -763,11 +756,11 @@ HttpConnection_t::_SendError void HttpConnection_t::_SendError (const char *header) { - stringstream ss; - ss << "HTTP/1.1 " << header << "\r\n"; - ss << "Connection: close\r\n"; - ss << "Content-Type: text/plain\r\n"; - ss << "\r\n"; + stringstream ss; + ss << "HTTP/1.1 " << header << "\r\n"; + ss << "Connection: close\r\n"; + ss << "Content-Type: text/plain\r\n"; + ss << "\r\n"; - SendData (ss.str().c_str(), ss.str().length()); + SendData (ss.str().c_str(), ss.str().length()); } diff --git a/ext/http.h b/ext/http.h index cd9d987..1998250 100644 --- a/ext/http.h +++ b/ext/http.h @@ -36,96 +36,93 @@ class HttpConnection_t class HttpConnection_t { - public: - HttpConnection_t(); - virtual ~HttpConnection_t(); - - void ConsumeData (const char*, int); - - virtual void SendData (const char*, int); - virtual void CloseConnection (bool after_writing); - virtual void ProcessRequest (const char *method, - const char *cookie, - const char *ifnonematch, - const char *content_type, - const char *query_string, - const char *path_info, - const char *request_uri, - const char *protocol, - int postlength, - const char *postdata, - const char* hdrblock, - int hdrblksize, - int content_chunked); - - virtual void ReceivePostData(const char *data, int len); - virtual void SetNoEnvironmentStrings() {bSetEnvironmentStrings = false;} - virtual void SetDontAccumulatePost() {bAccumulatePost = false;} + public: + HttpConnection_t(); + virtual ~HttpConnection_t(); + + void ConsumeData (const char*, int); + + virtual void SendData (const char*, int); + virtual void CloseConnection (bool after_writing); + 
virtual void ProcessRequest (const char *method, + const char *cookie, + const char *ifnonematch, + const char *content_type, + const char *query_string, + const char *path_info, + const char *request_uri, + const char *protocol, + int postlength, + const char *postdata, + const char* hdrblock, + int hdrblksize, + int content_chunked); + + virtual void ReceivePostData(const char *data, int len); + virtual void SetNoEnvironmentStrings() {bSetEnvironmentStrings = false;} + virtual void SetDontAccumulatePost() {bAccumulatePost = false;} private: - enum { - BaseState, - PreheaderState, - HeaderState, - ReadingContentState, + enum { + BaseState, + PreheaderState, + HeaderState, + ReadingContentState, ReadingChunkLen, ReadingChunkedContent, - DispatchState, - EndState - } ProtocolState; + DispatchState, + EndState + } ProtocolState; - enum { - MaxLeadingBlanks = 12, - MaxHeaderLineLength = 8 * 1024, - MaxContentLength = 20 * 1024 * 1024, - HeaderBlockSize = 16 * 1024 - }; - int nLeadingBlanks; + enum { + MaxLeadingBlanks = 12, + MaxHeaderLineLength = 8 * 1024, + MaxContentLength = 20 * 1024 * 1024, + HeaderBlockSize = 16 * 1024 + }; + int nLeadingBlanks; - char HeaderLine [MaxHeaderLineLength]; - int HeaderLinePos; + char HeaderLine [MaxHeaderLineLength]; + int HeaderLinePos; - char HeaderBlock [HeaderBlockSize]; - int HeaderBlockPos; + char HeaderBlock [HeaderBlockSize]; + int HeaderBlockPos; - int ContentLength; + int ContentLength; int ContentChunked; int TrailerProcessing; - int ContentPos; - char *_Content; + int ContentPos; + char *_Content; int Chunk_req_received; - + char chunklen_s[10]; int foundsemi; int foundslashr; int chunklen; int foundslashn; - bool bSetEnvironmentStrings; - bool bAccumulatePost; - bool bRequestSeen; - bool bContentLengthSeen; + bool bSetEnvironmentStrings; + bool bAccumulatePost; + bool bRequestSeen; + bool bContentLengthSeen; // ofstream programlog; - const char *RequestMethod; - std::string Cookie; - std::string IfNoneMatch; - 
std::string ContentType; - std::string PathInfo; - std::string RequestUri; - std::string QueryString; - std::string Protocol; + const char *RequestMethod; + std::string Cookie; + std::string IfNoneMatch; + std::string ContentType; + std::string PathInfo; + std::string RequestUri; + std::string QueryString; + std::string Protocol; - private: + private: int _GetChunkLength(const char *, int *, int *); - bool _InterpretHeaderLine (const char*); - bool _InterpretRequest (const char*); - bool _DetectVerbAndSetEnvString (const char*, int); - void _SendError (const char*); + bool _InterpretHeaderLine (const char*); + bool _InterpretRequest (const char*); + bool _DetectVerbAndSetEnvString (const char*, int); + void _SendError (const char*); }; #endif // __HttpPersonality__H_ - - -