Skip to content

Commit

Permalink
http: allow raw header capture (#347) (#349)
Browse files Browse the repository at this point in the history
The golang textproto library does a few things when parsing the HTTP
headers:

* consumes some whitespace characters (e.g. \r\n)
* canonicalizes the header keys (e.g. "content-type" => "Content-Type")
* moves the headers into a map

This all makes sense when parsing HTTP, but for a scanner some may want
to have the exact headers, to match on order, non-canonical keys, etc.

This adds that option, enabled by passing '--raw-headers' to an HTTP
scan. It is implemented as a tee on the pconn reader, placed in front
of the bufio reader so it sees the bytes before they are buffered. The
tee copy can be disabled once the headers have been read, so as not to
waste memory while consuming the HTTP body.

While denoted as "raw headers", this also captures the raw status
line.

(cherry picked from commit 83e55e0)
Signed-off-by: Jeff Cody <[email protected]>
  • Loading branch information
codyprime authored Feb 18, 2024
1 parent 4e4ace8 commit 1e97dd8
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 9 deletions.
23 changes: 23 additions & 0 deletions lib/http/response.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ type Response struct {
// Keys in the map are canonicalized (see CanonicalHeaderKey).
Header Header `json:"headers,omitempty"`

// The raw bytes of the MIME headers, as read from the underlying
// reader. This allows for post-processing to be done on an exact
// copy of the headers. The headers will not be canonicalized nor
// re-ordered or converted to a map.
HeadersRaw []byte `json:"headers_raw,omitempty"`

// Body represents the response body.
//
// The http Client and Transport guarantee that Body is always
Expand Down Expand Up @@ -158,11 +164,23 @@ func (r *Response) Location() (*url.URL, error) {
// After that call, clients can inspect resp.Trailer to find key/value
// pairs included in the response trailer.
func ReadResponse(r *bufio.Reader, req *Request) (*Response, error) {
	// Wrap the caller's reader in a TeeConn whose tee is left disabled (the
	// zero value of enabled), then hand off to the shared parser.
	tc := &TeeConn{br: r}
	return readResponse(tc, req)
}
// ReadResponseTee is the TeeConn-based counterpart of ReadResponse: it parses
// a response from tc's buffered reader by delegating to readResponse, passing
// tc through so the parser has access to the tee buffer as well as the reader.
func ReadResponseTee(tc *TeeConn, req *Request) (*Response, error) {
	resp, err := readResponse(tc, req)
	return resp, err
}
func readResponse(tc *TeeConn, req *Request) (*Response, error) {
r := tc.BufioReader()
tp := textproto.NewReader(r)
resp := &Response{
Request: req,
}

// To extract the raw response through headers, we want to find the offsets
// for where we are at in the io.TeeReader compared to the bufio.Reader
// both at the start of the response parsing, and at the end.
hdrStart := tc.ReadPos()

// Parse the first line of the response.
line, err := tp.ReadLine()
if err != nil {
Expand Down Expand Up @@ -202,6 +220,11 @@ func ReadResponse(r *bufio.Reader, req *Request) (*Response, error) {
}
return resp, err
}
// No need to continue tee reads into the tee buffer, go ahead and
// disable it
tc.Disable()
hdrEnd := tc.ReadPos()
resp.HeadersRaw = tc.Bytes(hdrStart, hdrEnd)
resp.Header = Header(mimeHeader)

fixPragmaCacheControl(resp.Header)
Expand Down
68 changes: 59 additions & 9 deletions lib/http/transport.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package http

import (
"bufio"
"bytes"
"compress/gzip"
"container/list"
"context"
Expand Down Expand Up @@ -198,6 +199,10 @@ type Transport struct {
h2transport *http2Transport // non-nil if http2 wired up

// TODO: tunable on max per-host TCP dials in flight (Issue 13957)

// Enable raw read buffering and raw header extraction
// zgrab2-specific
RawHeaderBuffer bool
}

// onceSetNextProtoDefaults initializes TLSNextProto.
Expand Down Expand Up @@ -1027,6 +1032,8 @@ func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistCon
pconn.conn = conn
}

pconn.tee = &TeeConn{}

// Proxy setup.
switch {
case cm.proxyURL == nil:
Expand Down Expand Up @@ -1058,8 +1065,10 @@ func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistCon
// Read response.
// Okay to use and discard buffered reader here, because
// TLS server will not speak until spoken to.
br := bufio.NewReader(conn)
resp, err := ReadResponse(br, connectReq)
tee := TeeConn{
br: bufio.NewReader(conn),
}
resp, err := ReadResponseTee(&tee, connectReq)
if err != nil {
conn.Close()
return nil, err
Expand Down Expand Up @@ -1123,13 +1132,49 @@ func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistCon
}
}

pconn.br = bufio.NewReader(pconn)
pconn.tee.br = bufio.NewReader(pconn)
pconn.tee.enabled = t.RawHeaderBuffer
pconn.bw = bufio.NewWriter(persistConnWriter{pconn})
go pconn.readLoop()
go pconn.writeLoop()
return pconn, nil
}

// TeeConn couples the bufio.Reader used to parse a connection with a side
// buffer that can hold a copy of the raw bytes read from that connection.
//
// br is a bufio.Reader, so it performs read-ahead: it may have pulled more
// bytes from its backing io.Reader than its consumer has taken so far. tb is
// the tee buffer. No method of TeeConn fills tb; whoever implements the
// io.Reader backing br is expected to append the bytes it reads to tb while
// enabled is true.
type TeeConn struct {
	enabled bool          // tee writes to tb are enabled
	tb      bytes.Buffer  // raw bytes teed from the io.Reader backing br
	br      *bufio.Reader // from conn
}

// ReadPos returns the offset into tb that corresponds to the next byte the
// consumer of br will see: the number of bytes teed so far minus the bytes br
// has read ahead but not yet handed out. It returns 0 while tb is empty.
//
// The result is only meaningful when every byte currently buffered in br was
// also teed into tb. If br buffered data while the tee was disabled, the
// result can be negative; Bytes treats such an offset as an invalid range.
func (t *TeeConn) ReadPos() int {
	teed := t.tb.Len()
	if teed == 0 {
		return 0
	}
	return teed - t.br.Buffered()
}

// Bytes returns a copy of the teed bytes in the half-open range [s, e).
//
// It returns nil when the range cannot be satisfied: s is negative, s is at
// or past the end of the tee buffer, or e is before s. An e beyond the end of
// the buffer is clamped to the buffer length. The result is a copy, so it
// stays valid even if the tee buffer is modified afterwards.
func (t *TeeConn) Bytes(s, e int) []byte {
	buf := t.tb.Bytes()
	if s < 0 || s >= len(buf) || e < s {
		return nil
	}
	if e > len(buf) {
		e = len(buf)
	}
	// bytes.Buffer.Bytes aliases the buffer's internal storage, which is only
	// valid until the next buffer modification; hand out a detached copy.
	out := make([]byte, e-s)
	copy(out, buf[s:e])
	return out
}

// BufioReader returns the buffered reader held by t (nil if none was set).
func (t *TeeConn) BufioReader() *bufio.Reader {
	return t.br
}

// Disable stops the tee writes to t.tb by clearing the enabled flag. Bytes
// already captured in t.tb are kept.
func (t *TeeConn) Disable() {
	t.enabled = false
}

// persistConnWriter is the io.Writer written to by pc.bw.
// It accumulates the number of bytes written to the underlying conn,
// so the retry logic can determine whether any bytes made it across
Expand Down Expand Up @@ -1277,7 +1322,7 @@ type persistConn struct {
cacheKey connectMethodKey
conn net.Conn
tlsState *tls.ConnectionState
br *bufio.Reader // from conn
tee *TeeConn // from conn, includes a raw buffer and tee
bw *bufio.Writer // to conn
nwrite int64 // bytes written
reqch chan requestAndChan // written by roundTrip; read by readLoop
Expand Down Expand Up @@ -1329,6 +1374,11 @@ func (pc *persistConn) Read(p []byte) (n int, err error) {
pc.sawEOF = true
}
pc.readLimit -= int64(n)
if pc.tee.enabled && n > 0 {
if n, err := pc.tee.tb.Write(p[:n]); err != nil {
return n, err
}
}
return
}

Expand Down Expand Up @@ -1482,7 +1532,7 @@ func (pc *persistConn) readLoop() {
alive := true
for alive {
pc.readLimit = pc.maxHeaderResponseSize()
_, err := pc.br.Peek(1)
_, err := pc.tee.br.Peek(1)

pc.mu.Lock()
if pc.numExpectedResponses == 0 {
Expand Down Expand Up @@ -1636,7 +1686,7 @@ func (pc *persistConn) readLoopPeekFailLocked(peekErr error) {
if pc.closed != nil {
return
}
if n := pc.br.Buffered(); n > 0 {
if n := pc.tee.br.Buffered(); n > 0 {
}
if peekErr == io.EOF {
// common case.
Expand All @@ -1651,11 +1701,11 @@ func (pc *persistConn) readLoopPeekFailLocked(peekErr error) {
// trace is optional.
func (pc *persistConn) readResponse(rc requestAndChan, trace *httptrace.ClientTrace) (resp *Response, err error) {
if trace != nil && trace.GotFirstResponseByte != nil {
if peek, err := pc.br.Peek(1); err == nil && len(peek) == 1 {
if peek, err := pc.tee.br.Peek(1); err == nil && len(peek) == 1 {
trace.GotFirstResponseByte()
}
}
resp, err = ReadResponse(pc.br, rc.req)
resp, err = ReadResponseTee(pc.tee, rc.req)
if err != nil {
if err == io.ErrUnexpectedEOF {
pc.sawEOF = true
Expand All @@ -1674,7 +1724,7 @@ func (pc *persistConn) readResponse(rc requestAndChan, trace *httptrace.ClientTr
}
if resp.StatusCode == 100 {
pc.readLimit = pc.maxHeaderResponseSize() // reset the limit
resp, err = ReadResponse(pc.br, rc.req)
resp, err = ReadResponseTee(pc.tee, rc.req)
if err != nil {
return
}
Expand Down
4 changes: 4 additions & 0 deletions modules/http/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ type Flags struct {

// WithBodyLength enables adding the body_size field to the Response
WithBodyLength bool `long:"with-body-size" description:"Enable the body_size attribute, for how many bytes actually read"`

// Extract the raw header as it is on the wire
RawHeaders bool `long:"raw-headers" description:"Extract raw response up through headers"`
}

// A Results object is returned by the HTTP module's Scanner.Scan()
Expand Down Expand Up @@ -454,6 +457,7 @@ func (scanner *Scanner) newHTTPScan(t *zgrab2.ScanTarget, useHTTPS bool) *scan {
DisableKeepAlives: false,
DisableCompression: false,
MaxIdleConnsPerHost: scanner.config.MaxRedirects,
RawHeaderBuffer: scanner.config.RawHeaders,
},
client: http.MakeNewClient(),
globalDeadline: time.Now().Add(scanner.config.Timeout),
Expand Down

0 comments on commit 1e97dd8

Please sign in to comment.