From 7f5df74ba7ea25d299bb4a6af8bdc093a9196b7e Mon Sep 17 00:00:00 2001 From: Javier Evans Date: Thu, 25 Apr 2024 07:19:11 -0700 Subject: [PATCH] feat: Support AWS S3 Express One Zone buckets (#229) # What This change adds the `S3_SERVICE` configuration variable which will default to `s3` and may be one of `s3express` or `s3`. It also introduces the `virtual-v2` `S3_STYLE` argument option in support of the connectivity requirement of the S3 Express One Zone (directory) buckets. We are using this as a successor to `virtual` and believe it should work well in all AWS usages but want to be cautious as we make this change. Many thanks to @hveiga for driving the implementation of this feature in their original pull request. Setting this variable to s3express will change the "service" used to sign the requests with the V4 header to s3express. Currently the gateway works without this step, but it's advised in the documentation [here](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-security-best-practices.html). ## Other Changes We are moving the determination of the hostname used to query S3 into the docker entrypoint (or bootstrap script for non-docker installs). If `S3_STYLE` is set to `virtual-v2` (the AWS-recommended scheme, which is intended to become the default) then the hostname will be: ``` ${S3_BUCKET_NAME}.${S3_SERVER}:${S3_SERVER_PORT} ``` which will be used in these locations: * The `proxy_pass` directive * The HTTP `Host` header sent to AWS * The `host` element of the canonical headers used in signing AWS signature V4 requests. Based on my reading here: https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html It looks like AWS recommends that the bucket be always prepended and other schemes exist only for backwards compatibility reasons. 
However, please comment on this discussion if you have concerns https://github.com/nginxinc/nginx-s3-gateway/discussions/231 Co-authored-by: @hveiga " --- .github/workflows/main.yml | 5 +- .gitignore | 43 ++++++++++++- .../00-check-for-required-env.sh | 3 +- common/docker-entrypoint.sh | 21 +++++++ common/etc/nginx/include/s3gateway.js | 20 ++---- common/etc/nginx/nginx.conf | 1 + .../etc/nginx/templates/default.conf.template | 15 +++-- .../gateway/s3_location_common.conf.template | 2 +- deployments/s3_express/.terraform.lock.hcl | 25 ++++++++ deployments/s3_express/.tool-versions | 1 + deployments/s3_express/README.md | 45 ++++++++++++++ deployments/s3_express/main.tf | 51 ++++++++++++++++ .../s3_express/settings.s3express.example | 22 +++++++ deployments/s3_express/test_data/test.txt | 2 + deployments/s3_express/variables.tf | 20 ++++++ deployments/s3_express/versions.tf | 8 +++ docs/getting_started.md | 61 ++++++++++++++++++- .../nginx/templates/upstreams.conf.template | 5 +- .../nginx/templates/upstreams.conf.template | 4 +- settings.example | 3 +- standalone_ubuntu_oss_install.sh | 32 +++++++++- test.sh | 12 ++-- test/docker-compose.yaml | 4 +- 23 files changed, 362 insertions(+), 43 deletions(-) create mode 100644 deployments/s3_express/.terraform.lock.hcl create mode 100644 deployments/s3_express/.tool-versions create mode 100644 deployments/s3_express/README.md create mode 100644 deployments/s3_express/main.tf create mode 100644 deployments/s3_express/settings.s3express.example create mode 100644 deployments/s3_express/test_data/test.txt create mode 100644 deployments/s3_express/variables.tf create mode 100644 deployments/s3_express/versions.tf diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f7bed9dc..52675aa0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -54,6 +54,9 @@ jobs: test-oss: runs-on: ubuntu-22.04 needs: build-oss-for-test + strategy: + matrix: + path_style: [virtual, virtual-v2] steps: - 
uses: actions/checkout@v4 - name: Install dependencies @@ -82,7 +85,7 @@ jobs: run: | docker load --input ${{ runner.temp }}/oss.tar - name: Run tests - stable njs version - run: ./test.sh --type oss + run: S3_STYLE=${{ matrix.path_style }} ./test.sh --type oss build-latest-njs-for-test: runs-on: ubuntu-22.04 diff --git a/.gitignore b/.gitignore index d2a86aba..bfd15afa 100644 --- a/.gitignore +++ b/.gitignore @@ -346,4 +346,45 @@ test-settings.* s3-requests.http httpRequests/ -.bin/ \ No newline at end of file +.bin/ + +# Created by https://www.toptal.com/developers/gitignore/api/terraform +# Edit at https://www.toptal.com/developers/gitignore?templates=terraform + +### Terraform ### +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* + +# Crash log files +crash.log +crash.*.log + +# Exclude all .tfvars files, which are likely to contain sensitive data, such as +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject +# to change depending on the environment. 
+*.tfvars +*.tfvars.json + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Include override files you do wish to add to version control using negated pattern +# !example_override.tf + +# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan +# example: *tfplan* + +# Ignore CLI configuration files +.terraformrc +terraform.rc +.tfplan +# End of https://www.toptal.com/developers/gitignore/api/terraform diff --git a/common/docker-entrypoint.d/00-check-for-required-env.sh b/common/docker-entrypoint.d/00-check-for-required-env.sh index 604214d2..a09a76a1 100755 --- a/common/docker-entrypoint.d/00-check-for-required-env.sh +++ b/common/docker-entrypoint.d/00-check-for-required-env.sh @@ -22,7 +22,7 @@ set -e failed=0 -required=("S3_BUCKET_NAME" "S3_SERVER" "S3_SERVER_PORT" "S3_SERVER_PROTO" +required=("S3_SERVICE" "S3_BUCKET_NAME" "S3_SERVER" "S3_SERVER_PORT" "S3_SERVER_PROTO" "S3_REGION" "S3_STYLE" "ALLOW_DIRECTORY_LIST" "AWS_SIGS_VERSION" "CORS_ENABLED") @@ -122,6 +122,7 @@ if [ $failed -gt 0 ]; then fi echo "S3 Backend Environment" +echo "Service: ${S3_SERVICE}" echo "Access Key ID: ${AWS_ACCESS_KEY_ID}" echo "Origin: ${S3_SERVER_PROTO}://${S3_BUCKET_NAME}.${S3_SERVER}:${S3_SERVER_PORT}" echo "Region: ${S3_REGION}" diff --git a/common/docker-entrypoint.sh b/common/docker-entrypoint.sh index d607c804..86886027 100644 --- a/common/docker-entrypoint.sh +++ b/common/docker-entrypoint.sh @@ -68,6 +68,27 @@ if [ -z "${CORS_ALLOWED_ORIGIN+x}" ]; then export CORS_ALLOWED_ORIGIN="*" fi +# This is the primary logic to determine the s3 host used for the +# upstream (the actual proxying action) as well as the `Host` header +# +# It is currently slightly more complex than necessary because we are transitioning +# to a new logic which is defined by "virtual-v2". 
"virtual-v2" is the recommended setting +# for all deployments. + +# S3_UPSTREAM needs the port specified. The port must +# correspond to https/http in the proxy_pass directive. +if [ "${S3_STYLE}" == "virtual-v2" ]; then + export S3_UPSTREAM="${S3_BUCKET_NAME}.${S3_SERVER}:${S3_SERVER_PORT}" + export S3_HOST_HEADER="${S3_BUCKET_NAME}.${S3_SERVER}:${S3_SERVER_PORT}" +elif [ "${S3_STYLE}" == "path" ]; then + export S3_UPSTREAM="${S3_SERVER}:${S3_SERVER_PORT}" + export S3_HOST_HEADER="${S3_SERVER}:${S3_SERVER_PORT}" +else + export S3_UPSTREAM="${S3_SERVER}:${S3_SERVER_PORT}" + export S3_HOST_HEADER="${S3_BUCKET_NAME}.${S3_SERVER}" +fi + + # Nothing is modified under this line if [ -z "${NGINX_ENTRYPOINT_QUIET_LOGS:-}" ]; then diff --git a/common/etc/nginx/include/s3gateway.js b/common/etc/nginx/include/s3gateway.js index 7a497cf8..d9e016a8 100644 --- a/common/etc/nginx/include/s3gateway.js +++ b/common/etc/nginx/include/s3gateway.js @@ -39,6 +39,7 @@ _requireEnvVars('S3_SERVER_PORT'); _requireEnvVars('S3_REGION'); _requireEnvVars('AWS_SIGS_VERSION'); _requireEnvVars('S3_STYLE'); +_requireEnvVars('S3_SERVICE'); /** @@ -86,7 +87,7 @@ const INDEX_PAGE = "index.html"; * Constant defining the service requests are being signed for. 
* @type {string} */ -const SERVICE = 's3'; +const SERVICE = process.env['S3_SERVICE'] || "s3"; /** * Transform the headers returned from S3 such that there isn't information @@ -165,12 +166,7 @@ function s3date(r) { function s3auth(r) { const bucket = process.env['S3_BUCKET_NAME']; const region = process.env['S3_REGION']; - let server; - if (S3_STYLE === 'path') { - server = process.env['S3_SERVER'] + ':' + process.env['S3_SERVER_PORT']; - } else { - server = process.env['S3_SERVER']; - } + const host = r.variables.s3_host; const sigver = process.env['AWS_SIGS_VERSION']; let signature; @@ -180,7 +176,7 @@ function s3auth(r) { let req = _s3ReqParamsForSigV2(r, bucket); signature = awssig2.signatureV2(r, req.uri, req.httpDate, credentials); } else { - let req = _s3ReqParamsForSigV4(r, bucket, server); + let req = _s3ReqParamsForSigV4(r, bucket, host); signature = awssig4.signatureV4(r, awscred.Now(), region, SERVICE, req.uri, req.queryParams, req.host, credentials); } @@ -221,15 +217,11 @@ function _s3ReqParamsForSigV2(r, bucket) { * @see {@link https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html | AWS V4 Signing Process} * @param r {NginxHTTPRequest} HTTP request object * @param bucket {string} S3 bucket associated with request - * @param server {string} S3 host associated with request + * @param host {string} S3 host associated with request * @returns {S3ReqParams} s3ReqParams object (host, uri, queryParams) * @private */ -function _s3ReqParamsForSigV4(r, bucket, server) { - let host = server; - if (S3_STYLE === 'virtual' || S3_STYLE === 'default' || S3_STYLE === undefined) { - host = bucket + '.' + host; - } +function _s3ReqParamsForSigV4(r, bucket, host) { const baseUri = s3BaseUri(r); const computed_url = !utils.parseBoolean(r.variables.forIndexPage) ? 
r.variables.uri_path diff --git a/common/etc/nginx/nginx.conf b/common/etc/nginx/nginx.conf index cd938089..7b9d9c60 100644 --- a/common/etc/nginx/nginx.conf +++ b/common/etc/nginx/nginx.conf @@ -20,6 +20,7 @@ env S3_REGION; env AWS_SIGS_VERSION; env DEBUG; env S3_STYLE; +env S3_SERVICE; env ALLOW_DIRECTORY_LIST; env PROVIDE_INDEX_PAGE; env APPEND_SLASH_FOR_POSSIBLE_DIRECTORY; diff --git a/common/etc/nginx/templates/default.conf.template b/common/etc/nginx/templates/default.conf.template index faa1e492..52d1e55f 100644 --- a/common/etc/nginx/templates/default.conf.template +++ b/common/etc/nginx/templates/default.conf.template @@ -19,11 +19,10 @@ map $uri_full_path $uri_path { default $PREFIX_LEADING_DIRECTORY_PATH$uri_full_path; } -map $S3_STYLE $s3_host_hdr { - virtual "${S3_BUCKET_NAME}.${S3_SERVER}"; - path "${S3_SERVER}:${S3_SERVER_PORT}"; - default "${S3_BUCKET_NAME}.${S3_SERVER}"; -} +# S3_HOST_HEADER is set in the startup script +# (either ./common/docker-entrypoint.sh or ./standalone_ubuntu_oss_install.sh) +# based on the S3_STYLE configuration option. 
+js_var $s3_host ${S3_HOST_HEADER}; js_var $indexIsEmpty true; js_var $forIndexPage true; @@ -141,7 +140,7 @@ server { proxy_set_header X-Amz-Security-Token $awsSessionToken; # We set the host as the bucket name to inform the S3 API of the bucket - proxy_set_header Host $s3_host_hdr; + proxy_set_header Host $s3_host; # Use keep alive connections in order to improve performance proxy_http_version 1.1; @@ -202,7 +201,7 @@ server { proxy_set_header X-Amz-Security-Token $awsSessionToken; # We set the host as the bucket name to inform the S3 API of the bucket - proxy_set_header Host $s3_host_hdr; + proxy_set_header Host $s3_host; # Use keep alive connections in order to improve performance proxy_http_version 1.1; @@ -265,7 +264,7 @@ server { proxy_set_header X-Amz-Security-Token $awsSessionToken; # We set the host as the bucket name to inform the S3 API of the bucket - proxy_set_header Host $s3_host_hdr; + proxy_set_header Host $s3_host; # Use keep alive connections in order to improve performance proxy_http_version 1.1; diff --git a/common/etc/nginx/templates/gateway/s3_location_common.conf.template b/common/etc/nginx/templates/gateway/s3_location_common.conf.template index f65f9987..30501642 100644 --- a/common/etc/nginx/templates/gateway/s3_location_common.conf.template +++ b/common/etc/nginx/templates/gateway/s3_location_common.conf.template @@ -19,7 +19,7 @@ proxy_set_header Authorization $s3auth; proxy_set_header X-Amz-Security-Token $awsSessionToken; # We set the host as the bucket name to inform the S3 API of the bucket -proxy_set_header Host $s3_host_hdr; +proxy_set_header Host $s3_host; # Use keep alive connections in order to improve performance proxy_http_version 1.1; diff --git a/deployments/s3_express/.terraform.lock.hcl b/deployments/s3_express/.terraform.lock.hcl new file mode 100644 index 00000000..d6034f09 --- /dev/null +++ b/deployments/s3_express/.terraform.lock.hcl @@ -0,0 +1,25 @@ +# This file is maintained automatically by "terraform init". 
+# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/aws" { + version = "5.45.0" + constraints = "5.45.0" + hashes = [ + "h1:8m3+C1VNevzU/8FsABoKp2rTOx3Ue7674INfhfk0TZY=", + "zh:1379bcf45aef3d486ee18b4f767bfecd40a0056510d26107f388be3d7994c368", + "zh:1615a6f5495acfb3a0cb72324587261dd4d72711a3cc51aff13167b14531501e", + "zh:18b69a0f33f8b1862fbd3f200756b7e83e087b73687085f2cf9c7da4c318e3e6", + "zh:2c5e7aecd197bc3d3b19290bad8cf4c390c2c6a77bb165da4e11f53f2dfe2e54", + "zh:3794da9bef97596e3bc60e12cdd915bda5ec2ed62cd1cd93723d58b4981905fe", + "zh:40a5e45ed91801f83db76dffd467dcf425ea2ca8642327cf01119601cb86021c", + "zh:4abfc3f53d0256a7d5d1fa5e931e4601b02db3d1da28f452341d3823d0518f1a", + "zh:4eb0e98078f79aeb06b5ff6115286dc2135d12a80287885698d04036425494a2", + "zh:75470efbadea4a8d783642497acaeec5077fc4a7f3df3340defeaa1c7de29bf7", + "zh:8861a0b4891d5fa2fa7142f236ae613cea966c45b5472e3915a4ac3abcbaf487", + "zh:8bf6f21cd9390b742ca0b4393fde92616ca9e6553fb75003a0999006ad233d35", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:ad73008a044e75d337acda910fb54d8b81a366873c8a413fec1291034899a814", + "zh:bf261713b0b8bebfe8c199291365b87d9043849f28a2dc764bafdde73ae43693", + "zh:da3bafa1fd830be418dfcc730e85085fe67c0d415c066716f2ac350a2306f40a", + ] +} diff --git a/deployments/s3_express/.tool-versions b/deployments/s3_express/.tool-versions new file mode 100644 index 00000000..fad6fadc --- /dev/null +++ b/deployments/s3_express/.tool-versions @@ -0,0 +1 @@ +terraform 1.8.1 diff --git a/deployments/s3_express/README.md b/deployments/s3_express/README.md new file mode 100644 index 00000000..75242564 --- /dev/null +++ b/deployments/s3_express/README.md @@ -0,0 +1,45 @@ +# Purpose +This Terraform script sets up an AWS S3 Express One Zone bucket for testing. 
+ +## Usage +Use environment variables to authenticate: + +```bash +export AWS_ACCESS_KEY_ID="anaccesskey" +export AWS_SECRET_ACCESS_KEY="asecretkey" +export AWS_REGION="us-west-2" +``` + +Generate a plan: +```bash +terraform plan -out=plan.tfplan \ +  -var="bucket_name=my-bucket-name--usw2-az1--x-s3" \ +  -var="region=us-west-2" \ +  -var="availability_zone_id=usw2-az1" \ +  -var="owner_email=my_email@foo.com" +``` +> [!NOTE] +> Note that AWS S3 Express One Zone is only available in [certain regions and availability zones](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-networking.html#s3-express-endpoints). If you get an error like `api error InvalidBucketName` even though you have met the [naming rules](https://docs.aws.amazon.com/AmazonS3/latest/userguide/directory-bucket-naming-rules.html), this likely means you have chosen a bad region/availability zone combination. + + +If you are comfortable with the plan, apply it: +``` +terraform apply "plan.tfplan" +``` + +Then build the image (you can also use the latest release) +```bash +docker build --file Dockerfile.oss --tag nginx-s3-gateway:oss --tag nginx-s3-gateway . +``` + +Configure and run the image: + +```bash +docker run --rm --env-file ./settings.s3express.example --publish 80:80 --name nginx-s3-gateway \ + nginx-s3-gateway:oss +``` + +Confirm that it is working. 
The terraform script will prepopulate the bucket with a single test object +```bash +curl http://localhost:80/test.txt +``` diff --git a/deployments/s3_express/main.tf b/deployments/s3_express/main.tf new file mode 100644 index 00000000..55edfd62 --- /dev/null +++ b/deployments/s3_express/main.tf @@ -0,0 +1,51 @@ +provider "aws" { + region = var.region +} + +resource "aws_s3_directory_bucket" "example" { + bucket = var.bucket_name + location { + name = var.availability_zone_id + } + + force_destroy = true +} + +data "aws_partition" "current" {} +data "aws_caller_identity" "current" {} + +data "aws_iam_policy_document" "example" { + statement { + effect = "Allow" + + actions = [ + "s3express:*", + ] + + resources = [ + aws_s3_directory_bucket.example.arn, + ] + + principals { + type = "AWS" + identifiers = ["arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:root"] + } + } +} + +resource "aws_s3_bucket_policy" "example" { + bucket = aws_s3_directory_bucket.example.bucket + policy = data.aws_iam_policy_document.example.json +} + +# The filemd5() function is available in Terraform 0.11.12 and later +# For Terraform 0.11.11 and earlier, use the md5() function and the file() function: +# etag = "${md5(file("path/to/file"))}" +# etag = filemd5("path/to/file") +resource "aws_s3_object" "example" { + bucket = aws_s3_directory_bucket.example.bucket + key = "test.txt" + source = "${path.root}/test_data/test.txt" +} + + diff --git a/deployments/s3_express/settings.s3express.example b/deployments/s3_express/settings.s3express.example new file mode 100644 index 00000000..3dcc2c6f --- /dev/null +++ b/deployments/s3_express/settings.s3express.example @@ -0,0 +1,22 @@ +S3_BUCKET_NAME=my-bucket-name--usw2-az1--x-s3 +AWS_ACCESS_KEY_ID=ZZZZZZZZZZZZZZZZZZZZ +AWS_SECRET_ACCESS_KEY=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +AWS_SESSION_TOKEN=bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb +S3_SERVER=s3express-usw2-az1.us-west-2.amazonaws.com 
+S3_SERVER_PORT=443 +S3_SERVER_PROTO=https +S3_REGION=us-west-2 +S3_STYLE=virtual-v2 +S3_SERVICE=s3express +DEBUG=true +AWS_SIGS_VERSION=4 +ALLOW_DIRECTORY_LIST=false +PROVIDE_INDEX_PAGE=false +APPEND_SLASH_FOR_POSSIBLE_DIRECTORY=false +DIRECTORY_LISTING_PATH_PREFIX="" +PROXY_CACHE_MAX_SIZE=10g +PROXY_CACHE_SLICE_SIZE="1m" +PROXY_CACHE_INACTIVE=60m +PROXY_CACHE_VALID_OK=1h +PROXY_CACHE_VALID_NOTFOUND=1m +PROXY_CACHE_VALID_FORBIDDEN=30s diff --git a/deployments/s3_express/test_data/test.txt b/deployments/s3_express/test_data/test.txt new file mode 100644 index 00000000..b0a9adc7 --- /dev/null +++ b/deployments/s3_express/test_data/test.txt @@ -0,0 +1,2 @@ +Congratulations, friend. You are using Amazon S3 Express One Zone. +🚂🚂🚂 Choo-choo~ 🚂🚂🚂 \ No newline at end of file diff --git a/deployments/s3_express/variables.tf b/deployments/s3_express/variables.tf new file mode 100644 index 00000000..689eef56 --- /dev/null +++ b/deployments/s3_express/variables.tf @@ -0,0 +1,20 @@ +# Format for bucket name [bucket_name]--[azid]--x-s3 +variable "bucket_name" { + type = string + default = "example--usw2-az2--x-s3" +} + +variable "owner_email" { + type = string +} + +variable "region" { + type = string + default = "us-west-2" +} + +# "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#az-ids" +variable "availability_zone_id" { + type = string + default = "usw2-az2" +} diff --git a/deployments/s3_express/versions.tf b/deployments/s3_express/versions.tf new file mode 100644 index 00000000..a1aaa0de --- /dev/null +++ b/deployments/s3_express/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "5.45.0" + } + } +} diff --git a/docs/getting_started.md b/docs/getting_started.md index d3380817..e1e6d072 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -25,14 +25,15 @@ running as a Container or as a Systemd service. 
| `S3_SERVER_PORT` | Yes | | | SSL/TLS port to connect to | | `S3_SERVER_PROTO` | Yes | `http`, `https` | | Protocol to used connect to S3 server | | `S3_SERVER` | Yes | | | S3 host to connect to | -| `S3_STYLE` | Yes | `virtual`, `path`, `default` | `default` | The S3 host/path method.
  • `virtual` is the method that that uses DNS-style bucket+hostname:port. This is the `default` value.
  • `path` is a method that appends the bucket name as the first directory in the URI's path. This method is used by many S3 compatible services.

    See this [AWS blog article](https://aws.amazon.com/blogs/aws/amazon-s3-path-deprecation-plan-the-rest-of-the-story/) for further information. | +| `S3_STYLE` | Yes | `virtual-v2`, `virtual`, `path`, `default` | `default` | The S3 host/path method.
  • `virtual` and `virtual-v2` represent the method that uses DNS-style bucket+hostname:port. The `default` is the same as `virtual`. In the future, the `default` value will become `virtual-v2`. See [Choosing a `S3_STYLE` Setting](#user-content-choosing-a-s3_style-setting) below for details.
  • `path` is a method that appends the bucket name as the first directory in the URI's path. This method is used by many S3 compatible services.

    See this [AWS blog article](https://aws.amazon.com/blogs/aws/amazon-s3-path-deprecation-plan-the-rest-of-the-story/) for further information. | +| `S3_SERVICE` | Yes | `s3`, `s3express` | `s3` | Configures the gateway to interface with either normal S3 buckets or S3 Express One Zone | | `DEBUG` | No | `true`, `false` | `false` | Flag enabling AWS signatures debug output | | `APPEND_SLASH_FOR_POSSIBLE_DIRECTORY` | No | `true`, `false` | `false` | Flag enabling the return a 302 with a `/` appended to the path. This is independent of the behavior selected in `ALLOW_DIRECTORY_LIST` or `PROVIDE_INDEX_PAGE`. | | `DIRECTORY_LISTING_PATH_PREFIX` | No | | | In `ALLOW_DIRECTORY_LIST=true` mode [adds defined prefix to links](#configuring-directory-listing) | | `DNS_RESOLVERS` | No | | | DNS resolvers (separated by single spaces) to configure NGINX with | | `PROXY_CACHE_MAX_SIZE` | No | | `10g` | Limits cache size | -| `PROXY_CACHE_INACTIVE` | No | | `60m` | Cached data that are not accessed during the time specified by the parameter get removed from the cache regardless of their freshness -| `PROXY_CACHE_SLICE_SIZE` | No | | `1m` | For requests with a `Range` header included, determines the size of the chunks in which the file is fetched. Values much smaller than the requests can lead to inefficiencies due to reading and writing many files. See [below for more details](#byte-range-requests-and-caching) | | +| `PROXY_CACHE_INACTIVE` | No | | `60m` | Cached data that are not accessed during the time specified by the parameter get removed from the cache regardless of their freshness | +| `PROXY_CACHE_SLICE_SIZE` | No | | `1m` | For requests with a `Range` header included, determines the size of the chunks in which the file is fetched. Values much smaller than the requests can lead to inefficiencies due to reading and writing many files. 
See [below for more details](#byte-range-requests-and-caching) | | `PROXY_CACHE_VALID_OK` | No | | `1h` | Sets caching time for response code 200 and 302 | | `PROXY_CACHE_VALID_NOTFOUND` | No | | `1m` | Sets caching time for response code 404 | | `PROXY_CACHE_VALID_FORBIDDEN` | No | | `30s` | Sets caching time for response code 403 | @@ -63,6 +64,29 @@ There are few optional environment variables that can be used. be coded to the current AWS region. This environment variable will be ignored if `STS_ENDPOINT` is set. Valid options are: `global` (default) or `regional`. +### Choosing a `S3_STYLE` Setting +**If you are using AWS S3 or S3 Express One Zone, use `virtual-v2`.** We are maintaining `virtual` temporarily until we hear from the community that `virtual-v2` does not cause issues - or we introduce a versioning system that allows us to safely flag breaking changes. +Until then, `virtual` works as before, and `default` still causes the `virtual` behavior to be used. + +**`virtual-v2` is not expected to be a breaking change** but we are being cautious. 
+ +A full reference for S3 addressing styles may be found [here](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html) + +Here is the difference between `virtual` and `virtual-v2`: +#### virtual +* Proxied endpoint: `S3_SERVER:S3_SERVER_PORT` +* `Host` header: `S3_BUCKET_NAME.S3_SERVER` +* `host` field in the [S3 V4 `CanonicalHeaders`](https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html): `S3_BUCKET_NAME.S3_SERVER` + +#### virtual-v2 +All items are set to the same value: +* Proxied endpoint: `S3_BUCKET_NAME.S3_SERVER:S3_SERVER_PORT` +* `Host` header: `S3_BUCKET_NAME.S3_SERVER:S3_SERVER_PORT` +* `host` field in the [S3 V4 `CanonicalHeaders`](https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html): `S3_BUCKET_NAME.S3_SERVER:S3_SERVER_PORT` + +#### path +`path` style routing does not prepend the bucket name to the host, and includes it as the first segment in the request path. AWS is actively trying to move away from this method. Some S3 compatible object stores may require that you use this setting - but try to avoid it if your object store works with `virtual-v2`. + ### Configuring Directory Listing @@ -124,6 +148,37 @@ Setting your slice size too small can have performance impacts since NGINX perfo You may make byte-range requests and normal requests for the same file and NGINX will automatically handle them differently. The caches for file chunks and normal file requests are separate on disk. +## Usage with AWS S3 Express One Zone +The gateway may be used to proxy files in the AWS S3 Express One Zone product (also called Directory Buckets). + +To do so, be sure that `S3_STYLE` is set to `virtual-v2`. Additionally, the `S3_SERVER` configuration must be set to a combination of the bucket name and the [Zonal Endpoint](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-networking.html#s3-express-endpoints). 
+ +### Directory Bucket Names +See the [official documentation](https://docs.aws.amazon.com/AmazonS3/latest/userguide/directory-bucket-naming-rules.html) for the most up to date rules on Directory Bucket naming. + +Directory Buckets must have names matching this format: +``` +bucket-base-name--azid--x-s3 +``` +For example: +``` +bucket-base-name--usw2-az1--x-s3 +``` +### Final Configuration +The bucket name must be prepended to the zonal endpoint like this +``` +bucket-base-name--usw2-az1--x-s3.s3express-usw2-az1.us-west-2.amazonaws.com +``` +The above is the value that must be provided to the `S3_SERVER` variable. +Additionally, the `S3_BUCKET_NAME` must be set to the full bucket name with the suffix: +``` +bucket-base-name--usw2-az1--x-s3 +``` +Buckets created in the AWS UI don't require manual specification of a suffix but it must be included in the gateway configuration. + +### Trying it Out +A sample Terraform script to provision a bucket is provided in `/deployments/s3_express`. + ## Running as a Systemd Service An [install script](/standalone_ubuntu_oss_install.sh) for the gateway shows diff --git a/oss/etc/nginx/templates/upstreams.conf.template b/oss/etc/nginx/templates/upstreams.conf.template index 9e93a857..383c1667 100644 --- a/oss/etc/nginx/templates/upstreams.conf.template +++ b/oss/etc/nginx/templates/upstreams.conf.template @@ -4,8 +4,5 @@ resolver ${DNS_RESOLVERS}; upstream storage_urls { # Upstreams are not refreshed until NGINX configuration is reloaded. # NGINX Plus will dynamically reload upstreams when DNS records are changed. - - # Be sure to specify the port in the S3_SERVER and be sure that port - # corresponds to the https/http in the proxy_pass directive. 
- server ${S3_SERVER}:${S3_SERVER_PORT}; + server ${S3_UPSTREAM}; } diff --git a/plus/etc/nginx/templates/upstreams.conf.template b/plus/etc/nginx/templates/upstreams.conf.template index 5074cd14..6c87e6bd 100644 --- a/plus/etc/nginx/templates/upstreams.conf.template +++ b/plus/etc/nginx/templates/upstreams.conf.template @@ -7,7 +7,5 @@ resolver ${DNS_RESOLVERS}; upstream storage_urls { zone s3_backends 64k; - # Be sure to specify the port in the S3_SERVER and be sure that port - # corresponds to the https/http in the proxy_pass directive. - server ${S3_SERVER}:${S3_SERVER_PORT} resolve; + server ${S3_UPSTREAM} resolve; } diff --git a/settings.example b/settings.example index b2884a63..ca71efc9 100644 --- a/settings.example +++ b/settings.example @@ -6,7 +6,8 @@ S3_SERVER=s3.us-east-1.amazonaws.com S3_SERVER_PORT=443 S3_SERVER_PROTO=https S3_REGION=us-east-1 -S3_STYLE=virtual +S3_STYLE=virtual-v2 +S3_SERVICE=s3 DEBUG=false AWS_SIGS_VERSION=4 ALLOW_DIRECTORY_LIST=false diff --git a/standalone_ubuntu_oss_install.sh b/standalone_ubuntu_oss_install.sh index ee173499..4b68cae8 100644 --- a/standalone_ubuntu_oss_install.sh +++ b/standalone_ubuntu_oss_install.sh @@ -30,7 +30,7 @@ fi failed=0 -required=("S3_BUCKET_NAME" "S3_SERVER" "S3_SERVER_PORT" "S3_SERVER_PROTO" +required=("S3_SERVICE" "S3_BUCKET_NAME" "S3_SERVER" "S3_SERVER_PORT" "S3_SERVER_PROTO" "S3_REGION" "S3_STYLE" "ALLOW_DIRECTORY_LIST" "AWS_SIGS_VERSION") if [ ! 
-z ${AWS_CONTAINER_CREDENTIALS_RELATIVE_URI+x} ]; then @@ -162,6 +162,8 @@ S3_SERVER_PROTO=${S3_SERVER_PROTO} S3_SERVER=${S3_SERVER} # The S3 host/path method - 'virtual', 'path' or 'default' S3_STYLE=${S3_STYLE:-'default'} +# Name of S3 service - 's3' or 's3express' +S3_SERVICE=${S3_SERVICE:-'s3'} # Flag (true/false) enabling AWS signatures debug output (default: false) DEBUG=${DEBUG:-'false'} # Cache size limit @@ -199,6 +201,33 @@ LIMIT_METHODS_TO="GET HEAD" LIMIT_METHODS_TO_CSV="GET, HEAD" EOF fi + +# This is the primary logic to determine the s3 host used for the +# upstream (the actual proxying action) as well as the `Host` header +# +# It is currently slightly more complex than necessary because we are transitioning +# to a new logic which is defined by "virtual-v2". "virtual-v2" is the recommended setting +# for all deployments. + +# S3_UPSTREAM needs the port specified. The port must +# correspond to https/http in the proxy_pass directive. +if [ "${S3_STYLE}" == "virtual-v2" ]; then + cat >> "/etc/nginx/environment" << EOF +S3_UPSTREAM="${S3_BUCKET_NAME}.${S3_SERVER}:${S3_SERVER_PORT}" +S3_HOST_HEADER="${S3_BUCKET_NAME}.${S3_SERVER}:${S3_SERVER_PORT}" +EOF +elif [ "${S3_STYLE}" == "path" ]; then + cat >> "/etc/nginx/environment" << EOF +S3_UPSTREAM="${S3_SERVER}:${S3_SERVER_PORT}" +S3_HOST_HEADER="${S3_SERVER}:${S3_SERVER_PORT}" +EOF +else + cat >> "/etc/nginx/environment" << EOF +S3_UPSTREAM="${S3_SERVER}:${S3_SERVER_PORT}" +S3_HOST_HEADER="${S3_BUCKET_NAME}.${S3_SERVER}" +EOF +fi + set -o nounset # abort on unbound variable if [ -z "${CORS_ALLOWED_ORIGIN+x}" ]; then @@ -339,6 +368,7 @@ env S3_REGION; env AWS_SIGS_VERSION; env DEBUG; env S3_STYLE; +env S3_SERVICE; env ALLOW_DIRECTORY_LIST; events { diff --git a/test.sh b/test.sh index a12fd28e..772b9cff 100755 --- a/test.sh +++ b/test.sh @@ -350,7 +350,8 @@ runUnitTestWithOutSessionToken() { -v "$(pwd)/test/unit:/var/tmp" \ --workdir /var/tmp \ -e "DEBUG=true" \ - -e "S3_STYLE=virtual" \ + -e 
"S3_STYLE=virtual-v2" \ + -e "S3_SERVICE=s3" \ -e "AWS_ACCESS_KEY_ID=unit_test" \ -e "AWS_SECRET_ACCESS_KEY=unit_test" \ -e "S3_BUCKET_NAME=unit_test" \ @@ -369,7 +370,8 @@ runUnitTestWithOutSessionToken() { -v "$(pwd)/test/unit:/var/tmp" \ --workdir /var/tmp \ -e "DEBUG=true" \ - -e "S3_STYLE=virtual" \ + -e "S3_STYLE=virtual-v2" \ + -e "S3_SERVICE=s3" \ -e "AWS_ACCESS_KEY_ID=unit_test" \ -e "AWS_SECRET_ACCESS_KEY=unit_test" \ -e "S3_BUCKET_NAME=unit_test" \ @@ -395,7 +397,8 @@ runUnitTestWithSessionToken() { -v "$(pwd)/test/unit:/var/tmp" \ --workdir /var/tmp \ -e "DEBUG=true" \ - -e "S3_STYLE=virtual" \ + -e "S3_STYLE=virtual-v2" \ + -e "S3_SERVICE=s3" \ -e "AWS_ACCESS_KEY_ID=unit_test" \ -e "AWS_SECRET_ACCESS_KEY=unit_test" \ -e "AWS_SESSION_TOKEN=unit_test" \ @@ -415,7 +418,8 @@ runUnitTestWithSessionToken() { -v "$(pwd)/test/unit:/var/tmp" \ --workdir /var/tmp \ -e "DEBUG=true" \ - -e "S3_STYLE=virtual" \ + -e "S3_STYLE=virtual-v2" \ + -e "S3_SERVICE=s3" \ -e "AWS_ACCESS_KEY_ID=unit_test" \ -e "AWS_SECRET_ACCESS_KEY=unit_test" \ -e "AWS_SESSION_TOKEN=unit_test" \ diff --git a/test/docker-compose.yaml b/test/docker-compose.yaml index 44c58763..2faa9c8a 100644 --- a/test/docker-compose.yaml +++ b/test/docker-compose.yaml @@ -22,7 +22,8 @@ services: S3_SERVER_PROTO: "http" S3_REGION: "us-east-1" DEBUG: "true" - S3_STYLE: "virtual" + S3_STYLE: "${S3_STYLE:-virtual-v2}" + S3_SERVICE: "s3" ALLOW_DIRECTORY_LIST: PROVIDE_INDEX_PAGE: APPEND_SLASH_FOR_POSSIBLE_DIRECTORY: @@ -39,6 +40,7 @@ services: minio: image: quay.io/minio/minio:RELEASE.2023-06-09T07-32-12Z + hostname: bucket-1.minio ports: - "9090:9000/tcp" restart: "no"