Skip to content

Commit

Permalink
improve error messages and update readme
Browse files Browse the repository at this point in the history
  • Loading branch information
cookel2 committed Jan 13, 2025
1 parent e3d3e85 commit 23704b8
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 26 deletions.
42 changes: 22 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,28 @@ Batch nomad job for reindexing search

### Configuration

| Environment variable | Default | Description |
|-----------------------------|--------------------------|----------------------------------------------------------------------------|
| AWS_REGION | "eu-west-2" | AWS region |
| AWS_SEC_SKIP_VERIFY | false | Whether to skip TLS verification for AWS requests |
| DATASET_API_URL | "http://localhost:22000" | URL of the Dataset API |
| DATASET_PAGINATION_LIMIT | 500 | Number of datasets to fetch per page of requests to Dataset API |
| ENABLE_TOPIC_TAGGING | false | Whether to enable topic auto-tagging |
| ELASTIC_SEARCH_URL | "http://localhost:11200" | URL of elastic search server (or AWS Opensearch) |
| MAX_DOCUMENT_EXTRACTIONS | 100 | Max number of concurrent Document Extractions (ie. Zebedee connections) |
| MAX_DOCUMENT_TRANSFORMS | 20 | Max number of concurrent Document Transformation workers |
| MAX_DATASET_EXTRACTIONS | 20 | Max number of concurrent Dataset Extractions (ie. Dataset API connections) |
| MAX_DATASET_TRANSFORMS | 10 | Max number of concurrent Dataset Transformation workers |
| SERVICE_AUTH_TOKEN | "" | Zebedee Service Auth Token for API requests |
| SIGN_ELASTICSEARCH_REQUESTS | false | Whether to sign elasticsearch requests (true for AWS) |
| TOPIC_API_URL | "http://localhost:25300" | URL of the Topic API |
| TRACKER_INTERVAL | 5s | Interval for progress tracker summary logging |
| ZEBEDEE_URL | "http://localhost:8082" | URL of publishing zebedee |
| ZEBEDEE_TIMEOUT | 2m | Timeout for Zebedee endpoints - published index can take > 2 minutes |
| ENABLE_DATASET_API_REINDEX | true | Whether to get documents from the Dataset API for reindexing or not |
| ENABLE_ZEBEDEE_REINDEX | true | Whether to get documents from Zebedee for reindexing or not |
| Environment variable | Default | Description |
|-------------------------------|--------------------------------------------|------------------------------------------------------------------------------------------------|
| AWS_REGION | "eu-west-2" | AWS region |
| AWS_SEC_SKIP_VERIFY | false | Whether to skip TLS verification for AWS requests |
| DATASET_API_URL | "http://localhost:22000" | URL of the Dataset API |
| DATASET_PAGINATION_LIMIT | 500 | Number of datasets to fetch per page of requests to Dataset API |
| ENABLE_TOPIC_TAGGING | false | Whether to enable topic auto-tagging |
| ELASTIC_SEARCH_URL | "http://localhost:11200" | URL of the Elasticsearch server (or AWS OpenSearch) |
| MAX_DOCUMENT_EXTRACTIONS | 100 | Max number of concurrent Document Extractions (i.e. Zebedee connections) |
| MAX_DOCUMENT_TRANSFORMS | 20 | Max number of concurrent Document Transformation workers |
| MAX_DATASET_EXTRACTIONS | 20 | Max number of concurrent Dataset Extractions (i.e. Dataset API connections) |
| MAX_DATASET_TRANSFORMS | 10 | Max number of concurrent Dataset Transformation workers |
| SERVICE_AUTH_TOKEN | "" | Zebedee Service Auth Token for API requests |
| SIGN_ELASTICSEARCH_REQUESTS | false | Whether to sign elasticsearch requests (true for AWS) |
| TOPIC_API_URL | "http://localhost:25300" | URL of the Topic API |
| TRACKER_INTERVAL | 5s | Interval for progress tracker summary logging |
| ZEBEDEE_URL | "http://localhost:8082" | URL of publishing zebedee |
| ZEBEDEE_TIMEOUT | 2m | Timeout for Zebedee endpoints - published index can take > 2 minutes |
| ENABLE_DATASET_API_REINDEX | false | Whether to get documents from the Dataset API for reindexing or not |
| ENABLE_ZEBEDEE_REINDEX | false | Whether to get documents from Zebedee for reindexing or not |
| ENABLE_OTHER_SERVICES_REINDEX | false | Whether to get documents from other upstream services for reindexing or not |
| OTHER_UPSTREAM_SERVICES | {{"http://localhost:29600", "/resources"}} | List of string pairs, each consisting of a domain and endpoint, representing upstream services |

### Local Prerequisites

Expand Down
4 changes: 2 additions & 2 deletions task/reindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func reindex(ctx context.Context, cfg *config.Config) error {
serviceEndpoint := cfg.OtherUpstreamServices[i][1]
upstreamStubClient := upstreamStubSDK.New(serviceURL, serviceEndpoint)
if upstreamStubClient == nil {
err := errors.New("failed to create search upstream stub client for upstream service: " + serviceURL + serviceEndpoint)
err := errors.New("failed to create client for upstream service: " + serviceURL + serviceEndpoint)
log.Error(ctx, err.Error(), err)
return err
}
Expand Down Expand Up @@ -251,7 +251,7 @@ func getResourceItems(ctx context.Context, errChan chan error, upstreamStubClien
resources, err := upstreamStubClient.GetResources(ctx, opts)
if err != nil {
errChan <- err
log.Error(ctx, "failed to get resources from search upstream stub", err, log.Data{"options": opts})
log.Error(ctx, "failed to get resources from upstream service", err, log.Data{"options": opts})
return
}

Expand Down
8 changes: 4 additions & 4 deletions task/reindex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ func TestURIProducer(t *testing.T) {
return zebedee.PublishedIndex{
Count: 4,
Items: []zebedee.PublishedIndexItem{
zebedee.PublishedIndexItem{URI: "/economy"},
zebedee.PublishedIndexItem{URI: "/timeseries/ec12/previous/nov2011"},
zebedee.PublishedIndexItem{URI: "/timeseries/ec12"},
zebedee.PublishedIndexItem{URI: "/dataset/ec12/previous"},
{URI: "/economy"},
{URI: "/timeseries/ec12/previous/nov2011"},
{URI: "/timeseries/ec12"},
{URI: "/dataset/ec12/previous"},
},
Limit: 20,
Offset: 0,
Expand Down

0 comments on commit 23704b8

Please sign in to comment.