diff --git a/docs/GLOBAL_TIMEOUT.md b/docs/GLOBAL_TIMEOUT.md new file mode 100644 index 00000000..c11a85af --- /dev/null +++ b/docs/GLOBAL_TIMEOUT.md @@ -0,0 +1,193 @@ +# Global Timeout for Multi-Node Failover + +## Overview + +The SDK now supports a global timeout feature that caps the total time spent across all RPC nodes during failover operations. This prevents scenarios where slow nodes cause the entire failover loop to take too long. + +## Problem Solved + +Previously, if all RPC nodes were slow (but not completely unresponsive), the failover loop could take an excessive amount of time. For example, with 3 nodes each taking 30 seconds to timeout, the total operation could take up to 90 seconds. + +With the global timeout feature, you can set a maximum time limit for the entire failover operation, regardless of how many nodes are configured. + +## Configuration + +### Go SDK + +#### Using NetworkConfig + +```go +config := rpc.NetworkConfig{ + Name: "custom", + HorizonURL: "https://horizon.stellar.org", + NetworkPassphrase: "Public Global Stellar Network ; September 2015", + SorobanRPCURL: "https://soroban-rpc.stellar.org", + TotalTimeout: 30 * time.Second, // 30 second global timeout +} + +client, err := rpc.NewClient(rpc.WithNetworkConfig(config)) +``` + +#### Using Builder Options + +```go +client, err := rpc.NewClient( + rpc.WithNetwork(rpc.Mainnet), + rpc.WithAltURLs([]string{ + "https://horizon1.stellar.org", + "https://horizon2.stellar.org", + "https://horizon3.stellar.org", + }), + rpc.WithTotalTimeout(45 * time.Second), // 45 second global timeout +) +``` + +#### Default Values + +The predefined network configurations have a default global timeout of 60 seconds: +- `TestnetConfig.TotalTimeout = 60 * time.Second` +- `MainnetConfig.TotalTimeout = 60 * time.Second` +- `FuturenetConfig.TotalTimeout = 60 * time.Second` + +### TypeScript SDK + +#### Using RPCConfig + +```typescript +import { FallbackRPCClient, RPCConfigParser } from './rpc'; + +const config = RPCConfigParser.loadConfig({ + rpc: ['https://rpc1.stellar.org', 'https://rpc2.stellar.org'], + timeout: 30000, // Individual request timeout (30s) + totalTimeout: 60000, // Global timeout for all nodes (60s) + retries: 3, +}); + +const client = new FallbackRPCClient(config); +``` + +#### Default Values + +The TypeScript SDK has a default global timeout of 60 seconds (60000ms) when using `RPCConfigParser.loadConfig()`. + +## Behavior + +### When Global Timeout is Enabled + +1. **Timeout Enforcement**: The SDK starts a timer when beginning the failover loop +2. **Context Cancellation**: If the global timeout is reached, the operation is cancelled immediately +3. **Error Response**: A timeout error is returned indicating the global timeout was exceeded +4. **Early Termination**: The SDK stops trying additional nodes once the timeout is reached + +### When Global Timeout is Disabled + +- Set `TotalTimeout` to `0` (Go) or `totalTimeout` to `0` (TypeScript) to disable +- The SDK will attempt all configured nodes regardless of total time spent +- Individual request timeouts still apply + +## Affected Methods + +The global timeout applies to all methods that use multi-node failover: + +### Go SDK +- `GetTransaction()` +- `GetLedgerHeader()` +- `SimulateTransaction()` +- `GetHealth()` +- `GetLedgerEntries()` (single batch mode) + +### TypeScript SDK +- All methods that use `FallbackRPCClient.request()` +- `getTransaction()` +- `simulateTransaction()` +- `getHealth()` +- `getLatestLedger()` + +## Examples + +### Scenario 1: Fast Failover + +```go +// 3 nodes, each taking 2 seconds to fail +// Without global timeout: 6+ seconds total +// With 4-second global timeout: ~4 seconds total + +client, _ := rpc.NewClient( + rpc.WithAltURLs([]string{"slow1", "slow2", "slow3"}), + rpc.WithTotalTimeout(4 * time.Second), +) + +start := time.Now() +_, err := client.GetTransaction(ctx, "hash") +elapsed := time.Since(start) // ~4 seconds, not 6+ +``` + +### Scenario 2: Success Within Timeout + +```go +// First node fails quickly, second succeeds +// Total time: <1 second (well within 10s timeout) + +client, _ := rpc.NewClient( + rpc.WithAltURLs([]string{"fail-fast", "success"}), + rpc.WithTotalTimeout(10 * time.Second), +) + +resp, err := client.GetTransaction(ctx, "hash") // Success +``` + +## Error Handling + +### Go SDK + +```go +_, err := client.GetTransaction(ctx, "hash") +if err != nil { + if strings.Contains(err.Error(), "global timeout exceeded") { + // Handle global timeout + log.Printf("Operation timed out after %v", client.Config.TotalTimeout) + } else { + // Handle other errors + log.Printf("Other error: %v", err) + } +} +``` + +### TypeScript SDK + +```typescript +try { + const result = await client.request('/transaction/hash'); +} catch (error) { + if (error.message.includes('Global timeout exceeded')) { + // Handle global timeout + console.log(`Operation timed out after ${config.totalTimeout}ms`); + } else { + // Handle other errors + console.log(`Other error: ${error.message}`); + } +} +``` + +## Best Practices + +1. **Set Reasonable Timeouts**: Consider your application's latency requirements +2. **Monitor Metrics**: Track timeout occurrences to tune the timeout value +3. **Fallback Strategy**: Have a plan for when all nodes timeout +4. **Individual vs Global**: Set individual request timeouts shorter than global timeout +5. **Testing**: Test timeout behavior with slow/unresponsive test servers + +## Migration + +### Existing Code + +Existing code continues to work without changes. The default 60-second global timeout provides reasonable protection against excessively long failover operations. + +### Upgrading + +To take advantage of custom global timeouts: + +1. **Go**: Use `WithTotalTimeout()` builder option or set `TotalTimeout` in `NetworkConfig` +2. **TypeScript**: Pass `totalTimeout` to `RPCConfigParser.loadConfig()` + +No breaking changes are introduced by this feature. \ No newline at end of file diff --git a/internal/rpc/builder.go b/internal/rpc/builder.go index 4e99d39e..54b40a8b 100644 --- a/internal/rpc/builder.go +++ b/internal/rpc/builder.go @@ -127,6 +127,20 @@ func WithRequestTimeout(d time.Duration) ClientOption { } } +// WithTotalTimeout sets a global timeout for multi-node failover loops. +// This caps the total time spent across all nodes during failover operations. +// If not set, the default from NetworkConfig is used. +func WithTotalTimeout(d time.Duration) ClientOption { + return func(b *clientBuilder) error { + if b.config == nil { + cfg := b.getConfig(b.network) + b.config = &cfg + } + b.config.TotalTimeout = d + return nil + } +} + func WithHTTPClient(client *http.Client) ClientOption { return func(b *clientBuilder) error { b.httpClient = client diff --git a/internal/rpc/client.go b/internal/rpc/client.go index e81317d7..b5367988 100644 --- a/internal/rpc/client.go +++ b/internal/rpc/client.go @@ -71,6 +71,7 @@ type NetworkConfig struct { HorizonURL string NetworkPassphrase string SorobanRPCURL string + TotalTimeout time.Duration // Global timeout for multi-node failover loops } // Predefined network configurations @@ -80,6 +81,7 @@ var ( HorizonURL: TestnetHorizonURL, NetworkPassphrase: "Test SDF Network ; September 2015", SorobanRPCURL: TestnetSorobanURL, + TotalTimeout: 60 * time.Second, // Default 60 seconds for failover } MainnetConfig = NetworkConfig{ @@ -87,6 +89,7 @@ var ( HorizonURL: MainnetHorizonURL, NetworkPassphrase: "Public Global Stellar Network ; September 2015", SorobanRPCURL: MainnetSorobanURL, + TotalTimeout: 60 * time.Second, // Default 60 seconds for failover } FuturenetConfig = NetworkConfig{ @@ -94,6 +97,7 @@ var ( HorizonURL: FuturenetHorizonURL, NetworkPassphrase: "Test SDF Future Network ; October 2022", SorobanRPCURL: FuturenetSorobanURL, + TotalTimeout: 60 * time.Second, // Default 60 seconds for failover } ) @@ -487,9 +491,26 @@ type StellarbeatResponse struct { // GetTransaction fetches the transaction details and full XDR data func (c *Client) GetTransaction(ctx context.Context, hash string) (*TransactionResponse, error) { + // Apply global timeout if configured + if c.Config.TotalTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, c.Config.TotalTimeout) + defer cancel() + } + attempts := c.endpointAttempts() var failures []NodeFailure for attempt := 0; attempt < attempts; attempt++ { + // Check if context has been cancelled or timed out + select { + case <-ctx.Done(): + if ctx.Err() == context.DeadlineExceeded { + return nil, errors.WrapRPCTimeout(fmt.Errorf("global timeout exceeded after %v", c.Config.TotalTimeout)) + } + return nil, ctx.Err() + default: + } + resp, err := c.getTransactionAttempt(ctx, hash) if err == nil { c.markHorizonSuccess() @@ -636,9 +657,26 @@ type GetLedgerEntriesResponse struct { // // GetLedgerHeader fetches ledger header details for a specific sequence with automatic fallback. func (c *Client) GetLedgerHeader(ctx context.Context, sequence uint32) (*LedgerHeaderResponse, error) { + // Apply global timeout if configured + if c.Config.TotalTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, c.Config.TotalTimeout) + defer cancel() + } + attempts := c.endpointAttempts() var failures []NodeFailure for attempt := 0; attempt < attempts; attempt++ { + // Check if context has been cancelled or timed out + select { + case <-ctx.Done(): + if ctx.Err() == context.DeadlineExceeded { + return nil, errors.WrapRPCTimeout(fmt.Errorf("global timeout exceeded after %v", c.Config.TotalTimeout)) + } + return nil, ctx.Err() + default: + } + resp, err := c.getLedgerHeaderAttempt(ctx, sequence) if err == nil { c.markHorizonSuccess() @@ -851,8 +889,24 @@ func (c *Client) GetLedgerEntries(ctx context.Context, keys []string) (map[strin } // Single batch - use existing failover logic + // Apply global timeout if configured + if c.Config.TotalTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, c.Config.TotalTimeout) + defer cancel() + } + attempts := c.endpointAttempts() for attempt := 0; attempt < attempts; attempt++ { + // Check if context has been cancelled or timed out + select { + case <-ctx.Done(): + if ctx.Err() == context.DeadlineExceeded { + return nil, errors.WrapRPCTimeout(fmt.Errorf("global timeout exceeded after %v", c.Config.TotalTimeout)) + } + return nil, ctx.Err() + default: + } fetchedEntries, err := c.getLedgerEntriesAttempt(ctx, keysToFetch) if err == nil { @@ -1272,9 +1326,26 @@ type SimulateTransactionResponse struct { // SimulateTransaction calls Soroban RPC simulateTransaction using a base64 TransactionEnvelope XDR. func (c *Client) SimulateTransaction(ctx context.Context, envelopeXdr string) (*SimulateTransactionResponse, error) { + // Apply global timeout if configured + if c.Config.TotalTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, c.Config.TotalTimeout) + defer cancel() + } + attempts := c.endpointAttempts() var failures []NodeFailure for attempt := 0; attempt < attempts; attempt++ { + // Check if context has been cancelled or timed out + select { + case <-ctx.Done(): + if ctx.Err() == context.DeadlineExceeded { + return nil, errors.WrapRPCTimeout(fmt.Errorf("global timeout exceeded after %v", c.Config.TotalTimeout)) + } + return nil, ctx.Err() + default: + } + resp, err := c.simulateTransactionAttempt(ctx, envelopeXdr) if err == nil { c.markSorobanSuccess() @@ -1384,9 +1455,26 @@ func (c *Client) simulateTransactionAttempt(ctx context.Context, envelopeXdr str // GetHealth checks the health of the Soroban RPC endpoint. func (c *Client) GetHealth(ctx context.Context) (*GetHealthResponse, error) { + // Apply global timeout if configured + if c.Config.TotalTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, c.Config.TotalTimeout) + defer cancel() + } + attempts := c.endpointAttempts() var failures []NodeFailure for attempt := 0; attempt < attempts; attempt++ { + // Check if context has been cancelled or timed out + select { + case <-ctx.Done(): + if ctx.Err() == context.DeadlineExceeded { + return nil, errors.WrapRPCTimeout(fmt.Errorf("global timeout exceeded after %v", c.Config.TotalTimeout)) + } + return nil, ctx.Err() + default: + } + resp, err := c.getHealthAttempt(ctx) if err == nil { c.markSorobanSuccess() diff --git a/internal/rpc/timeout_test.go b/internal/rpc/timeout_test.go new file mode 100644 index 00000000..65a9d8ec --- /dev/null +++ b/internal/rpc/timeout_test.go @@ -0,0 +1,288 @@ +// Copyright 2026 Erst Users +// SPDX-License-Identifier: Apache-2.0 + +package rpc + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGlobalTimeout_ExpiresBeforeAllNodes(t *testing.T) { + // Create slow servers that take 2 seconds each + server1 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server1.Close() + + server2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server2.Close() + + server3 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server3.Close() + + // Create client with 3 second global timeout (should timeout before trying all 3 servers) + client, err := NewClient( + WithAltURLs([]string{server1.URL, server2.URL, server3.URL}), + WithTotalTimeout(3*time.Second), + WithRequestTimeout(5*time.Second), // Individual request timeout longer than global + ) + require.NoError(t, err) + + start := time.Now() + ctx := context.Background() + + _, err = client.GetTransaction(ctx, "test-hash") + + elapsed := time.Since(start) + + // Should fail with timeout error + assert.Error(t, err) + assert.Contains(t, err.Error(), "global timeout exceeded") + + // Should timeout around 3 seconds, not 6+ seconds (2s per server * 3 servers) + assert.Less(t, elapsed, 4*time.Second, "Should timeout before trying all servers") + assert.Greater(t, elapsed, 2*time.Second, "Should have tried at least one server") +} + +func TestGlobalTimeout_SucceedsWithinTimeout(t *testing.T) { + // First server fails quickly, second succeeds + server1 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer server1.Close() + + server2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Simulate successful transaction response + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + fmt.Fprintf(w, `{ + "hash": "test-hash", + "ledger": 123, + "envelope_xdr": "test-envelope", + "result_xdr": "test-result", + "result_meta_xdr": "test-meta", + "successful": true + }`) + })) + defer server2.Close() + + client, err := NewClient( + WithAltURLs([]string{server1.URL, server2.URL}), + WithTotalTimeout(10*time.Second), + ) + require.NoError(t, err) + + start := time.Now() + ctx := context.Background() + + resp, err := client.GetTransaction(ctx, "test-hash") + + elapsed := time.Since(start) + + // Should succeed + assert.NoError(t, err) + assert.NotNil(t, resp) + assert.Equal(t, "test-hash", resp.Hash) + + // Should complete quickly (well under the 10s timeout) + assert.Less(t, elapsed, 2*time.Second) +} + +func TestGlobalTimeout_NoTimeoutSet(t *testing.T) { + // Server that takes a long time + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(1 * time.Second) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + // Create client without global timeout (should use default behavior) + client, err := NewClient( + WithAltURLs([]string{server.URL}), + WithRequestTimeout(2*time.Second), + ) + require.NoError(t, err) + + // Verify default timeout is set + assert.Equal(t, 60*time.Second, client.Config.TotalTimeout) + + ctx := context.Background() + _, err = client.GetTransaction(ctx, "test-hash") + + // Should fail with connection error, not timeout + assert.Error(t, err) + assert.NotContains(t, err.Error(), "global timeout exceeded") +} + +func TestGlobalTimeout_ZeroTimeout(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(100 * time.Millisecond) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + // Create client with zero timeout (should disable global timeout) + client, err := NewClient( + WithAltURLs([]string{server.URL}), + WithTotalTimeout(0), + ) + require.NoError(t, err) + + ctx := context.Background() + _, err = client.GetTransaction(ctx, "test-hash") + + // Should fail with connection error, not timeout + assert.Error(t, err) + assert.NotContains(t, err.Error(), "global timeout exceeded") +} + +func TestGlobalTimeout_ContextAlreadyCancelled(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + client, err := NewClient( + WithAltURLs([]string{server.URL}), + WithTotalTimeout(10*time.Second), + ) + require.NoError(t, err) + + // Create already cancelled context + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + _, err = client.GetTransaction(ctx, "test-hash") + + // Should fail with context cancelled error + assert.Error(t, err) + assert.Equal(t, context.Canceled, err) +} + +func TestGlobalTimeout_NetworkConfigDefault(t *testing.T) { + // Test that predefined network configs have default timeout + assert.Equal(t, 60*time.Second, TestnetConfig.TotalTimeout) + assert.Equal(t, 60*time.Second, MainnetConfig.TotalTimeout) + assert.Equal(t, 60*time.Second, FuturenetConfig.TotalTimeout) +} + +func TestWithTotalTimeout_BuilderOption(t *testing.T) { + client, err := NewClient( + WithNetwork(Testnet), + WithTotalTimeout(30*time.Second), + ) + require.NoError(t, err) + + assert.Equal(t, 30*time.Second, client.Config.TotalTimeout) +} + +func TestGlobalTimeout_GetLedgerHeader(t *testing.T) { + // Create slow servers + server1 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server1.Close() + + server2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server2.Close() + + client, err := NewClient( + WithAltURLs([]string{server1.URL, server2.URL}), + WithTotalTimeout(3*time.Second), + ) + require.NoError(t, err) + + start := time.Now() + ctx := context.Background() + + _, err = client.GetLedgerHeader(ctx, 123) + + elapsed := time.Since(start) + + assert.Error(t, err) + assert.Contains(t, err.Error(), "global timeout exceeded") + assert.Less(t, elapsed, 4*time.Second) +} + +func TestGlobalTimeout_SimulateTransaction(t *testing.T) { + // Create slow servers + server1 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server1.Close() + + server2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server2.Close() + + client, err := NewClient( + WithAltURLs([]string{server1.URL, server2.URL}), + WithTotalTimeout(3*time.Second), + ) + require.NoError(t, err) + + start := time.Now() + ctx := context.Background() + + _, err = client.SimulateTransaction(ctx, "test-envelope-xdr") + + elapsed := time.Since(start) + + assert.Error(t, err) + assert.Contains(t, err.Error(), "global timeout exceeded") + assert.Less(t, elapsed, 4*time.Second) +} + +func TestGlobalTimeout_GetHealth(t *testing.T) { + // Create slow servers + server1 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server1.Close() + + server2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(2 * time.Second) + w.WriteHeader(http.StatusInternalServerError) + })) + defer server2.Close() + + client, err := NewClient( + WithAltURLs([]string{server1.URL, server2.URL}), + WithTotalTimeout(3*time.Second), + ) + require.NoError(t, err) + + start := time.Now() + ctx := context.Background() + + _, err = client.GetHealth(ctx) + + elapsed := time.Since(start) + + assert.Error(t, err) + assert.Contains(t, err.Error(), "global timeout exceeded") + assert.Less(t, elapsed, 4*time.Second) +} \ No newline at end of file diff --git a/src/config/rpc-config.ts b/src/config/rpc-config.ts index 3582f855..91b45a98 100644 --- a/src/config/rpc-config.ts +++ b/src/config/rpc-config.ts @@ -13,6 +13,7 @@ export interface RPCConfig { circuitBreakerThreshold: number; circuitBreakerTimeout: number; maxRedirects: number; + totalTimeout?: number; // Global timeout for multi-node failover loops (ms) headers?: Record; middleware?: SDKMiddleware[]; } @@ -61,6 +62,7 @@ export class RPCConfigParser { rpc?: string | string[]; timeout?: number; retries?: number; + totalTimeout?: number; }): RPCConfig { // Get URLs from CLI args or environment variable const urlInput = options.rpc || process.env.STELLAR_RPC_URLS; @@ -77,6 +79,7 @@ export class RPCConfigParser { urls, timeout: options.timeout, retries: options.retries, + totalTimeout: options.totalTimeout, // leave other fields undefined so defaults applier can fill retryDelay: undefined as any, circuitBreakerThreshold: undefined as any, @@ -92,6 +95,7 @@ export class RPCConfigParser { circuitBreakerThreshold: 5, circuitBreakerTimeout: 60000, maxRedirects: 5, + totalTimeout: partial.totalTimeout ?? 60000, // Default 60 seconds for failover }; // Phase 3: validate using pluggable validators diff --git a/src/rpc/fallback-client-timeout.test.ts b/src/rpc/fallback-client-timeout.test.ts new file mode 100644 index 00000000..691f741b --- /dev/null +++ b/src/rpc/fallback-client-timeout.test.ts @@ -0,0 +1,224 @@ +// Copyright (c) 2026 dotandev +// SPDX-License-Identifier: MIT OR Apache-2.0 + +import { FallbackRPCClient } from './fallback-client'; +import { RPCConfig } from '../config/rpc-config'; + +// Mock axios for testing +jest.mock('axios'); +import axios from 'axios'; +const mockedAxios = axios as jest.Mocked; + +describe('FallbackRPCClient Global Timeout', () => { + beforeEach(() => { + jest.clearAllMocks(); + mockedAxios.create.mockReturnValue(mockedAxios); + }); + + it('should timeout before trying all nodes when global timeout is exceeded', async () => { + const config: RPCConfig = { + urls: ['http://slow1.com', 'http://slow2.com', 'http://slow3.com'], + timeout: 5000, // Individual request timeout + totalTimeout: 3000, // Global timeout (3 seconds) + retries: 3, + retryDelay: 1000, + circuitBreakerThreshold: 5, + circuitBreakerTimeout: 60000, + maxRedirects: 5, + }; + + const client = new FallbackRPCClient(config); + + // Mock slow responses (2 seconds each) + mockedAxios.mockImplementation(() => + new Promise((resolve) => { + setTimeout(() => { + resolve({ + status: 500, + statusText: 'Internal Server Error', + data: { error: 'Server error' }, + headers: {}, + config: {}, + }); + }, 2000); + }) + ); + + const startTime = Date.now(); + + await expect(client.request('/test')).rejects.toThrow('Global timeout exceeded'); + + const elapsed = Date.now() - startTime; + + // Should timeout around 3 seconds, not 6+ seconds (2s per server * 3 servers) + expect(elapsed).toBeLessThan(4000); + expect(elapsed).toBeGreaterThan(2500); // Should have tried at least one server + }); + + it('should succeed within global timeout', async () => { + const config: RPCConfig = { + urls: ['http://fail.com', 'http://success.com'], + timeout: 5000, + totalTimeout: 10000, // 10 second global timeout + retries: 3, + retryDelay: 1000, + circuitBreakerThreshold: 5, + circuitBreakerTimeout: 60000, + maxRedirects: 5, + }; + + const client = new FallbackRPCClient(config); + + let callCount = 0; + mockedAxios.mockImplementation(() => { + callCount++; + if (callCount === 1) { + // First call fails + return Promise.reject(new Error('Network error')); + } else { + // Second call succeeds + return Promise.resolve({ + status: 200, + statusText: 'OK', + data: { result: 'success' }, + headers: {}, + config: {}, + }); + } + }); + + const startTime = Date.now(); + + const result = await client.request('/test'); + + const elapsed = Date.now() - startTime; + + expect(result).toEqual({ result: 'success' }); + expect(elapsed).toBeLessThan(2000); // Should complete quickly + }); + + it('should not apply global timeout when set to 0', async () => { + const config: RPCConfig = { + urls: ['http://slow.com'], + timeout: 1000, + totalTimeout: 0, // Disabled + retries: 3, + retryDelay: 1000, + circuitBreakerThreshold: 5, + circuitBreakerTimeout: 60000, + maxRedirects: 5, + }; + + const client = new FallbackRPCClient(config); + + // Mock slow response + mockedAxios.mockImplementation(() => + new Promise((resolve) => { + setTimeout(() => { + resolve({ + status: 500, + statusText: 'Internal Server Error', + data: { error: 'Server error' }, + headers: {}, + config: {}, + }); + }, 500); + }) + ); + + await expect(client.request('/test')).rejects.toThrow('All RPC endpoints failed'); + // Should not throw global timeout error + }); + + it('should not apply global timeout when not configured', async () => { + const config: RPCConfig = { + urls: ['http://slow.com'], + timeout: 1000, + // totalTimeout not set + retries: 3, + retryDelay: 1000, + circuitBreakerThreshold: 5, + circuitBreakerTimeout: 60000, + maxRedirects: 5, + }; + + const client = new FallbackRPCClient(config); + + // Mock slow response + mockedAxios.mockImplementation(() => + new Promise((resolve) => { + setTimeout(() => { + resolve({ + status: 500, + statusText: 'Internal Server Error', + data: { error: 'Server error' }, + headers: {}, + config: {}, + }); + }, 500); + }) + ); + + await expect(client.request('/test')).rejects.toThrow('All RPC endpoints failed'); + // Should not throw global timeout error + }); + + it('should check timeout between endpoint attempts', async () => { + const config: RPCConfig = { + urls: ['http://fast1.com', 'http://fast2.com', 'http://fast3.com'], + timeout: 5000, + totalTimeout: 1500, // Very short global timeout + retries: 3, + retryDelay: 1000, + circuitBreakerThreshold: 5, + circuitBreakerTimeout: 60000, + maxRedirects: 5, + }; + + const client = new FallbackRPCClient(config); + + let callCount = 0; + mockedAxios.mockImplementation(() => { + callCount++; + // Each call takes 800ms and fails + return new Promise((resolve) => { + setTimeout(() => { + resolve({ + status: 500, + statusText: 'Internal Server Error', + data: { error: 'Server error' }, + headers: {}, + config: {}, + }); + }, 800); + }); + }); + + const startTime = Date.now(); + + await expect(client.request('/test')).rejects.toThrow('Global timeout exceeded'); + + const elapsed = Date.now() - startTime; + + // Should timeout after first attempt (800ms) but before second attempt would complete + expect(elapsed).toBeLessThan(2000); + expect(callCount).toBeLessThanOrEqual(2); // Should not try all 3 endpoints + }); + + it('should use default global timeout from config parser', () => { + const config: RPCConfig = { + urls: ['http://test.com'], + timeout: 30000, + retries: 3, + retryDelay: 1000, + circuitBreakerThreshold: 5, + circuitBreakerTimeout: 60000, + maxRedirects: 5, + totalTimeout: 60000, // Default from config parser + }; + + const client = new FallbackRPCClient(config); + + expect(client['config'].totalTimeout).toBe(60000); + }); +}); \ No newline at end of file diff --git a/src/rpc/fallback-client.ts b/src/rpc/fallback-client.ts index 214daffb..eebc394b 100644 --- a/src/rpc/fallback-client.ts +++ b/src/rpc/fallback-client.ts @@ -121,72 +121,99 @@ export class FallbackRPCClient { const startTime = Date.now(); let lastError: Error | null = null; - for (let attempt = 0; attempt < this.endpoints.length; attempt++) { - const endpoint = this.getNextHealthyEndpoint(); - - if (!endpoint) { - throw new Error('All RPC endpoints are unavailable'); - } - - try { - endpoint.totalRequests++; - - logger.verbose(LogCategory.RPC, `→ ${ctx.method} ${ctx.path}`); - logger.verboseIndent(LogCategory.RPC, `Endpoint: ${endpoint.url}`); - - const requestStartTime = Date.now(); - const client = this.clients.get(endpoint.url)!; + // Apply global timeout if configured + const globalTimeoutMs = this.config.totalTimeout; + const timeoutPromise = globalTimeoutMs && globalTimeoutMs > 0 + ? new Promise((_, reject) => { + setTimeout(() => { + reject(new Error(`Global timeout exceeded after ${globalTimeoutMs}ms`)); + }, globalTimeoutMs); + }) + : null; + + const executeWithGlobalTimeout = async (): Promise> => { + for (let attempt = 0; attempt < this.endpoints.length; attempt++) { + // Check if we've exceeded the global timeout + if (globalTimeoutMs && globalTimeoutMs > 0) { + const elapsed = Date.now() - startTime; + if (elapsed >= globalTimeoutMs) { + throw new Error(`Global timeout exceeded after ${elapsed}ms`); + } + } - const requestSize = ctx.data ? JSON.stringify(ctx.data).length : 0; - logger.verboseIndent(LogCategory.RPC, `${ctx.method} request to ${ctx.path}`); - logger.verboseIndent(LogCategory.RPC, `Request size: ${logger.formatBytes(requestSize)}`); + const endpoint = this.getNextHealthyEndpoint(); - const response = await this.executeWithRetry(client, ctx.method as 'GET' | 'POST', ctx.path, ctx.data, ctx.headers); + if (!endpoint) { + throw new Error('All RPC endpoints are unavailable'); + } - const duration = Date.now() - requestStartTime; - this.updateMetrics(endpoint, duration, true); + try { + endpoint.totalRequests++; - this.markSuccess(endpoint); - this.currentIndex = 0; + logger.verbose(LogCategory.RPC, `→ ${ctx.method} ${ctx.path}`); + logger.verboseIndent(LogCategory.RPC, `Endpoint: ${endpoint.url}`); - const responseSize = response.data ? JSON.stringify(response.data).length : 0; - logger.verbose(LogCategory.RPC, `← Response received (${duration}ms)`); - logger.verboseIndent(LogCategory.RPC, `Status: ${response.status} ${response.statusText}`); - logger.verboseIndent(LogCategory.RPC, `Response size: ${logger.formatBytes(responseSize)}`); + const requestStartTime = Date.now(); + const client = this.clients.get(endpoint.url)!; - return { - data: response.data, - status: response.status, - duration, - endpoint: endpoint.url, - metadata: { ...ctx.metadata }, - }; + const requestSize = ctx.data ? JSON.stringify(ctx.data).length : 0; + logger.verboseIndent(LogCategory.RPC, `${ctx.method} request to ${ctx.path}`); + logger.verboseIndent(LogCategory.RPC, `Request size: ${logger.formatBytes(requestSize)}`); - } catch (error) { - lastError = error as Error; - const duration = Date.now() - startTime; - this.updateMetrics(endpoint, duration, false); + const response = await this.executeWithRetry(client, ctx.method as 'GET' | 'POST', ctx.path, ctx.data, ctx.headers); - if (this.isRetryableError(error)) { - logger.warn(`RPC request failed: ${endpoint.url}`); + const duration = Date.now() - requestStartTime; + this.updateMetrics(endpoint, duration, true); - if (axios.isAxiosError(error)) { - logger.verbose(LogCategory.ERROR, `Request error: ${error.message}`); - if (error.code) logger.verboseIndent(LogCategory.ERROR, `Code: ${error.code}`); + this.markSuccess(endpoint); + this.currentIndex = 0; + + const responseSize = response.data ? JSON.stringify(response.data).length : 0; + logger.verbose(LogCategory.RPC, `← Response received (${duration}ms)`); + logger.verboseIndent(LogCategory.RPC, `Status: ${response.status} ${response.statusText}`); + logger.verboseIndent(LogCategory.RPC, `Response size: ${logger.formatBytes(responseSize)}`); + + return { + data: response.data, + status: response.status, + duration, + endpoint: endpoint.url, + metadata: { ...ctx.metadata }, + }; + + } catch (error) { + lastError = error as Error; + const duration = Date.now() - startTime; + this.updateMetrics(endpoint, duration, false); + + if (this.isRetryableError(error)) { + logger.warn(`RPC request failed: ${endpoint.url}`); + + if (axios.isAxiosError(error)) { + logger.verbose(LogCategory.ERROR, `Request error: ${error.message}`); + if (error.code) logger.verboseIndent(LogCategory.ERROR, `Code: ${error.code}`); + } + + this.markFailure(endpoint); + continue; + } else { + this.markFailure(endpoint); + throw error; } - - this.markFailure(endpoint); - continue; - } else { - this.markFailure(endpoint); - throw error; } } - } - const totalDuration = Date.now() - startTime; - logger.error(`All RPC endpoints failed after ${totalDuration}ms`); - throw new Error(`All RPC endpoints failed: ${lastError?.message}`); + const totalDuration = Date.now() - startTime; + logger.error(`All RPC endpoints failed after ${totalDuration}ms`); + throw new Error(`All RPC endpoints failed: ${lastError?.message}`); + }; + + // Race between the execution and the global timeout + if (timeoutPromise) { + return Promise.race([executeWithGlobalTimeout(), timeoutPromise]); + } else { + return executeWithGlobalTimeout(); + } } /**