Skip to content

Commit

Permalink
Merge pull request #349 from eventflow/configure-retry-delay
Browse files Browse the repository at this point in the history
MSSQL config and cleanup
  • Loading branch information
rasmus authored Jul 10, 2017
2 parents 0095439 + d63436e commit e827d61
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 59 deletions.
8 changes: 8 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@
that is useful when developing event and read model stores for EventFlow.
The package is an initial release and its interface is unstable and
subject to change
* New: Now possible to configure retry delay for MSSQL error `40501` (server
too busy) using `IMsSqlConfiguration.SetServerBusyRetryDelay(RetryDelay)`
* New: Now possible to configure the retry count of transient exceptions for
MSSQL and SQLite using the `ISqlConfiguration.SetTransientRetryCount(int)`
* Fixed: Added MSSQL error codes `10928`, `10929`, `18401` and `40540` as well
as a few native `Win32Exception` exceptions to the list treated as transient
errors, i.e., EventFlow will automatically retry if the server returns one
of these

### New in 0.47.2894 (released 2017-06-28)

Expand Down
4 changes: 4 additions & 0 deletions Source/EventFlow.MsSql/IMsSqlConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

using EventFlow.Core;
using EventFlow.Sql.Connections;

namespace EventFlow.MsSql
{
/// <summary>
/// MSSQL specific configuration. Extends the shared SQL configuration with a
/// retry delay dedicated to MSSQL error <c>40501</c> (server too busy).
/// </summary>
public interface IMsSqlConfiguration : ISqlConfiguration<IMsSqlConfiguration>
{
/// <summary>
/// Delay to pick from before retrying after the server reported error
/// <c>40501</c> (server too busy).
/// </summary>
RetryDelay ServerBusyRetryDelay { get; }

/// <summary>
/// Sets the delay used before retrying after error <c>40501</c> (server too busy).
/// </summary>
/// <param name="retryDelay">The delay to use for "server too busy" retries.</param>
/// <returns>
/// An <see cref="IMsSqlConfiguration"/>, so further configuration calls can be chained.
/// </returns>
IMsSqlConfiguration SetServerBusyRetryDelay(RetryDelay retryDelay);
}
}
14 changes: 14 additions & 0 deletions Source/EventFlow.MsSql/MsSqlConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

using System;
using EventFlow.Core;
using EventFlow.Sql.Connections;

namespace EventFlow.MsSql
Expand All @@ -32,5 +34,17 @@ public class MsSqlConfiguration : SqlConfiguration<IMsSqlConfiguration>, IMsSqlC
// Private constructor: code outside this class cannot create instances with `new`.
private MsSqlConfiguration()
{
}

/// <summary>
/// Delay to pick from before retrying after MSSQL error <c>40501</c> (server too busy).
/// Defaults to <c>RetryDelay.Between</c> 10 and 15 seconds; change it with
/// <see cref="SetServerBusyRetryDelay"/>.
/// </summary>
// The lower bound of the default follows the official documentation on MSDN:
// "The service is currently busy. Retry the request after 10 seconds"
public RetryDelay ServerBusyRetryDelay { get; private set; } = RetryDelay.Between(
TimeSpan.FromSeconds(10),
TimeSpan.FromSeconds(15));

/// <summary>
/// Sets the delay used before retrying after MSSQL error <c>40501</c> (server too busy).
/// </summary>
/// <param name="retryDelay">The delay to use; must not be <c>null</c>.</param>
/// <returns>This configuration instance, so calls can be chained.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="retryDelay"/> is <c>null</c>.
/// </exception>
public IMsSqlConfiguration SetServerBusyRetryDelay(RetryDelay retryDelay)
{
    // Fail at configuration time. A null delay would otherwise only surface
    // much later as a NullReferenceException when the retry strategy calls
    // ServerBusyRetryDelay.PickDelay() while handling a busy server.
    if (retryDelay == null)
    {
        throw new ArgumentNullException(nameof(retryDelay));
    }

    ServerBusyRetryDelay = retryDelay;

    return this;
}
}
}
165 changes: 109 additions & 56 deletions Source/EventFlow.MsSql/RetryStrategies/MsSqlErrorRetryStrategy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data.SqlClient;
using System.Linq;
using EventFlow.Core;
using EventFlow.Logs;

Expand All @@ -32,7 +35,6 @@ public class MsSqlErrorRetryStrategy : IMsSqlErrorRetryStrategy
{
private readonly ILog _log;
private readonly IMsSqlConfiguration _msSqlConfiguration;
private static readonly Random Random = new Random();

public MsSqlErrorRetryStrategy(
ILog log,
Expand All @@ -42,73 +44,124 @@ public MsSqlErrorRetryStrategy(
_msSqlConfiguration = msSqlConfiguration;
}

public virtual Retry ShouldThisBeRetried(Exception exception, TimeSpan totalExecutionTime, int currentRetryCount)
public virtual Retry ShouldThisBeRetried(
Exception exception,
TimeSpan totalExecutionTime,
int currentRetryCount)
{
// List of possible errors inspired by Azure SqlDatabaseTransientErrorDetectionStrategy

var sqlException = exception as SqlException;
if (sqlException == null || currentRetryCount > 2)
if (sqlException == null || currentRetryCount > _msSqlConfiguration.TransientRetryCount)
{
return Retry.No;
}

switch (sqlException.Number)
var retry = Enumerable.Empty<Retry>()
.Concat(CheckErrorCode(sqlException))
.Concat(CheckInnerException(sqlException))
.FirstOrDefault();

return retry ?? Retry.No;
}

private IEnumerable<Retry> CheckErrorCode(SqlException sqlException)
{
foreach (SqlError sqlExceptionError in sqlException.Errors)
{
// SQL Error Code: 40501
// The service is currently busy. Retry the request after 10 seconds.
case 40501:
// ReSharper disable once SwitchStatementMissingSomeCases
switch (sqlExceptionError.Number)
{
// SQL Error Code: 40501
// The service is currently busy. Retry the request after 10 seconds.
case 40501:
{
var delay = TimeSpan.FromMilliseconds(5000 + (10000 * Random.NextDouble()));
var delay = _msSqlConfiguration.ServerBusyRetryDelay.PickDelay();
_log.Warning(
"MSSQL server returned error 40501 which means it too busy! Trying to wait {0:0.###} (random between 5 and 15 seconds)",
"MSSQL server returned error 40501 which means it too busy and asked us to wait 10 seconds! Trying to wait {0:0.###} seconds.",
delay.TotalSeconds);
return Retry.YesAfter(delay);
yield return Retry.YesAfter(delay);
yield break;
}

// SQL Error Code: 40197
// The service has encountered an error processing your request. Please try again.
case 40197:

// SQL Error Code: 10053
// A transport-level error has occurred when receiving results from the server.
// An established connection was aborted by the software in your host machine.
case 10053:

// SQL Error Code: 10054
// A transport-level error has occurred when sending the request to the server.
// (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
case 10054:

// SQL Error Code: 10060
// A network-related or instance-specific error occurred while establishing a connection to SQL Server.
// The server was not found or was not accessible. Verify that the instance name is correct and that SQL Server
// is configured to allow remote connections. (provider: TCP Provider, error: 0 - A connection attempt failed
// because the connected party did not properly respond after a period of time, or established connection failed
// because connected host has failed to respond.)
case 10060:

// SQL Error Code: 40613
// Database XXXX on server YYYY is not currently available. Please retry the connection later. If the problem persists, contact customer
// support, and provide them the session tracing ID of ZZZZZ.
case 40613:

// SQL Error Code: 40143
// The service has encountered an error processing your request. Please try again.
case 40143:

// SQL Error Code: 233
// The client was unable to establish a connection because of an error during connection initialization process before login.
// Possible causes include the following: the client tried to connect to an unsupported version of SQL Server; the server was too busy
// to accept new connections; or there was a resource limitation (insufficient memory or maximum allowed connections) on the server.
// (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
case 233:

// SQL Error Code: 64
// A connection was successfully established with the server, but then an error occurred during the login process.
// (provider: TCP Provider, error: 0 - The specified network name is no longer available.)
case 64:
return Retry.YesAfter(_msSqlConfiguration.TransientRetryDelay.PickDelay());

default:
return Retry.No;
// SQL Error Code: 40613
// Database XXXX on server YYYY is not currently available. Please retry the connection later. If the problem persists, contact customer
// support, and provide them the session tracing ID of ZZZZZ.
case 40613:

// SQL Error Code: 40540
// The service has encountered an error processing your request. Please try again.
case 40540:

// SQL Error Code: 40197
// The service has encountered an error processing your request. Please try again.
case 40197:

// SQL Error Code: 40143
// The service has encountered an error processing your request. Please try again.
case 40143:

// SQL Error Code: 18401
// Login failed for user '%s'. Reason: Server is in script upgrade mode. Only administrator can connect at this time.
// Devnote: this can happen when SQL is going through recovery (e.g. after failover)
case 18401:

// SQL Error Code: 10929
// Resource ID: %d. The %s minimum guarantee is %d, maximum limit is %d and the current usage for the database is %d.
// However, the server is currently too busy to support requests greater than %d for this database.
case 10929:

// SQL Error Code: 10928
// Resource ID: %d. The %s limit for the database is %d and has been reached.
case 10928:

// SQL Error Code: 10060
// A network-related or instance-specific error occurred while establishing a connection to SQL Server.
// The server was not found or was not accessible. Verify that the instance name is correct and that SQL Server
// is configured to allow remote connections. (provider: TCP Provider, error: 0 - A connection attempt failed
// because the connected party did not properly respond after a period of time, or established connection failed
// because connected host has failed to respond.)
case 10060:

// SQL Error Code: 10054
// A transport-level error has occurred when sending the request to the server.
// (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
case 10054:

// SQL Error Code: 10053
// A transport-level error has occurred when receiving results from the server.
// An established connection was aborted by the software in your host machine.
case 10053:

// SQL Error Code: 233
// The client was unable to establish a connection because of an error during connection initialization process before login.
// Possible causes include the following: the client tried to connect to an unsupported version of SQL Server; the server was too busy
// to accept new connections; or there was a resource limitation (insufficient memory or maximum allowed connections) on the server.
// (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
case 233:

// SQL Error Code: 64
// A connection was successfully established with the server, but then an error occurred during the login process.
// (provider: TCP Provider, error: 0 - The specified network name is no longer available.)
case 64:
yield return Retry.YesAfter(_msSqlConfiguration.TransientRetryDelay.PickDelay());
yield break;
}
}
}

/// <summary>
/// Inspects the inner exception of <paramref name="sqlException"/> and yields a
/// single retry (after the configured transient retry delay) when it is a
/// <see cref="Win32Exception"/> carrying one of the native timeout error codes.
/// Yields nothing otherwise.
/// </summary>
/// <param name="sqlException">The SQL exception whose inner exception is examined.</param>
/// <returns>Zero or one <see cref="Retry"/> instances.</returns>
private IEnumerable<Retry> CheckInnerException(SqlException sqlException)
{
    // Win32 system error codes: WAIT_TIMEOUT (258) and ERROR_SEM_TIMEOUT (289).
    const int waitTimeout = 0x102;
    const int semaphoreTimeout = 0x121;

    // Prelogin failures can be caused by waits expiring on Windows handles, or
    // by gateway bugs where resetting a pooled connection to a dropped database
    // produces a timeout error instead of failing immediately.
    var nativeException = sqlException.InnerException as Win32Exception;

    var isNativeTimeout =
        nativeException != null &&
        (nativeException.NativeErrorCode == waitTimeout ||
         nativeException.NativeErrorCode == semaphoreTimeout);

    if (isNativeTimeout)
    {
        yield return Retry.YesAfter(_msSqlConfiguration.TransientRetryDelay.PickDelay());
    }
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public SQLiteErrorRetryStrategy(
public Retry ShouldThisBeRetried(Exception exception, TimeSpan totalExecutionTime, int currentRetryCount)
{
var sqLiteException = exception as SQLiteException;
if (sqLiteException == null || currentRetryCount > 2)
if (sqLiteException == null || currentRetryCount > _configuration.TransientRetryCount)
{
return Retry.No;
}
Expand Down
4 changes: 3 additions & 1 deletion Source/EventFlow.Sql/Connections/ISqlConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@ namespace EventFlow.Sql.Connections
public interface ISqlConfiguration<out T>
where T : ISqlConfiguration<T>
{
RetryDelay TransientRetryDelay { get; }
string ConnectionString { get; }
RetryDelay TransientRetryDelay { get; }
int TransientRetryCount { get; }

T SetTransientRetryDelay(RetryDelay retryDelay);
T SetTransientRetryCount(int retryCount);
}
}
10 changes: 10 additions & 0 deletions Source/EventFlow.Sql/Connections/SqlConfiguration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ public abstract class SqlConfiguration<T> : ISqlConfiguration<T>
TimeSpan.FromMilliseconds(50),
TimeSpan.FromMilliseconds(100));

/// <summary>
/// Threshold for retrying transient errors: the SQL retry strategies answer
/// <c>Retry.No</c> once the current retry count is greater than this value.
/// Defaults to 2.
/// </summary>
public int TransientRetryCount { get; private set; } = 2;

public T SetConnectionString(string connectionString)
{
ConnectionString = connectionString;
Expand All @@ -50,5 +52,13 @@ public T SetTransientRetryDelay(RetryDelay retryDelay)
// Are there alternatives to this double cast?
return (T)(object)this;
}

/// <summary>
/// Sets the retry count threshold used for transient errors.
/// </summary>
/// <param name="retryCount">The new value for <c>TransientRetryCount</c>.</param>
/// <returns>This configuration instance cast to <typeparamref name="T"/>, for chaining.</returns>
public T SetTransientRetryCount(int retryCount)
{
    TransientRetryCount = retryCount;

    // The cast goes through object, same as the double cast used elsewhere in
    // this class; open question from the original author: is there an alternative?
    object self = this;
    return (T)self;
}
}
}
10 changes: 9 additions & 1 deletion Source/EventFlow/Core/Retry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

using System;
using System.Collections.Generic;
using EventFlow.ValueObjects;

namespace EventFlow.Core
{
public class Retry
public class Retry : ValueObject
{
/// <summary>A retry decision of "yes" with a zero wait (<see cref="TimeSpan.Zero"/>).</summary>
public static Retry Yes { get; } = new Retry(true, TimeSpan.Zero);
/// <summary>Creates a retry decision of "yes" that carries <paramref name="retryAfter"/> as the wait.</summary>
/// <param name="retryAfter">The delay stored on the returned instance.</param>
public static Retry YesAfter(TimeSpan retryAfter) => new Retry(true, retryAfter);
Expand All @@ -44,5 +46,11 @@ private Retry(bool shouldBeRetried, TimeSpan retryAfter)
ShouldBeRetried = shouldBeRetried;
RetryAfter = retryAfter;
}

/// <summary>
/// Supplies the component values of this instance to the <c>ValueObject</c>
/// base class, in order: <c>ShouldBeRetried</c>, then <c>RetryAfter</c>.
/// </summary>
/// <returns>The two component values, boxed as objects.</returns>
protected override IEnumerable<object> GetEqualityComponents()
{
    return new object[] { ShouldBeRetried, RetryAfter };
}
}
}

0 comments on commit e827d61

Please sign in to comment.