From 9dfa4f5f5d11560082e903f4cf57435128f0ada4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Gonz=C3=A1lez?=
Date: Thu, 20 Jun 2024 18:03:00 +0200
Subject: [PATCH] fix: prevent idle TCP connections from being dropped by OCI
 middleboxes

This happened mainly with interactive SSH connections on which the user
is AFK and no terminal activity happened, as the default 2 hour kernel
timeout is too long for these cloud firewalls and/or NAT boxes.
---
 roles/common/tasks/main.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml
index 8a50960..fa35538 100644
--- a/roles/common/tasks/main.yml
+++ b/roles/common/tasks/main.yml
@@ -195,3 +195,21 @@
     name: "{{ ansible_user }}"
     groups: "{{ sdkman_group }}"
     append: true
+
+# sshd and other servers may rely on the kernel's TCP keepalive support, but
+# the idle time before the first probe must be shortened: OCI firewalls/NAT
+# gateways drop connections that stay idle for much less than the two-hour
+# default used by the Linux kernel. Idle SSH sessions are prime candidates
+# for being dropped because of this. See also:
+# https://bugs.launchpad.net/cloud-images/+bug/1838670
+- name: Tweak TCP keepalive time
+  become: true
+  ansible.posix.sysctl:
+    name: net.ipv4.tcp_keepalive_time
+    value: "120"
+
+- name: Tweak TCP keepalive maximum probes
+  become: true
+  ansible.posix.sysctl:
+    name: net.ipv4.tcp_keepalive_probes
+    value: "3"