/*
 * Copyright (C) 2017 Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
 *
 * SPDX-License-Identifier: GPL-2.0-only
 *
 */
#include "tcp_keep_alive.hpp"

#include <common/compat/getenv.hpp>
#include <common/defaults.hpp>
#include <common/ini-config/ini-config.hpp>
#include <common/time.hpp>

#include <errno.h>
#include <limits.h>
#include <netinet/tcp.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/types.h>
/*
 * Bounds, in seconds, enforced on Solaris for the per-socket idle time and
 * abort threshold.
 */
#define SOLARIS_IDLE_TIME_MIN_S       10
#define SOLARIS_IDLE_TIME_MAX_S       864000 /* 10 days */
#define SOLARIS_ABORT_THRESHOLD_MIN_S 1
#define SOLARIS_ABORT_THRESHOLD_MAX_S 480 /* 8 minutes */

/*
 * Per-platform definitions of TCP socket options.
 *
 * An option that a platform does not provide is defined as 0 so that the
 * setsockopt() call sites still compile; those call sites are guarded by the
 * matching `*_supported` flag.
 */
#if defined(__linux__)

#define COMPAT_TCP_LEVEL	   SOL_TCP
#define COMPAT_TCP_ABORT_THRESHOLD 0 /* Does not exist on linux. */
#define COMPAT_TCP_KEEPIDLE	   TCP_KEEPIDLE
#define COMPAT_TCP_KEEPINTVL	   TCP_KEEPINTVL
#define COMPAT_TCP_KEEPCNT	   TCP_KEEPCNT

#elif defined(__sun__) /* ! defined (__linux__) */

#define COMPAT_TCP_LEVEL IPPROTO_TCP

#ifdef TCP_KEEPALIVE_THRESHOLD
#define COMPAT_TCP_KEEPIDLE TCP_KEEPALIVE_THRESHOLD
#else /* ! defined (TCP_KEEPALIVE_THRESHOLD) */
#define COMPAT_TCP_KEEPIDLE 0
#endif /* TCP_KEEPALIVE_THRESHOLD */

#ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
#define COMPAT_TCP_ABORT_THRESHOLD TCP_KEEPALIVE_ABORT_THRESHOLD
#else /* ! defined (TCP_KEEPALIVE_ABORT_THRESHOLD) */
#define COMPAT_TCP_ABORT_THRESHOLD 0
#endif /* TCP_KEEPALIVE_ABORT_THRESHOLD */

#define COMPAT_TCP_KEEPINTVL 0 /* Does not exist on Solaris. */
#define COMPAT_TCP_KEEPCNT   0 /* Does not exist on Solaris. */

#else /* ! defined (__linux__) && ! defined (__sun__) */

#define COMPAT_TCP_LEVEL	   0
#define COMPAT_TCP_ABORT_THRESHOLD 0
#define COMPAT_TCP_KEEPIDLE	   0
#define COMPAT_TCP_KEEPINTVL	   0
#define COMPAT_TCP_KEEPCNT	   0

#endif /* ! defined (__linux__) && ! defined (__sun__) */
/* Per-socket TCP keep-alive capabilities of the platform. */
struct tcp_keep_alive_support {
	/* TCP keep-alive is supported by this platform. */
	bool supported;
	/* Overriding idle-time per socket is supported by this platform. */
	bool idle_time_supported;
	/*
	 * Overriding probe interval per socket is supported by this
	 * platform.
	 */
	bool probe_interval_supported;
	/*
	 * Configuring max probe count per socket is supported by this
	 * platform.
	 */
	bool max_probe_count_supported;
	/*
	 * Overriding the abort threshold on a per-socket basis is supported by
	 * this platform.
	 */
	bool abort_threshold_supported;
};
/*
 * TCP keep-alive settings gathered from the environment.
 *
 * For the integer members, only values greater than 0 are applied to a
 * socket; -1 is the initial "not set" value.
 */
struct tcp_keep_alive_config {
	/* Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV environment variable. */
	bool enabled;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV environment
	 * variable. Stored as returned by convert_idle_time().
	 */
	int idle_time;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
	 * environment variable.
	 */
	int probe_interval;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
	 * environment variable.
	 */
	int max_probe_count;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
	 * environment variable. Stored as returned by
	 * convert_abort_threshold().
	 */
	int abort_threshold;
};
109 struct tcp_keep_alive_config the_config
= { .enabled
= false,
111 .probe_interval
= -1,
112 .max_probe_count
= -1,
113 .abort_threshold
= -1 };
115 struct tcp_keep_alive_support the_support
= { .supported
= false,
116 .idle_time_supported
= false,
117 .probe_interval_supported
= false,
118 .max_probe_count_supported
= false,
119 .abort_threshold_supported
= false };
123 * Common parser for string to positive int conversion where the value must be
124 * in range [-1, INT_MAX].
126 * Returns -2 on invalid value.
128 static int get_env_int(const char *env_var
, const char *value
)
132 char *endptr
= nullptr;
135 tmp
= strtol(value
, &endptr
, 0);
137 ERR("%s cannot be parsed.", env_var
);
138 PERROR("errno for previous parsing failure");
143 if (endptr
== value
|| *endptr
!= '\0') {
144 ERR("%s is not a valid number", env_var
);
150 ERR("%s must be greater or equal to -1", env_var
);
155 ERR("%s is too big. Maximum value is %d", env_var
, INT_MAX
);
/*
 * Per-platform conversion of the keep-alive idle time, given in seconds, to
 * the value expected by the platform's socket option.
 *
 * Returns the converted value, or -2 on invalid value.
 */
#ifdef __sun__

static int convert_idle_time(int value)
{
	unsigned int tmp_ms;

	if (value == -1 || value == 0) {
		/* Use system defaults */
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive idle time (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 * Minimum 10s, maximum 10 days. Defined by
	 * https://docs.oracle.com/cd/E23824_01/html/821-1475/tcp-7p.html#REFMAN7tcp-7p
	 */
	if (value < SOLARIS_IDLE_TIME_MIN_S || value > SOLARIS_IDLE_TIME_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
		    DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
		    SOLARIS_IDLE_TIME_MIN_S,
		    SOLARIS_IDLE_TIME_MAX_S);
		return -2;
	}

	/* On Solaris idle time is given in milliseconds. */
	tmp_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if ((value != 0 && (tmp_ms / ((unsigned int) value)) != MSEC_PER_SEC) ||
	    tmp_ms > INT_MAX) {
		/* The multiplication wrapped or does not fit in an int. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
		    DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
		    max_value);
		return -2;
	}

	/* tmp_ms is >= 0 and <= INT_MAX. Cast is safe. */
	return (int) tmp_ms;
}

#else /* ! defined(__sun__) */

/* No conversion needed: the value is used as-is. */
static int convert_idle_time(int value)
{
	return value;
}

#endif /* ! defined(__sun__) */
230 /* Per-platform support of tcp_keep_alive functionality. */
231 #if defined(__linux__)
233 static void tcp_keep_alive_init_support(struct tcp_keep_alive_support
*support
)
235 support
->supported
= true;
236 support
->idle_time_supported
= true;
237 support
->probe_interval_supported
= true;
238 support
->max_probe_count_supported
= true;
239 /* Solaris specific */
240 support
->abort_threshold_supported
= false;
243 #elif defined(__sun__) /* ! defined (__linux__) */
245 static void tcp_keep_alive_init_support(struct tcp_keep_alive_support
*support
)
247 support
->supported
= true;
248 #ifdef TCP_KEEPALIVE_THRESHOLD
249 support
->idle_time_supported
= true;
251 support
->idle_time_supported
= false;
253 #endif /* TCP_KEEPALIVE_THRESHOLD */
256 * Solaris does not support either tcp_keepalive_probes or
257 * tcp_keepalive_intvl.
258 * Inferring a value for TCP_KEEP_ALIVE_ABORT_THRESHOLD using
259 * (tcp_keepalive_probes * tcp_keepalive_intvl) could yield a good
260 * alternative, but Solaris does not detail the algorithm used (such as
261 * constant time retry like Linux).
263 * Ignore those settings on Solaris 11. We prefer exposing an
264 * environment variable only used on Solaris for the abort threshold.
266 support
->probe_interval_supported
= false;
267 support
->max_probe_count_supported
= false;
268 #ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
269 support
->abort_threshold_supported
= true;
271 support
->abort_threshold_supported
= false;
272 #endif /* TCP_KEEPALIVE_THRESHOLD */
275 #else /* ! defined(__sun__) && ! defined(__linux__) */
277 /* Assume nothing is supported on other platforms. */
278 static void tcp_keep_alive_init_support(struct tcp_keep_alive_support
*support
)
280 support
->supported
= false;
281 support
->idle_time_supported
= false;
282 support
->probe_interval_supported
= false;
283 support
->max_probe_count_supported
= false;
284 support
->abort_threshold_supported
= false;
287 #endif /* ! defined(__sun__) && ! defined(__linux__) */
/*
 * Solaris specific modifier for abort threshold: converts the value, given in
 * seconds, to the value expected by the socket option.
 *
 * Return the converted value, or -2 on error.
 */
#ifdef __sun__

static int convert_abort_threshold(int value)
{
	unsigned int tmp_ms;

	/* NOTE(review): guard reconstructed as "-1 only"; confirm against upstream. */
	if (value == -1) {
		/* Use system defaults */
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive abort threshold (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 *
	 * Between 0 and 8 minutes.
	 * https://docs.oracle.com/cd/E19120-01/open.solaris/819-2724/fsvdh/index.html
	 *
	 * Restrict from 1 second to 8 minutes since the 0 value goes against
	 * the purpose of dead peers detection by never timing out when probing.
	 * It does NOT mean that the connection times out immediately.
	 */
	if (value < SOLARIS_ABORT_THRESHOLD_MIN_S || value > SOLARIS_ABORT_THRESHOLD_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
		    DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
		    SOLARIS_ABORT_THRESHOLD_MIN_S,
		    SOLARIS_ABORT_THRESHOLD_MAX_S);
		return -2;
	}

	/* Abort threshold is given in milliseconds. */
	tmp_ms = ((unsigned int) value) * MSEC_PER_SEC;
	if ((value != 0 && (tmp_ms / ((unsigned int) value)) != MSEC_PER_SEC) ||
	    tmp_ms > INT_MAX) {
		/* The multiplication wrapped or does not fit in an int. */
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
		    DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
		    max_value);
		return -2;
	}

	/* tmp_ms is >= 0 and <= INT_MAX. Cast is safe. */
	return (int) tmp_ms;
}

#else /* ! defined(__sun__) */

/* No conversion needed: the value is used as-is. */
static int convert_abort_threshold(int value)
{
	return value;
}

#endif /* defined (__sun__) */
360 * Retrieve settings from environment variables and warn for settings not
361 * supported by the platform.
363 static int tcp_keep_alive_init_config(struct tcp_keep_alive_support
*support
,
364 struct tcp_keep_alive_config
*config
)
369 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV
);
370 if (!support
->supported
) {
372 WARN("Using per-socket TCP keep-alive mechanism is not supported by this platform. Ignoring the %s environment variable.",
373 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV
);
375 config
->enabled
= false;
377 ret
= config_parse_value(value
);
378 if (ret
< 0 || ret
> 1) {
379 ERR("Invalid value for %s", DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV
);
383 config
->enabled
= ret
;
385 DBG("TCP keep-alive mechanism %s", config
->enabled
? "enabled" : "disabled");
387 /* Get value for tcp_keepalive_time in seconds. */
388 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
);
389 if (!support
->idle_time_supported
&& value
) {
390 WARN("Overriding the TCP keep-alive idle time threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
391 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
);
392 config
->idle_time
= -1;
394 int idle_time_platform
;
395 int idle_time_seconds
;
398 get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
, value
);
399 if (idle_time_seconds
< -1) {
404 idle_time_platform
= convert_idle_time(idle_time_seconds
);
405 if (idle_time_platform
< -1) {
410 config
->idle_time
= idle_time_platform
;
411 DBG("Overriding %s to %d",
412 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
,
416 /* Get value for tcp_keepalive_intvl in seconds. */
417 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
);
418 if (!support
->probe_interval_supported
&& value
) {
419 WARN("Overriding the TCP keep-alive probe interval time per-socket is not supported by this platform. Ignoring the %s environment variable.",
420 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
);
421 config
->probe_interval
= -1;
426 get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
, value
);
427 if (probe_interval
< -1) {
432 config
->probe_interval
= probe_interval
;
433 DBG("Overriding %s to %d",
434 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
,
435 config
->probe_interval
);
438 /* Get value for tcp_keepalive_probes. */
439 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
);
440 if (!support
->max_probe_count_supported
&& value
) {
441 WARN("Overriding the TCP keep-alive maximum probe count per-socket is not supported by this platform. Ignoring the %s environment variable.",
442 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
);
443 config
->max_probe_count
= -1;
448 get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
, value
);
449 if (max_probe_count
< -1) {
454 config
->max_probe_count
= max_probe_count
;
455 DBG("Overriding %s to %d",
456 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
,
457 config
->max_probe_count
);
460 /* Get value for tcp_keepalive_abort_interval. */
461 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
);
462 if (!support
->abort_threshold_supported
&& value
) {
463 WARN("Overriding the TCP keep-alive abort threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
464 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
);
465 config
->abort_threshold
= -1;
467 int abort_threshold_platform
;
468 int abort_threshold_seconds
;
470 abort_threshold_seconds
=
471 get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
, value
);
472 if (abort_threshold_seconds
< -1) {
477 abort_threshold_platform
= convert_abort_threshold(abort_threshold_seconds
);
478 if (abort_threshold_platform
< -1) {
483 config
->abort_threshold
= abort_threshold_platform
;
484 DBG("Overriding %s to %d",
485 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
,
486 config
->abort_threshold
);
495 /* Initialize the TCP keep-alive configuration. */
496 __attribute__((constructor
)) static void tcp_keep_alive_init()
498 tcp_keep_alive_init_support(&the_support
);
499 (void) tcp_keep_alive_init_config(&the_support
, &the_config
);
503 * Set the socket options regarding TCP keep-alive.
505 int socket_apply_keep_alive_config(int socket_fd
)
511 if (!the_support
.supported
|| !the_config
.enabled
) {
516 DBG("TCP keep-alive enabled for socket %d", socket_fd
);
517 ret
= setsockopt(socket_fd
, SOL_SOCKET
, SO_KEEPALIVE
, &val
, sizeof(val
));
519 PERROR("setsockopt so_keepalive");
523 /* TCP keep-alive idle time */
524 if (the_support
.idle_time_supported
&& the_config
.idle_time
> 0) {
525 DBG("TCP keep-alive keep idle: %d enabled for socket %d",
526 the_config
.idle_time
,
528 ret
= setsockopt(socket_fd
,
531 &the_config
.idle_time
,
532 sizeof(the_config
.idle_time
));
534 PERROR("setsockopt TCP_KEEPIDLE");
538 /* TCP keep-alive probe interval */
539 if (the_support
.probe_interval_supported
&& the_config
.probe_interval
> 0) {
540 DBG("TCP keep-alive probe_interval: %d enabled for socket %d",
541 the_config
.probe_interval
,
543 ret
= setsockopt(socket_fd
,
545 COMPAT_TCP_KEEPINTVL
,
546 &the_config
.probe_interval
,
547 sizeof(the_config
.probe_interval
));
549 PERROR("setsockopt TCP_KEEPINTVL");
554 /* TCP keep-alive max probe count */
555 if (the_support
.max_probe_count_supported
&& the_config
.max_probe_count
> 0) {
556 DBG("TCP keep-alive max_probe: %d enabled for socket %d",
557 the_config
.max_probe_count
,
559 ret
= setsockopt(socket_fd
,
562 &the_config
.max_probe_count
,
563 sizeof(the_config
.max_probe_count
));
565 PERROR("setsockopt TCP_KEEPCNT");
570 /* TCP keep-alive abort threshold */
571 if (the_support
.abort_threshold_supported
&& the_config
.abort_threshold
> 0) {
572 DBG("TCP keep-alive abort threshold: %d enabled for socket %d",
573 the_config
.abort_threshold
,
575 ret
= setsockopt(socket_fd
,
577 COMPAT_TCP_ABORT_THRESHOLD
,
578 &the_config
.abort_threshold
,
579 sizeof(the_config
.max_probe_count
));
581 PERROR("setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD");