2 * Copyright (C) 2017 Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
4 * SPDX-License-Identifier: GPL-2.0-only
9 #include <netinet/tcp.h>
11 #include <sys/socket.h>
14 #include <common/compat/getenv.h>
15 #include <common/time.h>
16 #include <common/defaults.h>
17 #include <common/config/session-config.h>
19 #include "tcp_keep_alive.h"
/* Bounds, in seconds, enforced on the Solaris per-socket overrides. */
#define SOLARIS_IDLE_TIME_MIN_S 10
#define SOLARIS_IDLE_TIME_MAX_S 864000 /* 10 days */
#define SOLARIS_ABORT_THRESHOLD_MIN_S 1
#define SOLARIS_ABORT_THRESHOLD_MAX_S 480 /* 8 minutes */

/*
 * Per-platform definitions of TCP socket options.
 *
 * Every COMPAT_TCP_* name is always defined so the code using them compiles
 * on all platforms; an option the platform does not provide is defined to 0.
 */
#if defined(__linux__)

#define COMPAT_TCP_LEVEL SOL_TCP
#define COMPAT_TCP_ABORT_THRESHOLD 0 /* Does not exist on linux. */
#define COMPAT_TCP_KEEPIDLE TCP_KEEPIDLE
#define COMPAT_TCP_KEEPINTVL TCP_KEEPINTVL
#define COMPAT_TCP_KEEPCNT TCP_KEEPCNT

#elif defined(__sun__) /* ! defined(__linux__) */

#define COMPAT_TCP_LEVEL IPPROTO_TCP

/* Both Solaris options are only used when the system headers expose them. */
#if defined(TCP_KEEPALIVE_THRESHOLD)
#define COMPAT_TCP_KEEPIDLE TCP_KEEPALIVE_THRESHOLD
#else /* ! defined(TCP_KEEPALIVE_THRESHOLD) */
#define COMPAT_TCP_KEEPIDLE 0
#endif /* TCP_KEEPALIVE_THRESHOLD */

#if defined(TCP_KEEPALIVE_ABORT_THRESHOLD)
#define COMPAT_TCP_ABORT_THRESHOLD TCP_KEEPALIVE_ABORT_THRESHOLD
#else /* ! defined(TCP_KEEPALIVE_ABORT_THRESHOLD) */
#define COMPAT_TCP_ABORT_THRESHOLD 0
#endif /* TCP_KEEPALIVE_ABORT_THRESHOLD */

#define COMPAT_TCP_KEEPINTVL 0 /* Does not exist on Solaris. */
#define COMPAT_TCP_KEEPCNT 0 /* Does not exist on Solaris. */

#else /* ! defined(__linux__) && ! defined(__sun__) */

#define COMPAT_TCP_LEVEL 0
#define COMPAT_TCP_ABORT_THRESHOLD 0
#define COMPAT_TCP_KEEPIDLE 0
#define COMPAT_TCP_KEEPINTVL 0
#define COMPAT_TCP_KEEPCNT 0

#endif /* ! defined(__linux__) && ! defined(__sun__) */
/*
 * Capabilities of the running platform regarding per-socket TCP keep-alive
 * tuning. Each flag gates the use of the matching configuration value.
 */
struct tcp_keep_alive_support {
	/* TCP keep-alive is supported by this platform. */
	bool supported;
	/* Overriding idle-time per socket is supported by this platform. */
	bool idle_time_supported;
	/*
	 * Overriding probe interval per socket is supported by this
	 * platform.
	 */
	bool probe_interval_supported;
	/*
	 * Configuring max probe count per socket is supported by this
	 * platform.
	 */
	bool max_probe_count_supported;
	/*
	 * Overriding the abort threshold on a per-socket basis is supported
	 * by this platform.
	 */
	bool abort_threshold_supported;
};
/*
 * User-requested TCP keep-alive settings, read from the environment.
 *
 * The integer members are handed to setsockopt() as-is and are only applied
 * when strictly positive; -1 is the "not overridden" value used by the
 * static initializer of this structure.
 *
 * NOTE(review): the member declarations were missing from the reviewed
 * listing; names and types are taken from the initializer and from the way
 * each member is used in this file. Confirm against the repository version.
 */
struct tcp_keep_alive_config {
	/* Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV environment variable. */
	bool enabled;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV environment
	 * variable.
	 */
	int idle_time;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
	 * environment variable.
	 */
	int probe_interval;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
	 * environment variable.
	 */
	int max_probe_count;
	/*
	 * Maps to the LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
	 * environment variable.
	 */
	int abort_threshold;
};
108 static struct tcp_keep_alive_config the_config
= {.enabled
= false,
110 .probe_interval
= -1,
111 .max_probe_count
= -1,
112 .abort_threshold
= -1};
114 static struct tcp_keep_alive_support the_support
= {.supported
= false,
115 .idle_time_supported
= false,
116 .probe_interval_supported
= false,
117 .max_probe_count_supported
= false,
118 .abort_threshold_supported
= false};
/*
 * Common parser for string to positive int conversion where the value must be
 * in range [-1, INT_MAX].
 *
 * env_var is only used to name the offending variable in error messages;
 * value is the string to parse. The base is auto-detected by strtol()
 * (base argument 0).
 *
 * Returns the parsed value, or -2 on invalid value.
 *
 * NOTE(review): the conditions and return statements were missing from the
 * reviewed listing. They are written here so that every rejection returns
 * -2, as documented above and as expected by the callers, which treat any
 * result below -1 as a failure. Confirm against the repository version.
 */
static
int get_env_int(const char *env_var,
		const char *value)
{
	long parsed;
	char *endptr = NULL;

	/* strtol() only sets errno on failure: clear it before the call. */
	errno = 0;
	parsed = strtol(value, &endptr, 0);
	if (errno != 0) {
		ERR("%s cannot be parsed.", env_var);
		PERROR("errno for previous parsing failure");
		return -2;
	}

	/* Reject empty strings and trailing garbage. */
	if (endptr == value || *endptr != '\0') {
		ERR("%s is not a valid number", env_var);
		return -2;
	}

	if (parsed < -1) {
		ERR("%s must be greater or equal to -1", env_var);
		return -2;
	}

	if (parsed > INT_MAX) {
		ERR("%s is too big. Maximum value is %d", env_var, INT_MAX);
		return -2;
	}

	/* parsed is within [-1, INT_MAX]: the narrowing is lossless. */
	return (int) parsed;
}
/*
 * Per-platform implementation of tcp_keep_alive_idle_time_modifier.
 *
 * Takes the idle time requested by the user, in seconds, and returns the
 * value to hand to the platform.
 *
 * Returns -2 on invalid value.
 *
 * NOTE(review): the return statements, and the whole body of the non-Solaris
 * variant, were missing from the reviewed listing; they are inferred from the
 * comments and from the caller, which rejects results below -1. Confirm
 * against the repository version.
 */
#if defined(__sun__)

static
int convert_idle_time(int value)
{
	unsigned int idle_ms;

	if (value == -1 || value == 0) {
		/* Use system defaults */
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive idle time (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 * Minimum 10s, maximum 10 days. Defined by
	 * https://docs.oracle.com/cd/E23824_01/html/821-1475/tcp-7p.html#REFMAN7tcp-7p
	 */
	if (value < SOLARIS_IDLE_TIME_MIN_S ||
			value > SOLARIS_IDLE_TIME_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				SOLARIS_IDLE_TIME_MIN_S,
				SOLARIS_IDLE_TIME_MAX_S);
		return -2;
	}

	/* On Solaris idle time is given in milliseconds. */
	idle_ms = ((unsigned int) value) * MSEC_PER_SEC;

	/* Detect a wrapped multiplication or a result not fitting an int. */
	if ((value != 0 && (idle_ms / ((unsigned int) value)) != MSEC_PER_SEC)
			|| idle_ms > INT_MAX) {
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV,
				max_value);
		return -2;
	}

	/* idle_ms is >= 0 and <= INT_MAX. Cast is safe. */
	return (int) idle_ms;
}

#else /* ! defined(__sun__) */

static
int convert_idle_time(int value)
{
	/* No conversion on this platform: the value is used as given. */
	return value;
}

#endif /* ! defined(__sun__) */
234 /* Per-platform support of tcp_keep_alive functionality. */
235 #if defined (__linux__)
238 void tcp_keep_alive_init_support(struct tcp_keep_alive_support
*support
)
240 support
->supported
= true;
241 support
->idle_time_supported
= true;
242 support
->probe_interval_supported
= true;
243 support
->max_probe_count_supported
= true;
244 /* Solaris specific */
245 support
->abort_threshold_supported
= false;
248 #elif defined(__sun__) /* ! defined (__linux__) */
251 void tcp_keep_alive_init_support(struct tcp_keep_alive_support
*support
)
253 support
->supported
= true;
254 #ifdef TCP_KEEPALIVE_THRESHOLD
255 support
->idle_time_supported
= true;
257 support
->idle_time_supported
= false;;
258 #endif /* TCP_KEEPALIVE_THRESHOLD */
261 * Solaris does not support either tcp_keepalive_probes or
262 * tcp_keepalive_intvl.
263 * Inferring a value for TCP_KEEP_ALIVE_ABORT_THRESHOLD using
264 * (tcp_keepalive_probes * tcp_keepalive_intvl) could yield a good
265 * alternative, but Solaris does not detail the algorithm used (such as
266 * constant time retry like Linux).
268 * Ignore those settings on Solaris 11. We prefer exposing an
269 * environment variable only used on Solaris for the abort threshold.
271 support
->probe_interval_supported
= false;
272 support
->max_probe_count_supported
= false;
273 #ifdef TCP_KEEPALIVE_ABORT_THRESHOLD
274 support
->abort_threshold_supported
= true;
276 support
->abort_threshold_supported
= false;
277 #endif /* TCP_KEEPALIVE_THRESHOLD */
280 #else /* ! defined(__sun__) && ! defined(__linux__) */
282 /* Assume nothing is supported on other platforms. */
284 void tcp_keep_alive_init_support(struct tcp_keep_alive_support
*support
)
286 support
->supported
= false;
287 support
->idle_time_supported
= false;
288 support
->probe_interval_supported
= false;
289 support
->max_probe_count_supported
= false;
290 support
->abort_threshold_supported
= false;
293 #endif /* ! defined(__sun__) && ! defined(__linux__) */
#if defined(__sun__)

/*
 * Solaris specific modifier for abort threshold.
 *
 * Takes the abort threshold requested by the user, in seconds, and returns
 * the value to hand to the platform, in milliseconds.
 *
 * Return -2 on error.
 *
 * NOTE(review): the first condition, the return statements, and the whole
 * body of the non-Solaris variant were missing from the reviewed listing;
 * they are inferred from the comments and from the caller, which rejects
 * results below -1. Confirm against the repository version.
 */
static
int convert_abort_threshold(int value)
{
	unsigned int threshold_ms;

	if (value == -1) {
		/* Use system defaults */
		return value;
	}

	if (value < 0) {
		ERR("Invalid tcp keep-alive abort threshold (%i)", value);
		return -2;
	}

	/*
	 * Additional constraints for Solaris 11.
	 *
	 * Between 0 and 8 minutes.
	 * https://docs.oracle.com/cd/E19120-01/open.solaris/819-2724/fsvdh/index.html
	 *
	 * Restrict from 1 second to 8 minutes since the 0 value goes against
	 * the purpose of dead peers detection by never timing out when probing.
	 * It does NOT mean that the connection times out immediately.
	 */
	if (value < SOLARIS_ABORT_THRESHOLD_MIN_S ||
			value > SOLARIS_ABORT_THRESHOLD_MAX_S) {
		ERR("%s must be comprised between %d and %d inclusively on Solaris",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				SOLARIS_ABORT_THRESHOLD_MIN_S,
				SOLARIS_ABORT_THRESHOLD_MAX_S);
		return -2;
	}

	/* Abort threshold is given in milliseconds. */
	threshold_ms = ((unsigned int) value) * MSEC_PER_SEC;

	/* Detect a wrapped multiplication or a result not fitting an int. */
	if ((value != 0 && (threshold_ms / ((unsigned int) value)) != MSEC_PER_SEC)
			|| threshold_ms > INT_MAX) {
		const int max_value = INT_MAX / MSEC_PER_SEC;

		ERR("%s is too big: maximum supported value is %d",
				DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV,
				max_value);
		return -2;
	}

	/* threshold_ms is >= 0 and <= INT_MAX. Cast is safe. */
	return (int) threshold_ms;
}

#else /* ! defined(__sun__) */

static
int convert_abort_threshold(int value)
{
	/* No conversion on this platform: the value is used as given. */
	return value;
}

#endif /* defined (__sun__) */
369 * Retrieve settings from environment variables and warn for settings not
370 * supported by the platform.
373 int tcp_keep_alive_init_config(struct tcp_keep_alive_support
*support
,
374 struct tcp_keep_alive_config
*config
)
379 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV
);
380 if (!support
->supported
) {
382 WARN("Using per-socket TCP keep-alive mechanism is not supported by this platform. Ignoring the %s environment variable.",
383 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV
);
385 config
->enabled
= false;
387 ret
= config_parse_value(value
);
388 if (ret
< 0 || ret
> 1) {
389 ERR("Invalid value for %s", DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ENV
);
393 config
->enabled
= ret
;
395 DBG("TCP keep-alive mechanism %s", config
->enabled
? "enabled": "disabled");
397 /* Get value for tcp_keepalive_time in seconds. */
398 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
);
399 if (!support
->idle_time_supported
&& value
) {
400 WARN("Overriding the TCP keep-alive idle time threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
401 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
);
402 config
->idle_time
= -1;
404 int idle_time_platform
;
405 int idle_time_seconds
;
407 idle_time_seconds
= get_env_int(
408 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
,
410 if (idle_time_seconds
< -1) {
415 idle_time_platform
= convert_idle_time(idle_time_seconds
);
416 if (idle_time_platform
< -1) {
421 config
->idle_time
= idle_time_platform
;
422 DBG("Overriding %s to %d",
423 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_IDLE_TIME_ENV
,
427 /* Get value for tcp_keepalive_intvl in seconds. */
428 value
= lttng_secure_getenv(
429 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
);
430 if (!support
->probe_interval_supported
&& value
) {
431 WARN("Overriding the TCP keep-alive probe interval time per-socket is not supported by this platform. Ignoring the %s environment variable.",
432 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
);
433 config
->probe_interval
= -1;
437 probe_interval
= get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
,
439 if (probe_interval
< -1) {
444 config
->probe_interval
= probe_interval
;
445 DBG("Overriding %s to %d",
446 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_PROBE_INTERVAL_ENV
,
447 config
->probe_interval
);
450 /* Get value for tcp_keepalive_probes. */
451 value
= lttng_secure_getenv(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
);
452 if (!support
->max_probe_count_supported
&& value
) {
453 WARN("Overriding the TCP keep-alive maximum probe count per-socket is not supported by this platform. Ignoring the %s environment variable.",
454 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
);
455 config
->max_probe_count
= -1;
459 max_probe_count
= get_env_int(DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
,
461 if (max_probe_count
< -1) {
466 config
->max_probe_count
= max_probe_count
;
467 DBG("Overriding %s to %d",
468 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
,
469 config
->max_probe_count
);
472 /* Get value for tcp_keepalive_abort_interval. */
473 value
= lttng_secure_getenv(
474 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
);
475 if (!support
->abort_threshold_supported
&& value
) {
476 WARN("Overriding the TCP keep-alive abort threshold per-socket is not supported by this platform. Ignoring the %s environment variable.",
477 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
);
478 config
->abort_threshold
= -1;
480 int abort_threshold_platform
;
481 int abort_threshold_seconds
;
483 abort_threshold_seconds
= get_env_int(
484 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_MAX_PROBE_COUNT_ENV
,
486 if (abort_threshold_seconds
< -1) {
491 abort_threshold_platform
= convert_abort_threshold(
492 abort_threshold_seconds
);
493 if (abort_threshold_platform
< -1) {
498 config
->abort_threshold
= abort_threshold_platform
;
499 DBG("Overriding %s to %d",
500 DEFAULT_LTTNG_RELAYD_TCP_KEEP_ALIVE_ABORT_THRESHOLD_ENV
,
501 config
->abort_threshold
);
510 /* Initialize the TCP keep-alive configuration. */
511 __attribute__((constructor
)) static
512 void tcp_keep_alive_init(void)
514 tcp_keep_alive_init_support(&the_support
);
515 (void) tcp_keep_alive_init_config(&the_support
, &the_config
);
519 * Set the socket options regarding TCP keep-alive.
522 int socket_apply_keep_alive_config(int socket_fd
)
528 if (!the_support
.supported
|| !the_config
.enabled
) {
533 DBG("TCP keep-alive enabled for socket %d", socket_fd
);
534 ret
= setsockopt(socket_fd
, SOL_SOCKET
, SO_KEEPALIVE
, &val
,
537 PERROR("setsockopt so_keepalive");
541 /* TCP keep-alive idle time */
542 if (the_support
.idle_time_supported
&& the_config
.idle_time
> 0) {
543 DBG("TCP keep-alive keep idle: %d enabled for socket %d",
544 the_config
.idle_time
, socket_fd
);
545 ret
= setsockopt(socket_fd
, COMPAT_TCP_LEVEL
,
546 COMPAT_TCP_KEEPIDLE
, &the_config
.idle_time
,
547 sizeof(the_config
.idle_time
));
549 PERROR("setsockopt TCP_KEEPIDLE");
553 /* TCP keep-alive probe interval */
554 if (the_support
.probe_interval_supported
&&
555 the_config
.probe_interval
> 0) {
556 DBG("TCP keep-alive probe_interval: %d enabled for socket %d",
557 the_config
.probe_interval
, socket_fd
);
558 ret
= setsockopt(socket_fd
, COMPAT_TCP_LEVEL
,
559 COMPAT_TCP_KEEPINTVL
,
560 &the_config
.probe_interval
,
561 sizeof(the_config
.probe_interval
));
563 PERROR("setsockopt TCP_KEEPINTVL");
568 /* TCP keep-alive max probe count */
569 if (the_support
.max_probe_count_supported
&&
570 the_config
.max_probe_count
> 0) {
571 DBG("TCP keep-alive max_probe: %d enabled for socket %d",
572 the_config
.max_probe_count
, socket_fd
);
573 ret
= setsockopt(socket_fd
, COMPAT_TCP_LEVEL
,
574 COMPAT_TCP_KEEPCNT
, &the_config
.max_probe_count
,
575 sizeof(the_config
.max_probe_count
));
577 PERROR("setsockopt TCP_KEEPCNT");
582 /* TCP keep-alive abort threshold */
583 if (the_support
.abort_threshold_supported
&&
584 the_config
.abort_threshold
> 0) {
585 DBG("TCP keep-alive abort threshold: %d enabled for socket %d",
586 the_config
.abort_threshold
, socket_fd
);
587 ret
= setsockopt(socket_fd
, COMPAT_TCP_LEVEL
,
588 COMPAT_TCP_ABORT_THRESHOLD
,
589 &the_config
.abort_threshold
,
590 sizeof(the_config
.max_probe_count
));
592 PERROR("setsockopt TCP_KEEPALIVE_ABORT_THRESHOLD");