Skip to content
11 changes: 11 additions & 0 deletions NEWS.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,16 @@ https://github.com/networkupstools/nut/milestone/13
ended up as literal HTML on KDE Plasma 6; now that rich text formatting
is only used for main window status labels. [PR #3430]

- `upsd` data server updates:
* If we hit "Too many open files" during configuration reload, close
the oldest client connection and retry. [issue #3365]
* If the `MAXCONN` requested in the configuration file exceeds the OS
allowance on open file descriptors, fail early, since the requested
configuration cannot be guaranteed and could mis-fire unexpectedly
much later (the sysadmin is told to increase `ulimit` or to set a more
conservative `MAXCONN`). If there are separate soft and hard limits,
and `MAXCONN` exceeds the soft limit, try to raise the soft limit. [issue #3365]

- Recipes, CI and helper script updates not classified above:
* Introduced `ci_build.sh` settings and respective CI workflow settings
to optionally re-use a `config.cache` file from older runs, and similar
Expand All @@ -127,6 +137,7 @@ https://github.com/networkupstools/nut/milestone/13
dependencies were not previously discovered and bundled). [issue #3420,
PRs #3429, #3432]


Release notes for NUT 2.8.5 - what's new since 2.8.4
----------------------------------------------------

Expand Down
7 changes: 7 additions & 0 deletions UPGRADING.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ Changes from 2.8.5 to 2.8.6
in this regard, at the cost of adding arguments to methods introduced in
the previous release. [issue #3331, PR #3408]

- Potentially a breaking change for existing deployments with a (large)
`MAXCONN` setting in `upsd.conf`: this value is now checked against the
`getrlimit()` limit (e.g. `ulimit -n`) that the operating system applies to
this daemon, where available, and the `upsd` data server will refuse to start
if the requested value is larger than what is allowed (minus some reserve
for configuration files and other use-cases). [issue #3365]


Changes from 2.8.4 to 2.8.5
---------------------------
Expand Down
1 change: 1 addition & 0 deletions docs/nut.dict
Original file line number Diff line number Diff line change
Expand Up @@ -2242,6 +2242,7 @@ getenv
gethostbyname
getopt
getproctag
getrlimit
getter
getters
gettext
Expand Down
35 changes: 33 additions & 2 deletions server/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "netssl.h"
#include "nut_stdint.h"
#include <ctype.h>
#include <errno.h>

static ups_t *upstable = NULL;
int num_ups = 0;
Expand Down Expand Up @@ -420,7 +421,12 @@ void load_upsdconf(int reloading)

pconf_init(&ctx, upsd_conf_err);

retry:
if (!pconf_file_begin(&ctx, fn)) {
if (errno == EMFILE && reloading == 2) {
close_oldest_client();
goto retry;
}
pconf_finish(&ctx);

if (!reloading)
Expand Down Expand Up @@ -491,6 +497,24 @@ void load_upsdconf(int reloading)
pconf_finish(&ctx);
}

/* Load ups.conf via read_upsconf(), optionally recovering from running
 * out of file descriptors.
 *
 * reloading: the value 2 means "reloading, and may retry": if the first
 *            attempt fails with EMFILE, the oldest client connection is
 *            closed and the read is attempted once more. With any other
 *            value a failed first attempt terminates the process.
 *
 * Returns the result of read_upsconf() (that of the second attempt if
 * a retry took place). If the first attempt returns -1 and no retry is
 * applicable, the process exits with EXIT_FAILURE.
 */
static int load_upsconf(int reloading) {
	int ret;
	int saved_errno;

	/* Clear errno first: otherwise a stale EMFILE left behind by some
	 * earlier call could be mistaken for the cause of a failure that
	 * did not set errno at all, and a client would be dropped in vain.
	 */
	errno = 0;
	ret = read_upsconf(0); /* 0 = do not abort fatally just yet */

	/* Capture right away, before any later call can overwrite it */
	saved_errno = errno;

	if (ret == -1) {
		if (saved_errno == EMFILE && reloading == 2) {
			upsdebugx(1, "%s: close an oldest client connection and try reading config again", __func__);
			close_oldest_client();
			ret = read_upsconf(1); /* 1 = may abort upon fundamental errors */
		} else {
			/* Not fatalx(), the method above already reported the problem */
			exit(EXIT_FAILURE);
		}
	}

	return ret;
}

/* callback during parsing of ups.conf */
void do_upsconf_args(char *upsname, char *var, char *val)
{
Expand Down Expand Up @@ -654,12 +678,19 @@ static int check_file(const char *fn)
{
char chkfn[NUT_PATH_MAX];
FILE *f;
int retries = 0;

snprintf(chkfn, sizeof(chkfn), "%s/%s", confpath(), fn);

retry:
f = fopen(chkfn, "r");

if (!f) {
if (errno == EMFILE && retries < 10) {
close_oldest_client();
retries++;
goto retry;
}
upslog_with_errno(LOG_ERR, "Reload failed: can't open %s", chkfn);
return 0; /* failed */
}
Expand Down Expand Up @@ -687,11 +718,11 @@ void conf_reload(void)
}

/* reload from ups.conf */
read_upsconf(1); /* 1 = may abort upon fundamental errors */
load_upsconf(2); /* 2 = reloading, and may retry by closing clients if EMFILE */
upsconf_add(1); /* 1 = reloading */

/* now reread upsd.conf */
load_upsdconf(1); /* 1 = reloading */
load_upsdconf(2); /* 2 = reloading, and may retry by closing clients if EMFILE */

/* now delete all UPS entries that didn't get reloaded */

Expand Down
120 changes: 117 additions & 3 deletions server/upsd.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@
# include <signal.h>
/* #include <poll.h> */
# endif
# ifdef HAVE_SYS_RESOURCE_H
# include <sys/resource.h> /* for getrlimit() and struct rlimit */
# endif
#else /* WIN32 */
/* Those 2 files for support of getaddrinfo, getnameinfo and freeaddrinfo
on Windows 2000 and older versions */
Expand Down Expand Up @@ -94,12 +97,14 @@ int allow_no_device = 0;
*/
int allow_not_all_listeners = 0;

/* preloaded to POSIX sysconf(_SC_OPEN_MAX) or WIN32 MAX_WAIT_OBJECTS in main
/* Preloaded to POSIX sysconf(_SC_OPEN_MAX) or WIN32 MAXIMUM_WAIT_OBJECTS in main
* and elsewhere, the run-time value can be overridden via upsd.conf `MAXCONN`
* option (may cause partial waits chunk by chunk, if sysmaxconn is smaller).
* The sysmaxconn_hard is derived from getrlimit() (aka `ulimit` on allowed
* opened file descriptors) where available.
*/
nfds_t maxconn = 0;
static nfds_t sysmaxconn = 0;
static nfds_t sysmaxconn = 0, sysmaxconn_hard = 0;

/* preloaded to STATEPATH in main, can be overridden via upsd.conf */
char *statepath = NULL;
Expand Down Expand Up @@ -1258,12 +1263,68 @@ static void update_sysmaxconn(void)
char *s = getenv("NUT_SYSMAXCONN_LIMIT");

#ifndef WIN32
# ifdef HAVE_SYS_RESOURCE_H
struct rlimit limit;
# endif /* HAVE_SYS_RESOURCE_H */

/* default to system limit (may be overridden in upsd.conf) */
/* FIXME: Check for overflows (and int size of nfds_t vs. long) - see get_max_pid_t() for example */
l = sysconf(_SC_OPEN_MAX);

# ifdef HAVE_SYS_RESOURCE_H
/* Try to use getrlimit/setrlimit to detect and possibly increase the limit */
if (getrlimit(RLIMIT_NOFILE, &limit) == 0) {
upsdebugx(2, "%s: System file descriptor limits: soft=%ld, hard=%ld",
__func__, (long)limit.rlim_cur, (long)limit.rlim_max);

/* If we requested a specific MAXCONN, try to ensure we have enough FDs */
if (maxconn > 0) {
rlim_t needed = (rlim_t)maxconn + RESERVE_FD_COUNT_UPSD;

if (limit.rlim_cur < needed) {
if (needed <= limit.rlim_max) {
upslogx(LOG_INFO, "Increasing file descriptor limit to %ld", (long)needed);

limit.rlim_cur = needed;
if (setrlimit(RLIMIT_NOFILE, &limit) != 0) {
upslog_with_errno(LOG_WARNING, "setrlimit(RLIMIT_NOFILE) to %ld failed", (long)needed);
}
} else {
upslogx(LOG_WARNING, "WARNING: Requested MAXCONN %" PRIdMAX
" requires %ld FDs overall "
"(with %ld reserved for non-connection purposes), "
"but system hard limit is %ld",
(intmax_t)maxconn, (long)needed,
(long)RESERVE_FD_COUNT_UPSD,
(long)limit.rlim_max);

/* We might still try to bump to hard limit */
if (limit.rlim_cur < limit.rlim_max) {
limit.rlim_cur = limit.rlim_max;
setrlimit(RLIMIT_NOFILE, &limit);
}
}
}
}

/* Refresh limit after possible update */
getrlimit(RLIMIT_NOFILE, &limit);
sysmaxconn_hard = (long)limit.rlim_cur;
} else {
# endif /* HAVE_SYS_RESOURCE_H */
/* Fallback to sysconf if getrlimit fails or is absent */
/* TOTHINK: Any other reasonable fallback hard limit? */
sysmaxconn_hard = (nfds_t)l;
# ifdef HAVE_SYS_RESOURCE_H
}
# endif /* HAVE_SYS_RESOURCE_H */

#else /* WIN32 */
/* hard-coded 64 (from ddk/wdm.h or winnt.h) */
l = (long)MAXIMUM_WAIT_OBJECTS;

/* No known limit, do not check */
sysmaxconn_hard = 0;
#endif /* WIN32 */

if (l < 1) {
Expand All @@ -1276,11 +1337,35 @@ static void update_sysmaxconn(void)
l);
}

if (sysmaxconn_hard > 0 && sysmaxconn_hard < RESERVE_FD_COUNT_UPSD + 10) {
fatalx(EXIT_FAILURE,
"System reported an absurd value %ld (below the %ld reservation for\n"
"non-connection purposes and some 10 for driver/client/... connections)\n"
"as its hard maximum number of connections.\n"
"The server won't start until this problem is resolved.\n",
(long)sysmaxconn_hard, (long)RESERVE_FD_COUNT_UPSD);
}

/* Note this historically also serves as
* the initial/default MAXCONN setting
* (so site/platform-dependent).
*/
sysmaxconn = (nfds_t)l;
if (sysmaxconn_hard > 0) {
if (l < RESERVE_FD_COUNT_UPSD + 10) {
fatalx(EXIT_FAILURE,
"System reported an absurd value %ld (below the %ld reservation for\n"
"non-connection purposes and some 10 for driver/client/... connections)\n"
"as its sysconf maximum number of connections.\n"
"The server won't start until this problem is resolved.\n",
l, (long)RESERVE_FD_COUNT_UPSD);
}

sysmaxconn = (nfds_t)(l - RESERVE_FD_COUNT_UPSD);
} else {
/* No known limit on open FDs/handles, whether connections or files or other streams */
sysmaxconn = (nfds_t)l;
}

if (maxconn < 1) {
upsdebugx(1, "%s: defaulting maxconn to sysmaxconn: %ld",
__func__, l);
Expand Down Expand Up @@ -1308,6 +1393,15 @@ static void poll_reload(void)
/* Not likely this would change, but refresh just in case */
update_sysmaxconn();

if (sysmaxconn_hard > 0 && (maxconn > sysmaxconn_hard - RESERVE_FD_COUNT_UPSD)) {
fatalx(EXIT_FAILURE,
"You requested %" PRIdMAX " as maximum number of connections,\n"
"but the system only allows %" PRIdMAX " and we need %d for ourselves.\n"
"The server won't start until this problem is resolved\n"
"(reduce MAXCONN or increase ulimit or similar settings).\n",
(intmax_t)maxconn, (intmax_t)sysmaxconn, RESERVE_FD_COUNT_UPSD);
}

if ((intmax_t)sysmaxconn < (intmax_t)maxconn) {
upslogx(LOG_WARNING,
"Your system limits the maximum number of connections to %" PRIdMAX "\n"
Expand Down Expand Up @@ -1593,6 +1687,7 @@ static void mainloop(void)
if (reload_flag) {
upsnotify(NOTIFY_STATE_RELOADING, NULL);
conf_reload();
/* Among other things, re-detect sysmaxconn after loading config, because MAXCONN might have changed */
poll_reload();
reload_flag = 0;
upsnotify(NOTIFY_STATE_READY, NULL);
Expand Down Expand Up @@ -2419,6 +2514,22 @@ void check_perms(const char *fn)
#endif /* WIN32 */
}

/* Disconnect the client with the smallest last_heard value in the
 * firstclient list, to free up a file descriptor.
 * Among clients with equal last_heard, the one nearest the list head
 * is chosen. Does nothing if the list is empty.
 */
void close_oldest_client(void)
{
	nut_ctype_t	*victim = firstclient;
	nut_ctype_t	*cur;

	/* No clients connected: nothing we could close */
	if (!victim) {
		return;
	}

	/* Start from the head and only switch on a strictly older entry */
	for (cur = victim->next; cur; cur = cur->next) {
		if (cur->last_heard < victim->last_heard) {
			victim = cur;
		}
	}

	upslogx(LOG_INFO, "Closing oldest client connection from %s to free up file descriptors", victim->addr);
	client_disconnect(victim);
}

int main(int argc, char **argv)
{
int opt_ret = 0, cmdret = 0, foreground = -1;
Expand Down Expand Up @@ -2708,6 +2819,9 @@ int main(int argc, char **argv)
/* handle upsd.conf */
load_upsdconf(0); /* 0 = initial */

/* Re-detect sysmaxconn after loading config, because MAXCONN might have changed */
update_sysmaxconn();

/* CLI debug level can not be smaller than debug_min specified
* in upsd.conf. Note that non-zero debug_min does not impact
* foreground running mode.
Expand Down
Loading
Loading