Merge pull request #17438 from anitazha/systoomd_quick

Additional fix ups from #17417
meson: fix setting of ENABLE_OOMD
2020-10-27 18:43:34 +01:00 · 2020-10-27 18:42:49 +01:00 · 2020-10-27 18:01:22 +01:00 · 2020-10-27 14:06:49 +01:00 · 2020-10-27 13:20:40 +01:00 · 2020-10-27 00:20:17 -07:00
19 changed files with 91 additions and 46 deletions
--- a/docs/CODING_STYLE.md
+++ b/docs/CODING_STYLE.md
@ -318,6 +318,14 @@ layout: default
  unlink("/foo/bar/baz");
  ```

+  When returning from a `void` function, you may also want to shorten the error
+  path boilerplate by returning a function invocation cast to `(void)` like so:
+
+  ```c
+  if (condition_not_met)
+          return (void) log_tests_skipped("Cannot run ...");
+  ```
+
  Don't cast function calls to `(void)` that return no error
  conditions. Specifically, the various `xyz_unref()` calls that return a
  `NULL` object shouldn't be cast to `(void)`, since not using the return value
--- a/meson.build
+++ b/meson.build
@ -1422,6 +1422,7 @@ else
        endif
 endif
 conf.set10('ENABLE_OOMD', have)
+substs.set10('ENABLE_OOMD', have)

 want_remote = get_option('remote')
 if want_remote != 'false'
@ -1462,7 +1463,6 @@ foreach term : ['analyze',
                'networkd',
                'nss-myhostname',
                'nss-systemd',
-                'oomd',
                'portabled',
                'pstore',
                'quotacheck',
--- a/src/core/automount.c
+++ b/src/core/automount.c
@ -989,7 +989,7 @@ static int automount_dispatch_io(sd_event_source *s, int fd, uint32_t events, vo
                if (packet.v5_packet.pid > 0) {
                        _cleanup_free_ char *p = NULL;

-                        get_process_comm(packet.v5_packet.pid, &p);
+                        (void) get_process_comm(packet.v5_packet.pid, &p);
                        log_unit_info(UNIT(a), "Got automount request for %s, triggered by %"PRIu32" (%s)", a->where, packet.v5_packet.pid, strna(p));
                } else
                        log_unit_debug(UNIT(a), "Got direct mount request on %s", a->where);
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@ -2422,6 +2422,29 @@ void unit_release_cgroup(Unit *u) {
        }
 }

+bool unit_maybe_release_cgroup(Unit *u) {
+        int r;
+
+        assert(u);
+
+        if (!u->cgroup_path)
+                return true;
+
+        /* Don't release the cgroup if there are still processes under it. If we get notified later when all the
+         * processes exit (e.g. the processes were in D-state and exited after the unit was marked as failed)
+         * we need the cgroup paths to continue to be tracked by the manager so they can be looked up and cleaned
+         * up later. */
+        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
+        if (r < 0)
+                log_unit_debug_errno(u, r, "Error checking if the cgroup is recursively empty, ignoring: %m");
+        else if (r == 1) {
+                unit_release_cgroup(u);
+                return true;
+        }
+
+        return false;
+}
+
 void unit_prune_cgroup(Unit *u) {
        int r;
        bool is_root_slice;
@ -2449,7 +2472,8 @@ void unit_prune_cgroup(Unit *u) {
        if (is_root_slice)
                return;

-        unit_release_cgroup(u);
+        if (!unit_maybe_release_cgroup(u)) /* Returns true if the cgroup was released */
+                return;

        u->cgroup_realized = false;
        u->cgroup_realized_mask = 0;
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@ -223,11 +223,15 @@ int unit_set_cgroup_path(Unit *u, const char *path);
 int unit_pick_cgroup_path(Unit *u);

 int unit_realize_cgroup(Unit *u);
-void unit_release_cgroup(Unit *u);
 void unit_prune_cgroup(Unit *u);
 int unit_watch_cgroup(Unit *u);
 int unit_watch_cgroup_memory(Unit *u);

+void unit_release_cgroup(Unit *u);
+/* Releases the cgroup only if it is recursively empty.
+ * Returns true if the cgroup was released, false otherwise. */
+bool unit_maybe_release_cgroup(Unit *u);
+
 void unit_add_to_cgroup_empty_queue(Unit *u);
 int unit_check_oomd_kill(Unit *u);
 int unit_check_oom(Unit *u);
--- a/src/core/killall.c
+++ b/src/core/killall.c
@ -214,7 +214,7 @@ static int killall(int sig, Set *pids, bool send_sighup) {
                if (sig == SIGKILL) {
                        _cleanup_free_ char *s = NULL;

-                        get_process_comm(pid, &s);
+                        (void) get_process_comm(pid, &s);
                        log_notice("Sending SIGKILL to PID "PID_FMT" (%s).", pid, strna(s));
                }

--- a/src/core/scope.c
+++ b/src/core/scope.c
@ -487,6 +487,11 @@ static void scope_notify_cgroup_empty_event(Unit *u) {

        if (IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
                scope_enter_dead(s, SCOPE_SUCCESS);
+
+        /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
+         * up the cgroup earlier and should do it now. */
+        if (IN_SET(s->state, SCOPE_DEAD, SCOPE_FAILED))
+                unit_prune_cgroup(u);
 }

 static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
--- a/src/core/service.c
+++ b/src/core/service.c
@ -3330,6 +3330,13 @@ static void service_notify_cgroup_empty_event(Unit *u) {

                break;

+        /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
+         * up the cgroup earlier and should do it now. */
+        case SERVICE_DEAD:
+        case SERVICE_FAILED:
+                unit_prune_cgroup(u);
+                break;
+
        default:
                ;
        }
--- a/src/journal/journald-console.c
+++ b/src/journal/journald-console.c
@ -66,7 +66,7 @@ void server_forward_console(
        /* Second: identifier and PID */
        if (ucred) {
                if (!identifier) {
-                        get_process_comm(ucred->pid, &ident_buf);
+                        (void) get_process_comm(ucred->pid, &ident_buf);
                        identifier = ident_buf;
                }

--- a/src/journal/journald-kmsg.c
+++ b/src/journal/journald-kmsg.c
@ -58,7 +58,7 @@ void server_forward_kmsg(
        /* Second: identifier and PID */
        if (ucred) {
                if (!identifier) {
-                        get_process_comm(ucred->pid, &ident_buf);
+                        (void) get_process_comm(ucred->pid, &ident_buf);
                        identifier = ident_buf;
                }

--- a/src/journal/journald-syslog.c
+++ b/src/journal/journald-syslog.c
@ -153,7 +153,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
        /* Third: identifier and PID */
        if (ucred) {
                if (!identifier) {
-                        get_process_comm(ucred->pid, &ident_buf);
+                        (void) get_process_comm(ucred->pid, &ident_buf);
                        identifier = ident_buf;
                }

--- a/src/journal/journald-wall.c
+++ b/src/journal/journald-wall.c
@ -27,7 +27,7 @@ void server_forward_wall(

        if (ucred) {
                if (!identifier) {
-                        get_process_comm(ucred->pid, &ident_buf);
+                        (void) get_process_comm(ucred->pid, &ident_buf);
                        identifier = ident_buf;
                }

--- a/src/login/loginctl.c
+++ b/src/login/loginctl.c
@ -485,7 +485,7 @@ static int print_session_status_info(sd_bus *bus, const char *path, bool *new_li

                printf("\t  Leader: %"PRIu32, i.leader);

-                get_process_comm(i.leader, &t);
+                (void) get_process_comm(i.leader, &t);
                if (t)
                        printf(" (%s)", t);

--- a/src/network/networkd-link.c
+++ b/src/network/networkd-link.c
@ -2782,7 +2782,7 @@ int link_update(Link *link, sd_netlink_message *m) {
                if (r < 0)
                        return log_link_warning_errno(link, r, "Could not update MAC address in DHCPv6 client: %m");

-                r = dhcp6_update_mac(link);
+                r = radv_update_mac(link);
                if (r < 0)
                        return log_link_warning_errno(link, r, "Could not update MAC address for Router Advertisement: %m");

--- a/src/systemctl/systemctl-logind.c
+++ b/src/systemctl/systemctl-logind.c
@ -146,7 +146,7 @@ int logind_check_inhibitors(enum action a) {
                                          ACTION_KEXEC) ? "shutdown" : "sleep"))
                        continue;

-                get_process_comm(pid, &comm);
+                (void) get_process_comm(pid, &comm);
                user = uid_to_name(uid);

                log_warning("Operation inhibited by \"%s\" (PID "PID_FMT" \"%s\", user %s), reason is \"%s\".",
--- a/test/units/testsuite-56-testbloat.service
+++ b/test/units/testsuite-56-testbloat.service
@ -0,0 +1,9 @@
+[Unit]
+Description=Create a lot of memory pressure
+
+[Service]
+# A very small memory.high will cause the script (trying to use a lot of memory)
+# to throttle and be put under heavy pressure
+MemoryHigh=2M
+Slice=testsuite-56-workload.slice
+ExecStart=/usr/lib/systemd/tests/testdata/units/testsuite-56-slowgrowth.sh
--- a/test/units/testsuite-56-testchill.service
+++ b/test/units/testsuite-56-testchill.service
@ -0,0 +1,6 @@
+[Unit]
+Description=No memory pressure
+
+[Service]
+Slice=testsuite-56-workload.slice
+ExecStart=sleep infinity
--- a/test/units/testsuite-56-workload.slice
+++ b/test/units/testsuite-56-workload.slice
@ -0,0 +1,10 @@
+[Unit]
+Description=Test slice for memory pressure kills
+
+[Slice]
+CPUAccounting=true
+MemoryAccounting=true
+IOAccounting=true
+TasksAccounting=true
+ManagedOOMMemoryPressure=kill
+ManagedOOMMemoryPressureLimitPercent=50%
--- a/test/units/testsuite-56.sh
+++ b/test/units/testsuite-56.sh
@ -14,54 +14,26 @@ if [[ "$cgroup_type" != *"cgroup2"* ]] && [[ "$cgroup_type" != *"0x63677270"* ]]
 fi
 [[ -e /skipped ]] && exit 0 || true

-cat > /etc/systemd/system/testworkload.slice <<EOF
-[Slice]
-CPUAccounting=true
-MemoryAccounting=true
-IOAccounting=true
-TasksAccounting=true
-ManagedOOMMemoryPressure=kill
-ManagedOOMMemoryPressureLimitPercent=50%
-EOF
-
-# Create a lot of memory pressure by setting memory.high to a very small value
-cat > /etc/systemd/system/testbloat.service <<EOF
-[Service]
-MemoryHigh=2M
-Slice=testworkload.slice
-ExecStart=/usr/lib/systemd/tests/testdata/units/testsuite-56-slowgrowth.sh
-EOF
-
-# This generates no memory pressure
-cat > /etc/systemd/system/testchill.service <<EOF
-[Service]
-MemoryHigh=2M
-Slice=testworkload.slice
-ExecStart=sleep infinity
-EOF
-
-systemctl daemon-reload
-
-systemctl start testbloat.service
-systemctl start testchill.service
+systemctl start testsuite-56-testbloat.service
+systemctl start testsuite-56-testchill.service

 # Verify systemd-oomd is monitoring the expected units
-oomctl | grep "/testworkload.slice"
+oomctl | grep "/testsuite-56-workload.slice"
 oomctl | grep "50%"

 # systemd-oomd watches for elevated pressure for 30 seconds before acting.
 # It can take time to build up pressure so either wait 5 minutes or for the service to fail.
 timeout=$(date -ud "5 minutes" +%s)
 while [[ $(date -u +%s) -le $timeout ]]; do
-    if ! systemctl status testbloat.service; then
+    if ! systemctl status testsuite-56-testbloat.service; then
        break
    fi
    sleep 15
 done

 # testbloat should be killed and testchill should be fine
-if systemctl status testbloat.service; then exit 42; fi
-if ! systemctl status testchill.service; then exit 24; fi
+if systemctl status testsuite-56-testbloat.service; then exit 42; fi
+if ! systemctl status testsuite-56-testchill.service; then exit 24; fi

 systemd-analyze log-level info
Author	SHA1	Message	Date
Zbigniew Jędrzejewski-Szmek	c37a43d2dd	Merge pull request #17438 from anitazha/systoomd_quick Additional fix ups from #17417	2020-10-27 18:43:34 +01:00
Zbigniew Jędrzejewski-Szmek	a881d96183	meson: fix setting of ENABLE_OOMD -Doomd=auto (the default) didn't work as intended because the initial correct value was overwritten later by logic that didn't check for 'auto'.	2020-10-27 18:42:49 +01:00
Timo Rothenpieler	d0c4275c21	network: actually update radv mac	2020-10-27 18:01:22 +01:00
Lennart Poettering	74d6421da0	tree-wide: cast result of get_process_comm() to (void) where we ignore it	2020-10-27 14:06:49 +01:00
Anita Zhang	e08dabfec7	core: clean up inactive/failed {service\|scope}'s cgroups when the last process exits If processes remain in the unit's cgroup after the final SIGKILL is sent and the unit has exceeded stop timeout, don't release the unit's cgroup information. Pid1 will have failed to `rmdir` the cgroup path due to processes remaining in the cgroup and releasing would leave the cgroup path on the file system with no tracking for pid1 to clean it up. Instead, keep the information around until the last process exits and pid1 sends the cgroup empty notification. The service/scope can then prune the cgroup if the unit is inactive/failed.	2020-10-27 13:20:40 +01:00
Anita Zhang	800d0802e4	docs: update coding style for `return (void) func(...)` Seems that people think it's useful for brevity so make it explicit in the CODING_STYLE.	2020-10-27 00:20:17 -07:00
Anita Zhang	311e3d4637	test: make TEST-56-OOMD service unit files static	2020-10-23 15:59:00 -07:00