1
0
mirror of https://github.com/systemd/systemd synced 2025-10-03 18:54:45 +02:00

Compare commits

...

7 Commits

Author SHA1 Message Date
Luca Boccassi
a2aa51b740
Merge pull request #18927 from poettering/dissect-alloca-fix
dissect-image: don't mix two forms of stack allocation in one line
2021-03-08 22:42:55 +00:00
Lennart Poettering
c27cb5113f format-table: simplify table_set_display_all() and keep it private 2021-03-08 21:33:35 +00:00
Luca Boccassi
ae43f2341c
Merge pull request #18918 from yuwata/man-no-new-priv-18914
man: update documents about NoNewPrivileges=
2021-03-08 20:57:32 +00:00
Lennart Poettering
794a579f29 dissect-image: don't mix two forms of stack allocation in one expression
It's not necessarily safe to do this, hence don't.
2021-03-08 17:31:47 +01:00
Lennart Poettering
7533a33b0c dissect-image: remove unnecessary parens 2021-03-08 17:31:35 +01:00
Yu Watanabe
0538d2a811 core/execute: sort conditions to make them match documentation 2021-03-08 21:42:06 +09:00
Yu Watanabe
266d0bb9e0 man: update document about NoNewPrivileges=
Fixes #18914.
2021-03-08 21:42:06 +09:00
5 changed files with 69 additions and 45 deletions

View File

@ -695,16 +695,25 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
setgid bits, or filesystem capabilities). This is the simplest and most effective way to ensure that setgid bits, or filesystem capabilities). This is the simplest and most effective way to ensure that
a process and its children can never elevate privileges again. Defaults to false, but certain a process and its children can never elevate privileges again. Defaults to false, but certain
settings override this and ignore the value of this setting. This is the case when settings override this and ignore the value of this setting. This is the case when
<varname>SystemCallFilter=</varname>, <varname>SystemCallArchitectures=</varname>, <varname>DynamicUser=</varname>,
<varname>RestrictAddressFamilies=</varname>, <varname>RestrictNamespaces=</varname>, <varname>LockPersonality=</varname>,
<varname>PrivateDevices=</varname>, <varname>ProtectKernelTunables=</varname>, <varname>MemoryDenyWriteExecute=</varname>,
<varname>ProtectKernelModules=</varname>, <varname>ProtectKernelLogs=</varname>, <varname>PrivateDevices=</varname>,
<varname>ProtectClock=</varname>, <varname>MemoryDenyWriteExecute=</varname>, <varname>ProtectClock=</varname>,
<varname>RestrictRealtime=</varname>, <varname>RestrictSUIDSGID=</varname>, <varname>DynamicUser=</varname> <varname>ProtectHostname=</varname>,
or <varname>LockPersonality=</varname> are specified. Note that even if this setting is overridden by them, <varname>ProtectKernelLogs=</varname>,
<command>systemctl show</command> shows the original value of this setting. <varname>ProtectKernelModules=</varname>,
Also see <ulink url="https://www.kernel.org/doc/html/latest/userspace-api/no_new_privs.html">No New Privileges <varname>ProtectKernelTunables=</varname>,
Flag</ulink>.</para></listitem> <varname>RestrictAddressFamilies=</varname>,
<varname>RestrictNamespaces=</varname>,
<varname>RestrictRealtime=</varname>,
<varname>RestrictSUIDSGID=</varname>,
<varname>SystemCallArchitectures=</varname>,
<varname>SystemCallFilter=</varname>, or
<varname>SystemCallLog=</varname> are specified. Note that even if this setting is overridden
by them, <command>systemctl show</command> shows the original value of this setting. Also see
<ulink url="https://www.kernel.org/doc/html/latest/userspace-api/no_new_privs.html">No New
Privileges Flag</ulink>.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
@ -1697,6 +1706,10 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
the system into the service, it is hence not suitable for services that need to take notice of system the system into the service, it is hence not suitable for services that need to take notice of system
hostname changes dynamically.</para> hostname changes dynamically.</para>
<para>If this setting is on, but the unit doesn't have the <constant>CAP_SYS_ADMIN</constant>
capability (e.g. services for which <varname>User=</varname> is set),
<varname>NoNewPrivileges=yes</varname> is implied.</para>
<xi:include href="system-only.xml" xpointer="singular"/></listitem> <xi:include href="system-only.xml" xpointer="singular"/></listitem>
</varlistentry> </varlistentry>
@ -1710,7 +1723,9 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
clock, and <varname>DeviceAllow=char-rtc r</varname> is implied. This ensures <filename>/dev/rtc0</filename>, clock, and <varname>DeviceAllow=char-rtc r</varname> is implied. This ensures <filename>/dev/rtc0</filename>,
<filename>/dev/rtc1</filename>, etc. are made read-only to the service. See <filename>/dev/rtc1</filename>, etc. are made read-only to the service. See
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry> <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for the details about <varname>DeviceAllow=</varname>.</para> for the details about <varname>DeviceAllow=</varname>. If this setting is on, but the unit
doesn't have the <constant>CAP_SYS_ADMIN</constant> capability (e.g. services for which
<varname>User=</varname> is set), <varname>NoNewPrivileges=yes</varname> is implied.</para>
<xi:include href="system-only.xml" xpointer="singular"/></listitem> <xi:include href="system-only.xml" xpointer="singular"/></listitem>
</varlistentry> </varlistentry>
@ -1727,13 +1742,14 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
<citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry> mechanism. Few <citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry> mechanism. Few
services need to write to these at runtime; it is hence recommended to turn this on for most services. For this services need to write to these at runtime; it is hence recommended to turn this on for most services. For this
setting the same restrictions regarding mount propagation and privileges apply as for setting the same restrictions regarding mount propagation and privileges apply as for
<varname>ReadOnlyPaths=</varname> and related calls, see above. Defaults to off. If turned on and if running <varname>ReadOnlyPaths=</varname> and related calls, see above. Defaults to off. If this
in user mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. services setting is on, but the unit doesn't have the <constant>CAP_SYS_ADMIN</constant> capability
for which <varname>User=</varname> is set), <varname>NoNewPrivileges=yes</varname> is implied. Note that this (e.g. services for which <varname>User=</varname> is set),
option does not prevent indirect changes to kernel tunables effected by IPC calls to other processes. However, <varname>NoNewPrivileges=yes</varname> is implied. Note that this option does not prevent
<varname>InaccessiblePaths=</varname> may be used to make relevant IPC file system objects inaccessible. If indirect changes to kernel tunables effected by IPC calls to other processes. However,
<varname>ProtectKernelTunables=</varname> is set, <varname>MountAPIVFS=yes</varname> is <varname>InaccessiblePaths=</varname> may be used to make relevant IPC file system objects
implied.</para> inaccessible. If <varname>ProtectKernelTunables=</varname> is set,
<varname>MountAPIVFS=yes</varname> is implied.</para>
<xi:include href="system-only.xml" xpointer="singular"/></listitem> <xi:include href="system-only.xml" xpointer="singular"/></listitem>
</varlistentry> </varlistentry>
@ -1752,9 +1768,9 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
both privileged and unprivileged. To disable module auto-load feature please see both privileged and unprivileged. To disable module auto-load feature please see
<citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry> <citerefentry><refentrytitle>sysctl.d</refentrytitle><manvolnum>5</manvolnum></citerefentry>
<constant>kernel.modules_disabled</constant> mechanism and <constant>kernel.modules_disabled</constant> mechanism and
<filename>/proc/sys/kernel/modules_disabled</filename> documentation. If turned on and if running in user <filename>/proc/sys/kernel/modules_disabled</filename> documentation. If this setting is on,
mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting but the unit doesn't have the <constant>CAP_SYS_ADMIN</constant> capability (e.g. services for
<varname>User=</varname>), <varname>NoNewPrivileges=yes</varname> is implied.</para> which <varname>User=</varname> is set), <varname>NoNewPrivileges=yes</varname> is implied.</para>
<xi:include href="system-only.xml" xpointer="singular"/></listitem> <xi:include href="system-only.xml" xpointer="singular"/></listitem>
</varlistentry> </varlistentry>
@ -1770,7 +1786,10 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
system call (not to be confused with the libc API system call (not to be confused with the libc API
<citerefentry project='man-pages'><refentrytitle>syslog</refentrytitle><manvolnum>3</manvolnum></citerefentry> <citerefentry project='man-pages'><refentrytitle>syslog</refentrytitle><manvolnum>3</manvolnum></citerefentry>
for userspace logging). The kernel exposes its log buffer to userspace via <filename>/dev/kmsg</filename> and for userspace logging). The kernel exposes its log buffer to userspace via <filename>/dev/kmsg</filename> and
<filename>/proc/kmsg</filename>. If enabled, these are made inaccessible to all the processes in the unit.</para> <filename>/proc/kmsg</filename>. If enabled, these are made inaccessible to all the processes in the unit.
If this setting is on, but the unit doesn't have the <constant>CAP_SYS_ADMIN</constant>
capability (e.g. services for which <varname>User=</varname> is set),
<varname>NoNewPrivileges=yes</varname> is implied.</para>
<xi:include href="system-only.xml" xpointer="singular"/></listitem> <xi:include href="system-only.xml" xpointer="singular"/></listitem>
</varlistentry> </varlistentry>
@ -1810,7 +1829,7 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
restrictions of this option. Specifically, it is recommended to combine this option with restrictions of this option. Specifically, it is recommended to combine this option with
<varname>SystemCallArchitectures=native</varname> or similar. If running in user mode, or in system <varname>SystemCallArchitectures=native</varname> or similar. If running in user mode, or in system
mode, but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting mode, but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting
<varname>User=nobody</varname>), <varname>NoNewPrivileges=yes</varname> is implied. By default, no <varname>User=</varname>), <varname>NoNewPrivileges=yes</varname> is implied. By default, no
restrictions apply, all address families are accessible to processes. If assigned the empty string, restrictions apply, all address families are accessible to processes. If assigned the empty string,
any previous address family restriction changes are undone. This setting does not affect commands any previous address family restriction changes are undone. This setting does not affect commands
prefixed with <literal>+</literal>.</para> prefixed with <literal>+</literal>.</para>
@ -2040,7 +2059,7 @@ RestrictNamespaces=~cgroup net</programlisting>
explicitly specify killing. This value takes precedence over the one given in explicitly specify killing. This value takes precedence over the one given in
<varname>SystemCallErrorNumber=</varname>, see below. If running in user mode, or in system mode, <varname>SystemCallErrorNumber=</varname>, see below. If running in user mode, or in system mode,
but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting
<varname>User=nobody</varname>), <varname>NoNewPrivileges=yes</varname> is implied. This feature <varname>User=</varname>), <varname>NoNewPrivileges=yes</varname> is implied. This feature
makes use of the Secure Computing Mode 2 interfaces of the kernel ('seccomp filtering') and is useful makes use of the Secure Computing Mode 2 interfaces of the kernel ('seccomp filtering') and is useful
for enforcing a minimal sandboxing environment. Note that the <function>execve()</function>, for enforcing a minimal sandboxing environment. Note that the <function>execve()</function>,
<function>exit()</function>, <function>exit_group()</function>, <function>getrlimit()</function>, <function>exit()</function>, <function>exit_group()</function>, <function>getrlimit()</function>,
@ -2262,7 +2281,7 @@ SystemCallErrorNumber=EPERM</programlisting>
the special identifier <constant>native</constant>. The special identifier <constant>native</constant> the special identifier <constant>native</constant>. The special identifier <constant>native</constant>
implicitly maps to the native architecture of the system (or more precisely: to the architecture the system implicitly maps to the native architecture of the system (or more precisely: to the architecture the system
manager is compiled for). If running in user mode, or in system mode, but without the manager is compiled for). If running in user mode, or in system mode, but without the
<constant>CAP_SYS_ADMIN</constant> capability (e.g. setting <varname>User=nobody</varname>), <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting <varname>User=</varname>),
<varname>NoNewPrivileges=yes</varname> is implied. By default, this option is set to the empty list, i.e. no <varname>NoNewPrivileges=yes</varname> is implied. By default, this option is set to the empty list, i.e. no
filtering is applied.</para> filtering is applied.</para>
@ -2291,7 +2310,7 @@ SystemCallErrorNumber=EPERM</programlisting>
system calls executed by the unit processes for the listed ones will be logged. If the first system calls executed by the unit processes for the listed ones will be logged. If the first
character of the list is <literal>~</literal>, the effect is inverted: all system calls except the character of the list is <literal>~</literal>, the effect is inverted: all system calls except the
listed system calls will be logged. If running in user mode, or in system mode, but without the listed system calls will be logged. If running in user mode, or in system mode, but without the
<constant>CAP_SYS_ADMIN</constant> capability (e.g. setting <varname>User=nobody</varname>), <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting <varname>User=</varname>),
<varname>NoNewPrivileges=yes</varname> is implied. This feature makes use of the Secure Computing <varname>NoNewPrivileges=yes</varname> is implied. This feature makes use of the Secure Computing
Mode 2 interfaces of the kernel ('seccomp filtering') and is useful for auditing or setting up a Mode 2 interfaces of the kernel ('seccomp filtering') and is useful for auditing or setting up a
minimal sandboxing environment. This option may be specified more than once, in which case the filter minimal sandboxing environment. This option may be specified more than once, in which case the filter

View File

@ -1430,21 +1430,21 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
return false; return false;
/* We need NNP if we have any form of seccomp and are unprivileged */ /* We need NNP if we have any form of seccomp and are unprivileged */
return context_has_address_families(c) || return c->lock_personality ||
c->memory_deny_write_execute || c->memory_deny_write_execute ||
c->restrict_realtime || c->private_devices ||
c->restrict_suid_sgid ||
exec_context_restrict_namespaces_set(c) ||
c->protect_clock || c->protect_clock ||
c->protect_hostname ||
c->protect_kernel_tunables || c->protect_kernel_tunables ||
c->protect_kernel_modules || c->protect_kernel_modules ||
c->protect_kernel_logs || c->protect_kernel_logs ||
c->private_devices || context_has_address_families(c) ||
context_has_syscall_filters(c) || exec_context_restrict_namespaces_set(c) ||
context_has_syscall_logs(c) || c->restrict_realtime ||
c->restrict_suid_sgid ||
!set_isempty(c->syscall_archs) || !set_isempty(c->syscall_archs) ||
c->lock_personality || context_has_syscall_filters(c) ||
c->protect_hostname; context_has_syscall_logs(c);
} }
static bool exec_context_has_credentials(const ExecContext *context) { static bool exec_context_has_credentials(const ExecContext *context) {

View File

@ -2250,8 +2250,8 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
[META_HOSTNAME] = "/etc/hostname\0", [META_HOSTNAME] = "/etc/hostname\0",
[META_MACHINE_ID] = "/etc/machine-id\0", [META_MACHINE_ID] = "/etc/machine-id\0",
[META_MACHINE_INFO] = "/etc/machine-info\0", [META_MACHINE_INFO] = "/etc/machine-info\0",
[META_OS_RELEASE] = ("/etc/os-release\0" [META_OS_RELEASE] = "/etc/os-release\0"
"/usr/lib/os-release\0"), "/usr/lib/os-release\0",
[META_EXTENSION_RELEASE] = NULL, [META_EXTENSION_RELEASE] = NULL,
}; };
@ -2272,7 +2272,9 @@ int dissected_image_acquire_metadata(DissectedImage *m) {
/* As per the os-release spec, if the image is an extension it will have a file /* As per the os-release spec, if the image is an extension it will have a file
* named after the image name in extension-release.d/ */ * named after the image name in extension-release.d/ */
if (m->image_name) { if (m->image_name) {
char *ext = strjoina("/usr/lib/extension-release.d/extension-release.", m->image_name, "0"); char *ext;
ext = strjoina("/usr/lib/extension-release.d/extension-release.", m->image_name, "0");
ext[strlen(ext) - 1] = '\0'; /* Extra \0 for NULSTR_FOREACH using placeholder from above */ ext[strlen(ext) - 1] = '\0'; /* Extra \0 for NULSTR_FOREACH using placeholder from above */
paths[META_EXTENSION_RELEASE] = ext; paths[META_EXTENSION_RELEASE] = ext;
} else } else

View File

@ -1078,17 +1078,21 @@ int table_set_empty_string(Table *t, const char *empty) {
return free_and_strdup(&t->empty_string, empty); return free_and_strdup(&t->empty_string, empty);
} }
int table_set_display_all(Table *t) { static int table_set_display_all(Table *t) {
size_t *d;
assert(t); assert(t);
size_t allocated = t->n_display_map; /* Initialize the display map to the identity */
if (!GREEDY_REALLOC(t->display_map, allocated, MAX(t->n_columns, allocated))) d = reallocarray(t->display_map, t->n_columns, sizeof(size_t));
if (!d)
return -ENOMEM; return -ENOMEM;
for (size_t i = 0; i < t->n_columns; i++) for (size_t i = 0; i < t->n_columns; i++)
t->display_map[i] = i; d[i] = i;
t->display_map = d;
t->n_display_map = t->n_columns; t->n_display_map = t->n_columns;
return 0; return 0;

View File

@ -100,7 +100,6 @@ void table_set_header(Table *table, bool b);
void table_set_width(Table *t, size_t width); void table_set_width(Table *t, size_t width);
void table_set_cell_height_max(Table *t, size_t height); void table_set_cell_height_max(Table *t, size_t height);
int table_set_empty_string(Table *t, const char *empty); int table_set_empty_string(Table *t, const char *empty);
int table_set_display_all(Table *t);
int table_set_display_internal(Table *t, size_t first_column, ...); int table_set_display_internal(Table *t, size_t first_column, ...);
#define table_set_display(...) table_set_display_internal(__VA_ARGS__, SIZE_MAX) #define table_set_display(...) table_set_display_internal(__VA_ARGS__, SIZE_MAX)
int table_set_sort_internal(Table *t, size_t first_column, ...); int table_set_sort_internal(Table *t, size_t first_column, ...);