Compare commits

...

2 Commits

Author SHA1 Message Date
Ryan Wilson bd04c73f82
Merge bf05e30acc into d5bb359429 2024-11-19 17:50:16 +01:00
Ryan Wilson bf05e30acc core: Add PrivateUsers=full
Recently, PrivateUsers=identity was added to support mapping the first
65536 UIDs/GIDs from parent to the child namespace and mapping the other
UIDs/GIDs to the nobody user.

However, there are use cases where users have UIDs/GIDs > 65536 and need
to do a similar identity mapping. Moreover, in some of those cases, users
want a full identity mapping from 0 -> UID_MAX.

To support this, we add PrivateUsers=full, which sets up an identity mapping
for all available UIDs/GIDs.

Note that to differentiate ourselves from the init user namespace, we need to
set up the uid_map/gid_map like this:
```
0 0 1
1 1 UINT32_MAX - 1
```

as the init user namespace uses `0 0 UINT32_MAX` and some applications -
like systemd itself - determine whether they are running in a non-init user
namespace based on the contents of the uid_map/gid_map files.

Fixes: #35168
2024-11-15 12:30:51 -08:00
5 changed files with 32 additions and 2 deletions

View File

@ -2009,8 +2009,8 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
<varlistentry> <varlistentry>
<term><varname>PrivateUsers=</varname></term> <term><varname>PrivateUsers=</varname></term>
<listitem><para>Takes a boolean argument or one of <literal>self</literal> or <listitem><para>Takes a boolean argument or one of <literal>self</literal>, <literal>identity</literal>,
<literal>identity</literal>. Defaults to false. If enabled, sets up a new user namespace for the or <literal>full</literal>. Defaults to false. If enabled, sets up a new user namespace for the
executed processes and configures a user and group mapping. If set to a true value or executed processes and configures a user and group mapping. If set to a true value or
<literal>self</literal>, a minimal user and group mapping is configured that maps the <literal>self</literal>, a minimal user and group mapping is configured that maps the
<literal>root</literal> user and group as well as the unit's own user and group to themselves and <literal>root</literal> user and group as well as the unit's own user and group to themselves and
@ -2026,6 +2026,10 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
since all UIDs/GIDs are chosen identically it does provide process capability isolation, and hence is since all UIDs/GIDs are chosen identically it does provide process capability isolation, and hence is
often a good choice if proper user namespacing with distinct UID maps is not appropriate.</para> often a good choice if proper user namespacing with distinct UID maps is not appropriate.</para>
<para>If the parameter is <literal>full</literal>, user namespacing is set up with an identity
mapping for all UIDs/GIDs. Similar to <literal>identity</literal>, this does not provide UID/GID
isolation, but it does provide process capability isolation.</para>
<para>If this mode is enabled, all unit processes are run without privileges in the host user <para>If this mode is enabled, all unit processes are run without privileges in the host user
namespace (regardless if the unit's own user/group is <literal>root</literal> or not). Specifically namespace (regardless if the unit's own user/group is <literal>root</literal> or not). Specifically
this means that the process will have zero process capabilities on the host's user namespace, but this means that the process will have zero process capabilities on the host's user namespace, but

View File

@ -2103,6 +2103,23 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
uid_map = strdup("0 0 65536\n"); uid_map = strdup("0 0 65536\n");
if (!uid_map) if (!uid_map)
return -ENOMEM; return -ENOMEM;
} else if (private_users == PRIVATE_USERS_FULL) {
/* Map all UID/GID from original to new user namespace. We can't use `0 0 UINT32_MAX` because
* this is the same UID/GID map as the init user namespace and there are various applications
* (e.g. systemd's running_in_userns()) that check whether they are in a user namespace by
* comparing uid_map/gid_map to `0 0 UINT32_MAX`. Thus, we still map all UIDs/GIDs but do it
* using two extents to differentiate the new user namespace from the init namespace:
* 0 0 1
* 1 1 UINT32_MAX - 1
*
* Note the kernel defines the UID range between 0 and UINT32_MAX so we map all UIDs even though
* the UID range beyond INT32_MAX (i.e. the range above the signed 32-bit range) is
* icky. For example, setfsuid() returns the old UID as signed integer. But units can decide to
* use these UIDs/GIDs so we need to map them. */
r = asprintf(&uid_map, "0 0 1\n"
"1 1 " UID_FMT "\n", UINT32_MAX - 1);
if (r < 0)
return -ENOMEM;
/* Can only set up multiple mappings with CAP_SETUID. */ /* Can only set up multiple mappings with CAP_SETUID. */
} else if (have_effective_cap(CAP_SETUID) > 0 && uid != ouid && uid_is_valid(uid)) { } else if (have_effective_cap(CAP_SETUID) > 0 && uid != ouid && uid_is_valid(uid)) {
r = asprintf(&uid_map, r = asprintf(&uid_map,
@ -2123,6 +2140,11 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
gid_map = strdup("0 0 65536\n"); gid_map = strdup("0 0 65536\n");
if (!gid_map) if (!gid_map)
return -ENOMEM; return -ENOMEM;
} else if (private_users == PRIVATE_USERS_FULL) {
r = asprintf(&gid_map, "0 0 1\n"
"1 1 " UID_FMT "\n", UINT32_MAX - 1);
if (r < 0)
return -ENOMEM;
/* Can only set up multiple mappings with CAP_SETGID. */ /* Can only set up multiple mappings with CAP_SETGID. */
} else if (have_effective_cap(CAP_SETGID) > 0 && gid != ogid && gid_is_valid(gid)) { } else if (have_effective_cap(CAP_SETGID) > 0 && gid != ogid && gid_is_valid(gid)) {
r = asprintf(&gid_map, r = asprintf(&gid_map,

View File

@ -3364,6 +3364,7 @@ static const char* const private_users_table[_PRIVATE_USERS_MAX] = {
[PRIVATE_USERS_NO] = "no", [PRIVATE_USERS_NO] = "no",
[PRIVATE_USERS_SELF] = "self", [PRIVATE_USERS_SELF] = "self",
[PRIVATE_USERS_IDENTITY] = "identity", [PRIVATE_USERS_IDENTITY] = "identity",
[PRIVATE_USERS_FULL] = "full",
}; };
DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(private_users, PrivateUsers, PRIVATE_USERS_SELF); DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(private_users, PrivateUsers, PRIVATE_USERS_SELF);

View File

@ -65,6 +65,7 @@ typedef enum PrivateUsers {
PRIVATE_USERS_NO, PRIVATE_USERS_NO,
PRIVATE_USERS_SELF, PRIVATE_USERS_SELF,
PRIVATE_USERS_IDENTITY, PRIVATE_USERS_IDENTITY,
PRIVATE_USERS_FULL,
_PRIVATE_USERS_MAX, _PRIVATE_USERS_MAX,
_PRIVATE_USERS_INVALID = -EINVAL, _PRIVATE_USERS_INVALID = -EINVAL,
} PrivateUsers; } PrivateUsers;

View File

@ -10,3 +10,5 @@ systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/uid_ma
systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"' systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"'
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"' systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"'
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 65536"' systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 65536"'
systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/uid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'
systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/gid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'