Compare commits

..

8 Commits

Author SHA1 Message Date
Yu Watanabe f7c79f0911
Merge pull request #14320 from yuwata/network-tc-fq_codel-more
network: tc: introduce more FQ-CoDel settings
2019-12-13 11:09:36 +09:00
Lennart Poettering a724732208
Merge pull request #14269 from DaanDeMeyer/enable-mounts-on-root
nspawn: Enable specifying root as the mount target directory.
2019-12-13 00:05:38 +01:00
Daan De Meyer bd6609eb11 nspawn-mount: Use FLAGS_SET to check flags. 2019-12-12 20:18:37 +01:00
Daan De Meyer 5530dc87f2 nspawn: Only bind-mount directory when necessary. 2019-12-12 20:15:10 +01:00
Daan De Meyer e091a5dfd1 nspawn-mount: Remove unused parameters 2019-12-12 20:15:10 +01:00
Daan De Meyer 5f0a6347ac nspawn: Enable specifying root as the mount target directory.
Fixes #3847.
2019-12-12 20:15:03 +01:00
Yu Watanabe 9401e48855 test-network: add a test case for the new settings of FQ-CoDel 2019-12-12 23:37:03 +09:00
Yu Watanabe ac810b75c1 network: tc: support more attributes for FQ-CoDel 2019-12-12 23:36:48 +09:00
12 changed files with 421 additions and 71 deletions

View File

@ -2450,6 +2450,64 @@
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term><varname>FairQueuingControlledDelayMemoryLimit=</varname></term>
<listitem>
<para>Specifies the limit on the total number of bytes that can be queued in this FQ-CoDel instance.
When suffixed with K, M, or G, the specified size is parsed as Kilobytes, Megabytes, or Gigabytes,
respectively, to the base of 1024. Defaults to unset and kernel's default is used.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>FairQueuingControlledDelayFlows=</varname></term>
<listitem>
<para>Specifies the number of flows into which the incoming packets are classified.
Defaults to unset and kernel's default is used.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>FairQueuingControlledDelayTargetSec=</varname></term>
<listitem>
<para>Takes a timespan. Specifies the acceptable minimum standing/persistent queue delay.
Defaults to unset and kernel's default is used.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>FairQueuingControlledDelayIntervalSec=</varname></term>
<listitem>
<para>Takes a timespan. This is used to ensure that the measured minimum delay does not
become too stale. Defaults to unset and kernel's default is used.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>FairQueuingControlledDelayQuantum=</varname></term>
<listitem>
<para>Specifies the number of bytes used as 'deficit' in the fair queuing algorithm.
When suffixed with K, M, or G, the specified size is parsed as Kilobytes, Megabytes, or Gigabytes,
respectively, to the base of 1024. Defaults to unset and kernel's default is used.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>FairQueuingControlledDelayECN=</varname></term>
<listitem>
<para>Takes a boolean. This can be used to mark packets instead of dropping them. Defaults to
unset and kernel's default is used.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>FairQueuingControlledDelayCEThresholdSec=</varname></term>
<listitem>
<para>Takes a timespan. This sets a threshold above which all packets are marked with ECN
Congestion Experienced (CE). Defaults to unset and kernel's default is used.</para>
</listitem>
</varlistentry>
<varlistentry> <varlistentry>
<term><varname>FairQueueTrafficPolicingPacketLimit=</varname></term> <term><varname>FairQueueTrafficPolicingPacketLimit=</varname></term>
<listitem> <listitem>

View File

@ -258,7 +258,14 @@ TrafficControlQueueingDiscipline.TokenBufferFilterMPUBytes, con
TrafficControlQueueingDiscipline.TokenBufferFilterPeakRate, config_parse_tc_token_buffer_filter_size, 0, 0 TrafficControlQueueingDiscipline.TokenBufferFilterPeakRate, config_parse_tc_token_buffer_filter_size, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterLatencySec, config_parse_tc_token_buffer_filter_latency, 0, 0 TrafficControlQueueingDiscipline.TokenBufferFilterLatencySec, config_parse_tc_token_buffer_filter_latency, 0, 0
TrafficControlQueueingDiscipline.StochasticFairnessQueueingPerturbPeriodSec, config_parse_tc_stochastic_fairness_queueing_perturb_period, 0, 0 TrafficControlQueueingDiscipline.StochasticFairnessQueueingPerturbPeriodSec, config_parse_tc_stochastic_fairness_queueing_perturb_period, 0, 0
TrafficControlQueueingDiscipline.FairQueuingControlledDelayPacketLimit, config_parse_tc_fair_queuing_controlled_delay_limit, 0, 0 TrafficControlQueueingDiscipline.FairQueuingControlledDelayPacketLimit, config_parse_tc_fair_queuing_controlled_delay_u32, 0, 0
TrafficControlQueueingDiscipline.FairQueuingControlledDelayMemoryLimit, config_parse_tc_fair_queuing_controlled_delay_size, 0, 0
TrafficControlQueueingDiscipline.FairQueuingControlledDelayFlows, config_parse_tc_fair_queuing_controlled_delay_u32, 0, 0
TrafficControlQueueingDiscipline.FairQueuingControlledDelayQuantum, config_parse_tc_fair_queuing_controlled_delay_size, 0, 0
TrafficControlQueueingDiscipline.FairQueuingControlledDelayTargetSec, config_parse_tc_fair_queuing_controlled_delay_usec, 0, 0
TrafficControlQueueingDiscipline.FairQueuingControlledDelayIntervalSec, config_parse_tc_fair_queuing_controlled_delay_usec, 0, 0
TrafficControlQueueingDiscipline.FairQueuingControlledDelayCEThresholdSec, config_parse_tc_fair_queuing_controlled_delay_usec, 0, 0
TrafficControlQueueingDiscipline.FairQueuingControlledDelayECN, config_parse_tc_fair_queuing_controlled_delay_bool, 0, 0
TrafficControlQueueingDiscipline.FairQueueTrafficPolicingPacketLimit, config_parse_tc_fair_queue_traffic_policing_packet_limit, 0, 0 TrafficControlQueueingDiscipline.FairQueueTrafficPolicingPacketLimit, config_parse_tc_fair_queue_traffic_policing_packet_limit, 0, 0
/* backwards compatibility: do not add new entries to this section */ /* backwards compatibility: do not add new entries to this section */
Network.IPv4LL, config_parse_ipv4ll, 0, offsetof(Network, link_local) Network.IPv4LL, config_parse_ipv4ll, 0, offsetof(Network, link_local)

View File

@ -10,6 +10,20 @@
#include "qdisc.h" #include "qdisc.h"
#include "string-util.h" #include "string-util.h"
/* Init hook for FQ-CoDel qdiscs: marks the settings that do not use 0 as their
 * "not configured" value with their respective sentinels. Always returns 0. */
static int fair_queuing_controlled_delay_init(QDisc *qdisc) {
        FairQueuingControlledDelay *fq;

        assert(qdisc);

        fq = FQ_CODEL(qdisc);

        /* fill_message() skips an attribute while it still holds its sentinel:
         * ecn < 0, ce_threshold_usec == USEC_INFINITY, memory_limit == UINT32_MAX. */
        fq->ecn = -1;
        fq->ce_threshold_usec = USEC_INFINITY;
        fq->memory_limit = UINT32_MAX;

        return 0;
}
static int fair_queuing_controlled_delay_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) { static int fair_queuing_controlled_delay_fill_message(Link *link, QDisc *qdisc, sd_netlink_message *req) {
FairQueuingControlledDelay *fqcd; FairQueuingControlledDelay *fqcd;
int r; int r;
@ -24,9 +38,53 @@ static int fair_queuing_controlled_delay_fill_message(Link *link, QDisc *qdisc,
if (r < 0) if (r < 0)
return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m"); return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_LIMIT, fqcd->limit); if (fqcd->packet_limit > 0) {
if (r < 0) r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_LIMIT, fqcd->packet_limit);
return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_LIMIT attribute: %m"); if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_LIMIT attribute: %m");
}
if (fqcd->flows > 0) {
r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_FLOWS, fqcd->flows);
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_FLOWS attribute: %m");
}
if (fqcd->quantum > 0) {
r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_QUANTUM, fqcd->quantum);
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_QUANTUM attribute: %m");
}
if (fqcd->interval_usec > 0) {
r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_INTERVAL, fqcd->interval_usec);
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_INTERVAL attribute: %m");
}
if (fqcd->target_usec > 0) {
r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_TARGET, fqcd->target_usec);
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_TARGET attribute: %m");
}
if (fqcd->ecn >= 0) {
r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_ECN, fqcd->ecn);
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_ECN attribute: %m");
}
if (fqcd->ce_threshold_usec != USEC_INFINITY) {
r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_CE_THRESHOLD, fqcd->ce_threshold_usec);
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_CE_THRESHOLD attribute: %m");
}
if (fqcd->memory_limit != UINT32_MAX) {
r = sd_netlink_message_append_u32(req, TCA_FQ_CODEL_MEMORY_LIMIT, fqcd->memory_limit);
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_FQ_CODEL_MEMORY_LIMIT attribute: %m");
}
r = sd_netlink_message_close_container(req); r = sd_netlink_message_close_container(req);
if (r < 0) if (r < 0)
@ -35,7 +93,130 @@ static int fair_queuing_controlled_delay_fill_message(Link *link, QDisc *qdisc,
return 0; return 0;
} }
int config_parse_tc_fair_queuing_controlled_delay_limit( int config_parse_tc_fair_queuing_controlled_delay_u32(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
_cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
FairQueuingControlledDelay *fqcd;
Network *network = data;
uint32_t *p;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(data);
r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
return log_syntax(unit, LOG_ERR, filename, line, r,
"More than one kind of queueing discipline, ignoring assignment: %m");
fqcd = FQ_CODEL(qdisc);
if (streq(lvalue, "FairQueuingControlledDelayPacketLimit"))
p = &fqcd->packet_limit;
else if (streq(lvalue, "FairQueuingControlledDelayFlows"))
p = &fqcd->flows;
else
assert_not_reached("Invalid lvalue.");
if (isempty(rvalue)) {
*p = 0;
qdisc = NULL;
return 0;
}
r = safe_atou32(rvalue, p);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r,
"Failed to parse '%s=', ignoring assignment: %s",
lvalue, rvalue);
return 0;
}
qdisc = NULL;
return 0;
}
/* Shared parser for the FQ-CoDel timespan settings:
 *   FairQueuingControlledDelayTargetSec=
 *   FairQueuingControlledDelayIntervalSec=
 *   FairQueuingControlledDelayCEThresholdSec=
 *
 * An empty value resets the setting to its "unset" marker (USEC_INFINITY for the
 * CE threshold, 0 for the other two). Any other value is handed to parse_sec().
 * Unparsable values are logged and ignored (return 0). Only a failure to obtain
 * the qdisc object for this section is returned as an error. */
int config_parse_tc_fair_queuing_controlled_delay_usec(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        _cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
        Network *network = data;
        FairQueuingControlledDelay *fqcd;
        usec_t *field, unset_value;
        int r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);
        assert(data);

        r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc);
        if (r == -ENOMEM)
                return log_oom();
        if (r < 0)
                return log_syntax(unit, LOG_ERR, filename, line, r,
                                  "More than one kind of queueing discipline, ignoring assignment: %m");

        fqcd = FQ_CODEL(qdisc);

        /* Pick the destination field together with the value that means "not configured" for it. */
        if (streq(lvalue, "FairQueuingControlledDelayTargetSec")) {
                field = &fqcd->target_usec;
                unset_value = 0;
        } else if (streq(lvalue, "FairQueuingControlledDelayIntervalSec")) {
                field = &fqcd->interval_usec;
                unset_value = 0;
        } else if (streq(lvalue, "FairQueuingControlledDelayCEThresholdSec")) {
                field = &fqcd->ce_threshold_usec;
                unset_value = USEC_INFINITY;
        } else
                assert_not_reached("Invalid lvalue.");

        if (isempty(rvalue))
                *field = unset_value;
        else {
                r = parse_sec(rvalue, field);
                if (r < 0) {
                        /* Returning with qdisc still set lets the cleanup handler deal with it. */
                        log_syntax(unit, LOG_ERR, filename, line, r,
                                   "Failed to parse '%s=', ignoring assignment: %s",
                                   lvalue, rvalue);
                        return 0;
                }
        }

        /* Success: clear the pointer so the cleanup handler does not touch the qdisc. */
        qdisc = NULL;

        return 0;
}
int config_parse_tc_fair_queuing_controlled_delay_bool(
const char *unit, const char *unit,
const char *filename, const char *filename,
unsigned line, unsigned line,
@ -67,13 +248,13 @@ int config_parse_tc_fair_queuing_controlled_delay_limit(
fqcd = FQ_CODEL(qdisc); fqcd = FQ_CODEL(qdisc);
if (isempty(rvalue)) { if (isempty(rvalue)) {
fqcd->limit = 0; fqcd->ecn = -1;
qdisc = NULL; qdisc = NULL;
return 0; return 0;
} }
r = safe_atou32(rvalue, &fqcd->limit); r = parse_boolean(rvalue);
if (r < 0) { if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r, log_syntax(unit, LOG_ERR, filename, line, r,
"Failed to parse '%s=', ignoring assignment: %s", "Failed to parse '%s=', ignoring assignment: %s",
@ -81,6 +262,77 @@ int config_parse_tc_fair_queuing_controlled_delay_limit(
return 0; return 0;
} }
fqcd->ecn = r;
qdisc = NULL;
return 0;
}
int config_parse_tc_fair_queuing_controlled_delay_size(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
_cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
FairQueuingControlledDelay *fqcd;
Network *network = data;
uint64_t sz;
uint32_t *p;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(data);
r = qdisc_new_static(QDISC_KIND_FQ_CODEL, network, filename, section_line, &qdisc);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
return log_syntax(unit, LOG_ERR, filename, line, r,
"More than one kind of queueing discipline, ignoring assignment: %m");
fqcd = FQ_CODEL(qdisc);
if (streq(lvalue, "FairQueuingControlledDelayMemoryLimit"))
p = &fqcd->memory_limit;
else if (streq(lvalue, "FairQueuingControlledDelayQuantum"))
p = &fqcd->quantum;
else
assert_not_reached("Invalid lvalue.");
if (isempty(rvalue)) {
if (streq(lvalue, "FairQueuingControlledDelayMemoryLimit"))
*p = UINT32_MAX;
else
*p = 0;
qdisc = NULL;
return 0;
}
r = parse_size(rvalue, 1024, &sz);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r,
"Failed to parse '%s=', ignoring assignment: %s",
lvalue, rvalue);
return 0;
}
if (sz >= UINT32_MAX) {
log_syntax(unit, LOG_ERR, filename, line, r,
"Specified '%s=' is too large, ignoring assignment: %s",
lvalue, rvalue);
return 0;
}
*p = sz;
qdisc = NULL; qdisc = NULL;
return 0; return 0;
@ -89,5 +341,6 @@ int config_parse_tc_fair_queuing_controlled_delay_limit(
const QDiscVTable fq_codel_vtable = { const QDiscVTable fq_codel_vtable = {
.object_size = sizeof(FairQueuingControlledDelay), .object_size = sizeof(FairQueuingControlledDelay),
.tca_kind = "fq_codel", .tca_kind = "fq_codel",
.init = fair_queuing_controlled_delay_init,
.fill_message = fair_queuing_controlled_delay_fill_message, .fill_message = fair_queuing_controlled_delay_fill_message,
}; };

View File

@ -4,13 +4,25 @@
#include "conf-parser.h" #include "conf-parser.h"
#include "qdisc.h" #include "qdisc.h"
#include "time-util.h"
typedef struct FairQueuingControlledDelay { typedef struct FairQueuingControlledDelay {
QDisc meta; QDisc meta;
uint32_t limit;
uint32_t packet_limit;
uint32_t flows;
uint32_t quantum;
uint32_t memory_limit;
usec_t target_usec;
usec_t interval_usec;
usec_t ce_threshold_usec;
int ecn;
} FairQueuingControlledDelay; } FairQueuingControlledDelay;
DEFINE_QDISC_CAST(FQ_CODEL, FairQueuingControlledDelay); DEFINE_QDISC_CAST(FQ_CODEL, FairQueuingControlledDelay);
extern const QDiscVTable fq_codel_vtable; extern const QDiscVTable fq_codel_vtable;
CONFIG_PARSER_PROTOTYPE(config_parse_tc_fair_queuing_controlled_delay_limit); CONFIG_PARSER_PROTOTYPE(config_parse_tc_fair_queuing_controlled_delay_u32);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_fair_queuing_controlled_delay_usec);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_fair_queuing_controlled_delay_bool);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_fair_queuing_controlled_delay_size);

View File

@ -23,6 +23,7 @@ const QDiscVTable * const qdisc_vtable[_QDISC_KIND_MAX] = {
static int qdisc_new(QDiscKind kind, QDisc **ret) { static int qdisc_new(QDiscKind kind, QDisc **ret) {
QDisc *qdisc; QDisc *qdisc;
int r;
if (kind == _QDISC_KIND_INVALID) { if (kind == _QDISC_KIND_INVALID) {
qdisc = new(QDisc, 1); qdisc = new(QDisc, 1);
@ -42,6 +43,12 @@ static int qdisc_new(QDiscKind kind, QDisc **ret) {
qdisc->family = AF_UNSPEC; qdisc->family = AF_UNSPEC;
qdisc->parent = TC_H_ROOT; qdisc->parent = TC_H_ROOT;
qdisc->kind = kind; qdisc->kind = kind;
if (QDISC_VTABLE(qdisc)->init) {
r = QDISC_VTABLE(qdisc)->init(qdisc);
if (r < 0)
return r;
}
} }
*ret = TAKE_PTR(qdisc); *ret = TAKE_PTR(qdisc);

View File

@ -32,6 +32,8 @@ typedef struct QDisc {
typedef struct QDiscVTable { typedef struct QDiscVTable {
size_t object_size; size_t object_size;
const char *tca_kind; const char *tca_kind;
/* called in qdisc_new() */
int (*init)(QDisc *qdisc);
int (*fill_message)(Link *link, QDisc *qdisc, sd_netlink_message *m); int (*fill_message)(Link *link, QDisc *qdisc, sd_netlink_message *m);
int (*verify)(QDisc *qdisc); int (*verify)(QDisc *qdisc);
} QDiscVTable; } QDiscVTable;

View File

@ -222,8 +222,6 @@ int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only)
if (!path_is_absolute(destination)) if (!path_is_absolute(destination))
return -EINVAL; return -EINVAL;
if (empty_or_root(destination))
return -EINVAL;
m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND); m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
if (!m) if (!m)
@ -262,8 +260,6 @@ int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) {
if (!path_is_absolute(path)) if (!path_is_absolute(path))
return -EINVAL; return -EINVAL;
if (empty_or_root(path))
return -EINVAL;
m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS); m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
if (!m) if (!m)
@ -323,9 +319,6 @@ int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_onl
return -EINVAL; return -EINVAL;
} }
if (empty_or_root(destination))
return -EINVAL;
m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY); m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
if (!m) if (!m)
return -ENOMEM; return -ENOMEM;
@ -422,7 +415,7 @@ int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
(void) mkdir(full, 0755); (void) mkdir(full, 0755);
if (mount_settings & MOUNT_APPLY_APIVFS_RO) if (FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO))
extra_flags |= MS_RDONLY; extra_flags |= MS_RDONLY;
r = mount_verbose(LOG_ERR, "sysfs", full, "sysfs", r = mount_verbose(LOG_ERR, "sysfs", full, "sysfs",
@ -608,29 +601,29 @@ int mount_all(const char *dest,
#endif #endif
}; };
bool use_userns = (mount_settings & MOUNT_USE_USERNS); bool use_userns = FLAGS_SET(mount_settings, MOUNT_USE_USERNS);
bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS); bool netns = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_NETNS);
bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO); bool ro = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO);
bool in_userns = (mount_settings & MOUNT_IN_USERNS); bool in_userns = FLAGS_SET(mount_settings, MOUNT_IN_USERNS);
bool tmpfs_tmp = (mount_settings & MOUNT_APPLY_TMPFS_TMP); bool tmpfs_tmp = FLAGS_SET(mount_settings, MOUNT_APPLY_TMPFS_TMP);
size_t k; size_t k;
int r; int r;
for (k = 0; k < ELEMENTSOF(mount_table); k++) { for (k = 0; k < ELEMENTSOF(mount_table); k++) {
_cleanup_free_ char *where = NULL, *options = NULL; _cleanup_free_ char *where = NULL, *options = NULL;
const char *o; const char *o;
bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL); bool fatal = FLAGS_SET(mount_table[k].mount_settings, MOUNT_FATAL);
if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS)) if (in_userns != FLAGS_SET(mount_table[k].mount_settings, MOUNT_IN_USERNS))
continue; continue;
if (!netns && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_NETNS)) if (!netns && FLAGS_SET(mount_table[k].mount_settings, MOUNT_APPLY_APIVFS_NETNS))
continue; continue;
if (!ro && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_RO)) if (!ro && FLAGS_SET(mount_table[k].mount_settings, MOUNT_APPLY_APIVFS_RO))
continue; continue;
if (!tmpfs_tmp && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_TMPFS_TMP)) if (!tmpfs_tmp && FLAGS_SET(mount_table[k].mount_settings, MOUNT_APPLY_TMPFS_TMP))
continue; continue;
r = chase_symlinks(mount_table[k].where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where, NULL); r = chase_symlinks(mount_table[k].where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where, NULL);
@ -782,11 +775,7 @@ static int mount_bind(const char *dest, CustomMount *m) {
return 0; return 0;
} }
static int mount_tmpfs( static int mount_tmpfs(const char *dest, CustomMount *m, uid_t uid_shift, const char *selinux_apifs_context) {
const char *dest,
CustomMount *m,
bool userns, uid_t uid_shift, uid_t uid_range,
const char *selinux_apifs_context) {
const char *options; const char *options;
_cleanup_free_ char *buf = NULL, *where = NULL; _cleanup_free_ char *buf = NULL, *where = NULL;
@ -921,9 +910,9 @@ static int mount_arbitrary(const char *dest, CustomMount *m) {
int mount_custom( int mount_custom(
const char *dest, const char *dest,
CustomMount *mounts, size_t n, CustomMount *mounts, size_t n,
bool userns, uid_t uid_shift, uid_t uid_range, uid_t uid_shift,
const char *selinux_apifs_context, const char *selinux_apifs_context,
bool in_userns) { MountSettingsMask mount_settings) {
size_t i; size_t i;
int r; int r;
@ -933,7 +922,13 @@ int mount_custom(
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
CustomMount *m = mounts + i; CustomMount *m = mounts + i;
if (m->in_userns != in_userns) if (FLAGS_SET(mount_settings, MOUNT_IN_USERNS) != m->in_userns)
continue;
if (FLAGS_SET(mount_settings, MOUNT_ROOT_ONLY) && !path_equal(m->destination, "/"))
continue;
if (FLAGS_SET(mount_settings, MOUNT_NON_ROOT_ONLY) && path_equal(m->destination, "/"))
continue; continue;
switch (m->type) { switch (m->type) {
@ -943,7 +938,7 @@ int mount_custom(
break; break;
case CUSTOM_MOUNT_TMPFS: case CUSTOM_MOUNT_TMPFS:
r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context); r = mount_tmpfs(dest, m, uid_shift, selinux_apifs_context);
break; break;
case CUSTOM_MOUNT_OVERLAY: case CUSTOM_MOUNT_OVERLAY:
@ -969,10 +964,7 @@ int mount_custom(
return 0; return 0;
} }
static int setup_volatile_state( static int setup_volatile_state(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
const char *directory,
bool userns, uid_t uid_shift, uid_t uid_range,
const char *selinux_apifs_context) {
_cleanup_free_ char *buf = NULL; _cleanup_free_ char *buf = NULL;
const char *p, *options; const char *p, *options;
@ -1001,10 +993,7 @@ static int setup_volatile_state(
return mount_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options); return mount_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
} }
static int setup_volatile_yes( static int setup_volatile_yes(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
const char *directory,
bool userns, uid_t uid_shift, uid_t uid_range,
const char *selinux_apifs_context) {
bool tmpfs_mounted = false, bind_mounted = false; bool tmpfs_mounted = false, bind_mounted = false;
char template[] = "/tmp/nspawn-volatile-XXXXXX"; char template[] = "/tmp/nspawn-volatile-XXXXXX";
@ -1091,10 +1080,7 @@ fail:
return r; return r;
} }
static int setup_volatile_overlay( static int setup_volatile_overlay(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
const char *directory,
bool userns, uid_t uid_shift, uid_t uid_range,
const char *selinux_apifs_context) {
_cleanup_free_ char *buf = NULL, *escaped_directory = NULL, *escaped_upper = NULL, *escaped_work = NULL; _cleanup_free_ char *buf = NULL, *escaped_directory = NULL, *escaped_upper = NULL, *escaped_work = NULL;
char template[] = "/tmp/nspawn-volatile-XXXXXX"; char template[] = "/tmp/nspawn-volatile-XXXXXX";
@ -1159,19 +1145,19 @@ finish:
int setup_volatile_mode( int setup_volatile_mode(
const char *directory, const char *directory,
VolatileMode mode, VolatileMode mode,
bool userns, uid_t uid_shift, uid_t uid_range, uid_t uid_shift,
const char *selinux_apifs_context) { const char *selinux_apifs_context) {
switch (mode) { switch (mode) {
case VOLATILE_YES: case VOLATILE_YES:
return setup_volatile_yes(directory, userns, uid_shift, uid_range, selinux_apifs_context); return setup_volatile_yes(directory, uid_shift, selinux_apifs_context);
case VOLATILE_STATE: case VOLATILE_STATE:
return setup_volatile_state(directory, userns, uid_shift, uid_range, selinux_apifs_context); return setup_volatile_state(directory, uid_shift, selinux_apifs_context);
case VOLATILE_OVERLAY: case VOLATILE_OVERLAY:
return setup_volatile_overlay(directory, userns, uid_shift, uid_range, selinux_apifs_context); return setup_volatile_overlay(directory, uid_shift, selinux_apifs_context);
default: default:
return 0; return 0;

View File

@ -14,6 +14,8 @@ typedef enum MountSettingsMask {
MOUNT_APPLY_APIVFS_NETNS = 1 << 4, /* if set, /proc/sys/net will be mounted read-write. MOUNT_APPLY_APIVFS_NETNS = 1 << 4, /* if set, /proc/sys/net will be mounted read-write.
Works only if MOUNT_APPLY_APIVFS_RO is also set. */ Works only if MOUNT_APPLY_APIVFS_RO is also set. */
MOUNT_APPLY_TMPFS_TMP = 1 << 5, /* if set, /tmp will be mounted as tmpfs */ MOUNT_APPLY_TMPFS_TMP = 1 << 5, /* if set, /tmp will be mounted as tmpfs */
MOUNT_ROOT_ONLY = 1 << 6, /* if set, only root mounts are mounted */
MOUNT_NON_ROOT_ONLY = 1 << 7, /* if set, only non-root mounts are mounted */
} MountSettingsMask; } MountSettingsMask;
typedef enum CustomMountType { typedef enum CustomMountType {
@ -52,9 +54,9 @@ int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s);
int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, const char *selinux_apifs_context); int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, const char *selinux_apifs_context);
int mount_sysfs(const char *dest, MountSettingsMask mount_settings); int mount_sysfs(const char *dest, MountSettingsMask mount_settings);
int mount_custom(const char *dest, CustomMount *mounts, size_t n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, bool in_userns); int mount_custom(const char *dest, CustomMount *mounts, size_t n, uid_t uid_shift, const char *selinux_apifs_context, MountSettingsMask mount_settings);
int setup_volatile_mode(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); int setup_volatile_mode(const char *directory, VolatileMode mode, uid_t uid_shift, const char *selinux_apifs_context);
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s); int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s);
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old); int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old);

View File

@ -2975,11 +2975,9 @@ static int inner_child(
"/", "/",
arg_custom_mounts, arg_custom_mounts,
arg_n_custom_mounts, arg_n_custom_mounts,
false,
0,
0, 0,
arg_selinux_apifs_context, arg_selinux_apifs_context,
true); MOUNT_NON_ROOT_ONLY | MOUNT_IN_USERNS);
if (r < 0) if (r < 0)
return r; return r;
@ -3345,13 +3343,6 @@ static int outer_child(
return r; return r;
directory = "/run/systemd/nspawn-root"; directory = "/run/systemd/nspawn-root";
} else if (!dissected_image) {
/* Turn directory into bind mount (we need that so that we can move the bind mount to root
* later on). */
r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL);
if (r < 0)
return r;
} }
r = setup_pivot_root( r = setup_pivot_root(
@ -3364,13 +3355,28 @@ static int outer_child(
r = setup_volatile_mode( r = setup_volatile_mode(
directory, directory,
arg_volatile_mode, arg_volatile_mode,
arg_userns_mode != USER_NAMESPACE_NO,
arg_uid_shift, arg_uid_shift,
arg_uid_range,
arg_selinux_apifs_context); arg_selinux_apifs_context);
if (r < 0) if (r < 0)
return r; return r;
r = mount_custom(
directory,
arg_custom_mounts,
arg_n_custom_mounts,
arg_uid_shift,
arg_selinux_apifs_context,
MOUNT_ROOT_ONLY);
if (r < 0)
return r;
/* Make sure we always have a mount that we can move to root later on. */
if (!path_is_mount_point(directory, NULL, 0)) {
r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL);
if (r < 0)
return r;
}
if (dissected_image) { if (dissected_image) {
/* Now we know the uid shift, let's now mount everything else that might be in the image. */ /* Now we know the uid shift, let's now mount everything else that might be in the image. */
r = dissected_image_mount(dissected_image, directory, arg_uid_shift, r = dissected_image_mount(dissected_image, directory, arg_uid_shift,
@ -3401,7 +3407,12 @@ static int outer_child(
* inside the container that create a new mount namespace. * inside the container that create a new mount namespace.
* See https://github.com/systemd/systemd/issues/3860 * See https://github.com/systemd/systemd/issues/3860
* Further submounts (such as /dev) done after this will inherit the * Further submounts (such as /dev) done after this will inherit the
* shared propagation mode. */ * shared propagation mode.
*
* IMPORTANT: Do not overmount the root directory anymore from now on to
* enable moving the root directory mount to root later on.
* https://github.com/systemd/systemd/issues/3847#issuecomment-562735251
*/
r = mount_verbose(LOG_ERR, NULL, directory, NULL, MS_SHARED|MS_REC, NULL); r = mount_verbose(LOG_ERR, NULL, directory, NULL, MS_SHARED|MS_REC, NULL);
if (r < 0) if (r < 0)
return r; return r;
@ -3470,11 +3481,9 @@ static int outer_child(
directory, directory,
arg_custom_mounts, arg_custom_mounts,
arg_n_custom_mounts, arg_n_custom_mounts,
arg_userns_mode != USER_NAMESPACE_NO,
arg_uid_shift, arg_uid_shift,
arg_uid_range,
arg_selinux_apifs_context, arg_selinux_apifs_context,
false); MOUNT_NON_ROOT_ONLY);
if (r < 0) if (r < 0)
return r; return r;

View File

@ -279,4 +279,11 @@ TokenBufferFilterPeakRate=
TokenBufferFilterLatencySec= TokenBufferFilterLatencySec=
StochasticFairnessQueueingPerturbPeriodSec= StochasticFairnessQueueingPerturbPeriodSec=
FairQueuingControlledDelayPacketLimit= FairQueuingControlledDelayPacketLimit=
FairQueuingControlledDelayMemoryLimit=
FairQueuingControlledDelayFlows=
FairQueuingControlledDelayQuantum=
FairQueuingControlledDelayTargetSec=
FairQueuingControlledDelayIntervalSec=
FairQueuingControlledDelayCEThresholdSec=
FairQueuingControlledDelayECN=
FairQueueTrafficPolicingPacketLimit= FairQueueTrafficPolicingPacketLimit=

View File

@ -15,3 +15,10 @@ NetworkEmulatorPacketLimit=100
[TrafficControlQueueingDiscipline] [TrafficControlQueueingDiscipline]
Parent=ingress Parent=ingress
FairQueuingControlledDelayPacketLimit=20480 FairQueuingControlledDelayPacketLimit=20480
FairQueuingControlledDelayMemoryLimit=64M
FairQueuingControlledDelayFlows=2048
FairQueuingControlledDelayTargetSec=10ms
FairQueuingControlledDelayIntervalSec=200ms
FairQueuingControlledDelayQuantum=1400
FairQueuingControlledDelayECN=yes
FairQueuingControlledDelayCEThresholdSec=100ms

View File

@ -2095,7 +2095,7 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
self.assertRegex(output, 'qdisc netem') self.assertRegex(output, 'qdisc netem')
self.assertRegex(output, 'limit 100 delay 50.0ms 10.0ms loss 20%') self.assertRegex(output, 'limit 100 delay 50.0ms 10.0ms loss 20%')
self.assertRegex(output, 'qdisc fq_codel') self.assertRegex(output, 'qdisc fq_codel')
self.assertRegex(output, 'limit 20480p') self.assertRegex(output, 'limit 20480p flows 2048 quantum 1400 target 10.0ms ce_threshold 100.0ms interval 200.0ms memory_limit 64Mb ecn')
output = check_output('tc qdisc show dev test1') output = check_output('tc qdisc show dev test1')
print(output) print(output)
self.assertRegex(output, 'qdisc tbf') self.assertRegex(output, 'qdisc tbf')