Compare commits

..

20 Commits

Author SHA1 Message Date
Anita Zhang da4dd97405
Merge pull request #14173 from ssahani/tc-sfq
network: tc: introduce sfq and tbf
2019-12-04 16:14:11 -08:00
Anita Zhang adae5eb977
Merge pull request #14219 from poettering/homed-preparatory-loop
preparatory /dev/loopN support split out of homed PR
2019-12-04 16:07:41 -08:00
Yu Watanabe 0ab9279132 test-network: add a test case for SFQ 2019-12-04 20:58:33 +09:00
Yu Watanabe b2340fbb5a network: SFQ cannot be configured with netem or TBF 2019-12-04 20:58:33 +09:00
Susant Sahani 9942b71089 network: tc introduce sfq - Stochastic Fairness Queueing
Stochastic Fairness Queueing is a classless queueing discipline.
SFQ does not shape traffic but only schedules the transmission of packets, based on 'flows'.
The goal is to ensure fairness so that each flow is able to send data in turn,
thus preventing any single flow from drowning out the rest.
2019-12-04 20:58:17 +09:00
Yu Watanabe 1b628c4f64 test-network: add test case for TBF 2019-12-04 20:57:48 +09:00
Yu Watanabe f1dba55565 network: drop unnecessary headers 2019-12-04 20:57:48 +09:00
Yu Watanabe 6483f04381 network: make network_emulator_fill_message() take NetworkEmulator 2019-12-04 20:57:39 +09:00
Yu Watanabe edc54f2f75 network: rename QDiscs to QDisc 2019-12-04 20:57:01 +09:00
Yu Watanabe 8efb93f02d network: ignore sections which have both NetworkEmulator and TokenBufferFilter settings 2019-12-04 20:55:55 +09:00
Susant Sahani ba5841b520 networkd tc: introduce tbf
See https://linux.die.net/man/8/tc-tbf
2019-12-04 20:55:48 +09:00
Lennart Poettering 10c1b18888 valgrind: temporarily handle that valgrind still doesn't know LOOP_GET_STATUS64
Should be removed once valgrind learns it.
2019-12-02 10:06:56 +01:00
Lennart Poettering 50d046993b loop-util: if we fail to fully set up a loop device, detach it again 2019-12-02 10:06:45 +01:00
Lennart Poettering b26c39ad2c loop-util: fill in the loopback number, even a posteriori 2019-12-02 10:06:34 +01:00
Lennart Poettering f1443709e0 loop-util: optionally also resize partitions 2019-12-02 10:06:23 +01:00
Lennart Poettering 441ec80468 loop-util: add api for locking the block device with flock() 2019-12-02 10:06:12 +01:00
Lennart Poettering c37878fced loop-util: allow refreshing offset 2019-12-02 10:06:00 +01:00
Lennart Poettering ed9eeb7b0b loop-util: allow creating loopback block devices with offset/length 2019-12-02 10:05:49 +01:00
Lennart Poettering 9dabc4fda5 loop-util: add API to refresh loopback device size and opening existing loopback block devices 2019-12-02 10:05:38 +01:00
Lennart Poettering e08f94acf5 loop-util: accept loopback flags when creating loopback device
This way callers can choose if they want partition scanning or not.
2019-12-02 10:05:09 +01:00
27 changed files with 783 additions and 95 deletions

View File

@ -2368,6 +2368,40 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>TokenBufferFilterLatencySec=</varname></term>
<listitem>
<para>Specifies the latency parameter, which specifies the maximum amount of time a
packet can sit in the Token Buffer Filter (TBF). Defaults to unset.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>TokenBufferFilterBurst=</varname></term>
<listitem>
<para>Specifies the size of the bucket. This is the maximum amount of bytes that tokens
can be available for instantaneous transfer. When the size is suffixed with K, M, or G, it is
parsed as Kilobytes, Megabytes, or Gigabytes, respectively, to the base of 1000. Defaults to
unset.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>TokenBufferFilterRate=</varname></term>
<listitem>
<para>Specifies the device specific bandwidth. When suffixed with K, M, or G, the specified
bandwidth is parsed as Kilobytes, Megabytes, or Gigabytes, respectively, to the base of 1000.
Defaults to unset.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>StochasticFairnessQueueingPerturbPeriodSec=</varname></term>
<listitem>
<para>Specifies the interval in seconds for queue algorithm perturbation. Defaults to unset.</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>

View File

@ -1,6 +1,7 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <errno.h>
#include <linux/loop.h>
#include <sched.h>
#include <stdio.h>
#include <sys/mount.h>
@ -1220,6 +1221,7 @@ int setup_namespace(
r = loop_device_make_by_path(root_image,
dissect_image_flags & DISSECT_IMAGE_READ_ONLY ? O_RDONLY : O_RDWR,
LO_FLAGS_PARTSCAN,
&loop_device);
if (r < 0)
return log_debug_errno(r, "Failed to create loop device for root image: %m");

View File

@ -1,8 +1,9 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <fcntl.h>
#include <stdio.h>
#include <getopt.h>
#include <linux/loop.h>
#include <stdio.h>
#include "architecture.h"
#include "dissect-image.h"
@ -171,7 +172,7 @@ static int run(int argc, char *argv[]) {
if (r <= 0)
return r;
r = loop_device_make_by_path(arg_image, (arg_flags & DISSECT_IMAGE_READ_ONLY) ? O_RDONLY : O_RDWR, &d);
r = loop_device_make_by_path(arg_image, (arg_flags & DISSECT_IMAGE_READ_ONLY) ? O_RDONLY : O_RDWR, LO_FLAGS_PARTSCAN, &d);
if (r < 0)
return log_error_errno(r, "Failed to set up loopback device: %m");

View File

@ -532,7 +532,6 @@ int sd_netlink_message_open_array(sd_netlink_message *m, uint16_t type) {
assert_return(m, -EINVAL);
assert_return(!m->sealed, -EPERM);
assert_return(m->n_containers > 0, -EINVAL);
r = add_rtattr(m, type | NLA_F_NESTED, NULL, 0);
if (r < 0)

View File

@ -109,6 +109,10 @@ sources = files('''
tc/netem.h
tc/qdisc.c
tc/qdisc.h
tc/sfq.c
tc/sfq.h
tc/tbf.c
tc/tbf.h
tc/tc-util.c
tc/tc-util.h
'''.split())

View File

@ -2585,7 +2585,7 @@ static int link_drop_config(Link *link) {
}
static int link_configure_qdiscs(Link *link) {
QDiscs *qdisc;
QDisc *qdisc;
Iterator i;
int r;
@ -2601,7 +2601,7 @@ static int link_configure_qdiscs(Link *link) {
if (link->qdisc_messages == 0)
link->qdiscs_configured = true;
else
log_link_debug(link, "Configuring QDiscs");
log_link_debug(link, "Configuring queuing discipline (qdisc)");
return 0;
}

View File

@ -244,12 +244,16 @@ CAN.BitRate, config_parse_si_size,
CAN.SamplePoint, config_parse_permille, 0, offsetof(Network, can_sample_point)
CAN.RestartSec, config_parse_sec, 0, offsetof(Network, can_restart_us)
CAN.TripleSampling, config_parse_tristate, 0, offsetof(Network, can_triple_sampling)
TrafficControlQueueingDiscipline.Parent, config_parse_tc_qdiscs_parent, 0, 0
TrafficControlQueueingDiscipline.NetworkEmulatorDelaySec, config_parse_tc_network_emulator_delay, 0, 0
TrafficControlQueueingDiscipline.NetworkEmulatorDelayJitterSec, config_parse_tc_network_emulator_delay, 0, 0
TrafficControlQueueingDiscipline.NetworkEmulatorLossRate, config_parse_tc_network_emulator_rate, 0, 0
TrafficControlQueueingDiscipline.NetworkEmulatorDuplicateRate, config_parse_tc_network_emulator_rate, 0, 0
TrafficControlQueueingDiscipline.NetworkEmulatorPacketLimit, config_parse_tc_network_emulator_packet_limit, 0, 0
TrafficControlQueueingDiscipline.Parent, config_parse_tc_qdiscs_parent, 0, 0
TrafficControlQueueingDiscipline.NetworkEmulatorDelaySec, config_parse_tc_network_emulator_delay, 0, 0
TrafficControlQueueingDiscipline.NetworkEmulatorDelayJitterSec, config_parse_tc_network_emulator_delay, 0, 0
TrafficControlQueueingDiscipline.NetworkEmulatorLossRate, config_parse_tc_network_emulator_rate, 0, 0
TrafficControlQueueingDiscipline.NetworkEmulatorDuplicateRate, config_parse_tc_network_emulator_rate, 0, 0
TrafficControlQueueingDiscipline.NetworkEmulatorPacketLimit, config_parse_tc_network_emulator_packet_limit, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterRate, config_parse_tc_token_buffer_filter_size, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterBurst, config_parse_tc_token_buffer_filter_size, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterLatencySec, config_parse_tc_token_buffer_filter_latency, 0, 0
TrafficControlQueueingDiscipline.StochasticFairnessQueueingPerturbPeriodSec, config_parse_tc_stochastic_fairness_queueing_perturb_period, 0, 0
/* backwards compatibility: do not add new entries to this section */
Network.IPv4LL, config_parse_ipv4ll, 0, offsetof(Network, link_local)
DHCP.ClientIdentifier, config_parse_dhcp_client_identifier, 0, offsetof(Network, dhcp_client_identifier)

View File

@ -154,6 +154,8 @@ int network_verify(Network *network) {
Prefix *prefix, *prefix_next;
Route *route, *route_next;
FdbEntry *fdb, *fdb_next;
QDisc *qdisc;
Iterator i;
assert(network);
assert(network->filename);
@ -313,6 +315,11 @@ int network_verify(Network *network) {
if (routing_policy_rule_section_verify(rule) < 0)
routing_policy_rule_free(rule);
bool has_root = false, has_clsact = false;
ORDERED_HASHMAP_FOREACH(qdisc, network->qdiscs_by_section, i)
if (qdisc_section_verify(qdisc, &has_root, &has_clsact) < 0)
qdisc_free(qdisc);
return 0;
}

View File

@ -2,12 +2,9 @@
* Copyright © 2019 VMware, Inc. */
#include <linux/pkt_sched.h>
#include <math.h>
#include "alloc-util.h"
#include "conf-parser.h"
#include "hashmap.h"
#include "in-addr-util.h"
#include "netem.h"
#include "netlink-util.h"
#include "networkd-manager.h"
@ -15,7 +12,6 @@
#include "qdisc.h"
#include "string-util.h"
#include "tc-util.h"
#include "util.h"
int network_emulator_new(NetworkEmulator **ret) {
NetworkEmulator *ne = NULL;
@ -34,33 +30,33 @@ int network_emulator_new(NetworkEmulator **ret) {
return 0;
}
int network_emulator_fill_message(Link *link, QDiscs *qdisc, sd_netlink_message *req) {
int network_emulator_fill_message(Link *link, const NetworkEmulator *ne, sd_netlink_message *req) {
struct tc_netem_qopt opt = {
.limit = 1000,
};
int r;
assert(link);
assert(qdisc);
assert(ne);
assert(req);
if (qdisc->ne.limit > 0)
opt.limit = qdisc->ne.limit;
if (ne->limit > 0)
opt.limit = ne->limit;
if (qdisc->ne.loss > 0)
opt.loss = qdisc->ne.loss;
if (ne->loss > 0)
opt.loss = ne->loss;
if (qdisc->ne.duplicate > 0)
opt.duplicate = qdisc->ne.duplicate;
if (ne->duplicate > 0)
opt.duplicate = ne->duplicate;
if (qdisc->ne.delay != USEC_INFINITY) {
r = tc_time_to_tick(qdisc->ne.delay, &opt.latency);
if (ne->delay != USEC_INFINITY) {
r = tc_time_to_tick(ne->delay, &opt.latency);
if (r < 0)
return log_link_error_errno(link, r, "Failed to calculate latency in TCA_OPTION: %m");
}
if (qdisc->ne.jitter != USEC_INFINITY) {
r = tc_time_to_tick(qdisc->ne.jitter, &opt.jitter);
if (ne->jitter != USEC_INFINITY) {
r = tc_time_to_tick(ne->jitter, &opt.jitter);
if (r < 0)
return log_link_error_errno(link, r, "Failed to calculate jitter in TCA_OPTION: %m");
}
@ -84,7 +80,7 @@ int config_parse_tc_network_emulator_delay(
void *data,
void *userdata) {
_cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
_cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
Network *network = data;
usec_t u;
int r;
@ -139,7 +135,7 @@ int config_parse_tc_network_emulator_rate(
void *data,
void *userdata) {
_cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
_cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
Network *network = data;
uint32_t rate;
int r;
@ -189,7 +185,7 @@ int config_parse_tc_network_emulator_packet_limit(
void *data,
void *userdata) {
_cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
_cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
Network *network = data;
int r;

View File

@ -8,8 +8,6 @@
#include "networkd-link.h"
#include "time-util.h"
typedef struct QDiscs QDiscs;
typedef struct NetworkEmulator {
usec_t delay;
usec_t jitter;
@ -20,7 +18,7 @@ typedef struct NetworkEmulator {
} NetworkEmulator;
int network_emulator_new(NetworkEmulator **ret);
int network_emulator_fill_message(Link *link, QDiscs *qdisc, sd_netlink_message *req);
int network_emulator_fill_message(Link *link, const NetworkEmulator *ne, sd_netlink_message *req);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_network_emulator_delay);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_network_emulator_rate);

View File

@ -12,16 +12,15 @@
#include "qdisc.h"
#include "set.h"
#include "string-util.h"
#include "util.h"
static int qdisc_new(QDiscs **ret) {
QDiscs *qdisc;
static int qdisc_new(QDisc **ret) {
QDisc *qdisc;
qdisc = new(QDiscs, 1);
qdisc = new(QDisc, 1);
if (!qdisc)
return -ENOMEM;
*qdisc = (QDiscs) {
*qdisc = (QDisc) {
.family = AF_UNSPEC,
.parent = TC_H_ROOT,
};
@ -31,9 +30,9 @@ static int qdisc_new(QDiscs **ret) {
return 0;
}
int qdisc_new_static(Network *network, const char *filename, unsigned section_line, QDiscs **ret) {
int qdisc_new_static(Network *network, const char *filename, unsigned section_line, QDisc **ret) {
_cleanup_(network_config_section_freep) NetworkConfigSection *n = NULL;
_cleanup_(qdisc_freep) QDiscs *qdisc = NULL;
_cleanup_(qdisc_freep) QDisc *qdisc = NULL;
int r;
assert(network);
@ -76,7 +75,7 @@ int qdisc_new_static(Network *network, const char *filename, unsigned section_li
return 0;
}
void qdisc_free(QDiscs *qdisc) {
void qdisc_free(QDisc *qdisc) {
if (!qdisc)
return;
@ -106,7 +105,7 @@ static int qdisc_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
}
if (link->route_messages == 0) {
log_link_debug(link, "QDiscs configured");
log_link_debug(link, "QDisc configured");
link->qdiscs_configured = true;
link_check_ready(link);
}
@ -114,7 +113,7 @@ static int qdisc_handler(sd_netlink *rtnl, sd_netlink_message *m, Link *link) {
return 1;
}
int qdisc_configure(Link *link, QDiscs *qdisc) {
int qdisc_configure(Link *link, QDisc *qdisc) {
_cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
_cleanup_free_ char *tca_kind = NULL;
int r;
@ -147,7 +146,27 @@ int qdisc_configure(Link *link, QDiscs *qdisc) {
if (r < 0)
return log_oom();
r = network_emulator_fill_message(link, qdisc, req);
r = network_emulator_fill_message(link, &qdisc->ne, req);
if (r < 0)
return r;
}
if (qdisc->has_token_buffer_filter) {
r = free_and_strdup(&tca_kind, "tbf");
if (r < 0)
return log_oom();
r = token_buffer_filter_fill_message(link, &qdisc->tbf, req);
if (r < 0)
return r;
}
if (qdisc->has_stochastic_fairness_queueing) {
r = free_and_strdup(&tca_kind, "sfq");
if (r < 0)
return log_oom();
r = stochastic_fairness_queueing_fill_message(link, &qdisc->sfq, req);
if (r < 0)
return r;
}
@ -168,6 +187,42 @@ int qdisc_configure(Link *link, QDiscs *qdisc) {
return 0;
}
int qdisc_section_verify(QDisc *qdisc, bool *has_root, bool *has_clsact) {
unsigned i;
assert(qdisc);
assert(has_root);
assert(has_clsact);
if (section_is_invalid(qdisc->section))
return -EINVAL;
i = qdisc->has_network_emulator + qdisc->has_token_buffer_filter + qdisc->has_stochastic_fairness_queueing;
if (i > 1)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
"%s: TrafficControlQueueingDiscipline section has more than one type of discipline. "
"Ignoring [TrafficControlQueueingDiscipline] section from line %u.",
qdisc->section->filename, qdisc->section->line);
if (qdisc->parent == TC_H_ROOT) {
if (*has_root)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
"%s: More than one root TrafficControlQueueingDiscipline sections are defined. "
"Ignoring [TrafficControlQueueingDiscipline] section from line %u.",
qdisc->section->filename, qdisc->section->line);
*has_root = true;
} else if (qdisc->parent == TC_H_CLSACT) {
if (*has_clsact)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
"%s: More than one clsact TrafficControlQueueingDiscipline sections are defined. "
"Ignoring [TrafficControlQueueingDiscipline] section from line %u.",
qdisc->section->filename, qdisc->section->line);
*has_clsact = true;
}
return 0;
}
int config_parse_tc_qdiscs_parent(
const char *unit,
const char *filename,
@ -180,7 +235,7 @@ int config_parse_tc_qdiscs_parent(
void *data,
void *userdata) {
_cleanup_(qdisc_free_or_set_invalidp) QDiscs *qdisc = NULL;
_cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
Network *network = data;
int r;

View File

@ -7,8 +7,10 @@
#include "networkd-link.h"
#include "networkd-network.h"
#include "networkd-util.h"
#include "sfq.h"
#include "tbf.h"
typedef struct QDiscs {
typedef struct QDisc {
NetworkConfigSection *section;
Network *network;
@ -20,15 +22,21 @@ typedef struct QDiscs {
uint32_t parent;
bool has_network_emulator:1;
bool has_token_buffer_filter:1;
bool has_stochastic_fairness_queueing:1;
NetworkEmulator ne;
} QDiscs;
TokenBufferFilter tbf;
StochasticFairnessQueueing sfq;
} QDisc;
void qdisc_free(QDiscs *qdisc);
int qdisc_new_static(Network *network, const char *filename, unsigned section_line, QDiscs **ret);
void qdisc_free(QDisc *qdisc);
int qdisc_new_static(Network *network, const char *filename, unsigned section_line, QDisc **ret);
int qdisc_configure(Link *link, QDiscs *qdisc);
int qdisc_configure(Link *link, QDisc *qdisc);
DEFINE_NETWORK_SECTION_FUNCTIONS(QDiscs, qdisc_free);
int qdisc_section_verify(QDisc *qdisc, bool *has_root, bool *has_clsact);
DEFINE_NETWORK_SECTION_FUNCTIONS(QDisc, qdisc_free);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_qdiscs_parent);

87
src/network/tc/sfq.c Normal file
View File

@ -0,0 +1,87 @@
/* SPDX-License-Identifier: LGPL-2.1+
* Copyright © 2019 VMware, Inc. */
#include <linux/pkt_sched.h>
#include "alloc-util.h"
#include "conf-parser.h"
#include "netlink-util.h"
#include "parse-util.h"
#include "qdisc.h"
#include "sfq.h"
#include "string-util.h"
int stochastic_fairness_queueing_new(StochasticFairnessQueueing **ret) {
StochasticFairnessQueueing *sfq = NULL;
sfq = new0(StochasticFairnessQueueing, 1);
if (!sfq)
return -ENOMEM;
*ret = TAKE_PTR(sfq);
return 0;
}
int stochastic_fairness_queueing_fill_message(Link *link, const StochasticFairnessQueueing *sfq, sd_netlink_message *req) {
struct tc_sfq_qopt_v1 opt = {};
int r;
assert(link);
assert(sfq);
assert(req);
opt.v0.perturb_period = sfq->perturb_period / USEC_PER_SEC;
r = sd_netlink_message_append_data(req, TCA_OPTIONS, &opt, sizeof(struct tc_sfq_qopt_v1));
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_OPTIONS attribute: %m");
return 0;
}
int config_parse_tc_stochastic_fairness_queueing_perturb_period(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
_cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
Network *network = data;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(data);
r = qdisc_new_static(network, filename, section_line, &qdisc);
if (r < 0)
return r;
if (isempty(rvalue)) {
qdisc->sfq.perturb_period = 0;
qdisc = NULL;
return 0;
}
r = parse_sec(rvalue, &qdisc->sfq.perturb_period);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r,
"Failed to parse '%s=', ignoring assignment: %s",
lvalue, rvalue);
return 0;
}
qdisc->has_stochastic_fairness_queueing = true;
qdisc = NULL;
return 0;
}

17
src/network/tc/sfq.h Normal file
View File

@ -0,0 +1,17 @@
/* SPDX-License-Identifier: LGPL-2.1+
* Copyright © 2019 VMware, Inc. */
#pragma once
#include "sd-netlink.h"
#include "conf-parser.h"
#include "networkd-link.h"
typedef struct StochasticFairnessQueueing {
usec_t perturb_period;
} StochasticFairnessQueueing;
int stochastic_fairness_queueing_new(StochasticFairnessQueueing **ret);
int stochastic_fairness_queueing_fill_message(Link *link, const StochasticFairnessQueueing *sfq, sd_netlink_message *req);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_stochastic_fairness_queueing_perturb_period);

167
src/network/tc/tbf.c Normal file
View File

@ -0,0 +1,167 @@
/* SPDX-License-Identifier: LGPL-2.1+
* Copyright © 2019 VMware, Inc. */
#include <linux/pkt_sched.h>
#include <math.h>
#include "alloc-util.h"
#include "conf-parser.h"
#include "netem.h"
#include "netlink-util.h"
#include "networkd-manager.h"
#include "parse-util.h"
#include "qdisc.h"
#include "string-util.h"
#include "util.h"
int token_buffer_filter_new(TokenBufferFilter **ret) {
TokenBufferFilter *ne = NULL;
ne = new0(TokenBufferFilter, 1);
if (!ne)
return -ENOMEM;
*ret = TAKE_PTR(ne);
return 0;
}
int token_buffer_filter_fill_message(Link *link, const TokenBufferFilter *tbf, sd_netlink_message *req) {
struct tc_tbf_qopt opt = {};
int r;
assert(link);
assert(tbf);
assert(req);
opt.rate.rate = tbf->rate >= (1ULL << 32) ? ~0U : tbf->rate;
opt.limit = tbf->rate * (double) tbf->latency / USEC_PER_SEC + tbf->burst;
r = sd_netlink_message_open_array(req, TCA_OPTIONS);
if (r < 0)
return log_link_error_errno(link, r, "Could not open container TCA_OPTIONS: %m");
r = sd_netlink_message_append_data(req, TCA_TBF_PARMS, &opt, sizeof(struct tc_tbf_qopt));
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_TBF_PARMS attribute: %m");
r = sd_netlink_message_append_data(req, TCA_TBF_BURST, &tbf->burst, sizeof(tbf->burst));
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_TBF_BURST attribute: %m");
if (tbf->rate >= (1ULL << 32)) {
r = sd_netlink_message_append_data(req, TCA_TBF_RATE64, &tbf->rate, sizeof(tbf->rate));
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_TBF_RATE64 attribute: %m");
}
r = sd_netlink_message_close_container(req);
if (r < 0)
return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
return 0;
}
int config_parse_tc_token_buffer_filter_size(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
_cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
Network *network = data;
uint64_t k;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(data);
r = qdisc_new_static(network, filename, section_line, &qdisc);
if (r < 0)
return r;
if (isempty(rvalue)) {
if (streq(lvalue, "TokenBufferFilterRate"))
qdisc->tbf.rate = 0;
else if (streq(lvalue, "TokenBufferFilterBurst"))
qdisc->tbf.burst = 0;
qdisc = NULL;
return 0;
}
r = parse_size(rvalue, 1000, &k);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r,
"Failed to parse '%s=', ignoring assignment: %s",
lvalue, rvalue);
return 0;
}
if (streq(lvalue, "TokenBufferFilterRate"))
qdisc->tbf.rate = k / 8;
else if (streq(lvalue, "TokenBufferFilterBurst"))
qdisc->tbf.burst = k;
qdisc->has_token_buffer_filter = true;
qdisc = NULL;
return 0;
}
int config_parse_tc_token_buffer_filter_latency(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
_cleanup_(qdisc_free_or_set_invalidp) QDisc *qdisc = NULL;
Network *network = data;
usec_t u;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(data);
r = qdisc_new_static(network, filename, section_line, &qdisc);
if (r < 0)
return r;
if (isempty(rvalue)) {
qdisc->tbf.latency = 0;
qdisc = NULL;
return 0;
}
r = parse_sec(rvalue, &u);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r,
"Failed to parse '%s=', ignoring assignment: %s",
lvalue, rvalue);
return 0;
}
qdisc->tbf.latency = u;
qdisc->has_token_buffer_filter = true;
qdisc = NULL;
return 0;
}

21
src/network/tc/tbf.h Normal file
View File

@ -0,0 +1,21 @@
/* SPDX-License-Identifier: LGPL-2.1+
* Copyright © 2019 VMware, Inc. */
#pragma once
#include "sd-netlink.h"
#include "conf-parser.h"
#include "networkd-link.h"
typedef struct TokenBufferFilter {
uint64_t rate;
uint32_t burst;
uint32_t latency;
} TokenBufferFilter;
int token_buffer_filter_new(TokenBufferFilter **ret);
int token_buffer_filter_fill_message(Link *link, const TokenBufferFilter *tbf, sd_netlink_message *req);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_token_buffer_filter_latency);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_token_buffer_filter_size);

View File

@ -5036,7 +5036,7 @@ static int run(int argc, char *argv[]) {
goto finish;
}
r = loop_device_make_by_path(arg_image, arg_read_only ? O_RDONLY : O_RDWR, &loop);
r = loop_device_make_by_path(arg_image, arg_read_only ? O_RDONLY : O_RDWR, LO_FLAGS_PARTSCAN, &loop);
if (r < 0) {
log_error_errno(r, "Failed to set up loopback block device: %m");
goto finish;

View File

@ -1,5 +1,7 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <linux/loop.h>
#include "bus-common-errors.h"
#include "bus-error.h"
#include "conf-files.h"
@ -359,7 +361,7 @@ static int portable_extract_by_path(
assert(path);
r = loop_device_make_by_path(path, O_RDONLY, &d);
r = loop_device_make_by_path(path, O_RDONLY, LO_FLAGS_PARTSCAN, &d);
if (r == -EISDIR) {
/* We can't turn this into a loop-back block device, and this returns EISDIR? Then this is a directory
* tree and not a raw device. It's easy then. */

View File

@ -1,5 +1,9 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#if HAVE_VALGRIND_MEMCHECK_H
#include <valgrind/memcheck.h>
#endif
#include <linux/dm-ioctl.h>
#include <linux/loop.h>
#include <sys/mount.h>
@ -215,9 +219,15 @@ static int wait_for_partitions_to_appear(
* an explicit recognizable error about this, so that callers can generate a
* proper message explaining the situation. */
if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
log_debug("Device is a loop device and partition scanning is off!");
return -EPROTONOSUPPORT;
if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
#if HAVE_VALGRIND_MEMCHECK_H
/* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif
if ((info.lo_flags & LO_FLAGS_PARTSCAN) == 0)
return log_debug_errno(EPROTONOSUPPORT,
"Device is a loop device and partition scanning is off!");
}
}
if (r != -EBUSY)

View File

@ -1,26 +1,40 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#if HAVE_VALGRIND_MEMCHECK_H
#include <valgrind/memcheck.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <linux/blkpg.h>
#include <linux/fs.h>
#include <linux/loop.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include "alloc-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "loop-util.h"
#include "parse-util.h"
#include "stat-util.h"
#include "stdio-util.h"
int loop_device_make(int fd, int open_flags, LoopDevice **ret) {
const struct loop_info64 info = {
.lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN|(open_flags == O_RDONLY ? LO_FLAGS_READ_ONLY : 0),
};
int loop_device_make_full(
int fd,
int open_flags,
uint64_t offset,
uint64_t size,
uint32_t loop_flags,
LoopDevice **ret) {
_cleanup_close_ int control = -1, loop = -1;
_cleanup_free_ char *loopdev = NULL;
unsigned n_attempts = 0;
struct loop_info64 info;
LoopDevice *d = NULL;
struct stat st;
LoopDevice *d;
int nr, r;
int nr = -1, r;
assert(fd >= 0);
assert(ret);
@ -30,32 +44,48 @@ int loop_device_make(int fd, int open_flags, LoopDevice **ret) {
return -errno;
if (S_ISBLK(st.st_mode)) {
int copy;
if (ioctl(loop, LOOP_GET_STATUS64, &info) >= 0) {
/* Oh! This is a loopback device? That's interesting! */
/* If this is already a block device, store a copy of the fd as it is */
#if HAVE_VALGRIND_MEMCHECK_H
/* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif
nr = info.lo_number;
copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
if (copy < 0)
return -errno;
if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
return -ENOMEM;
}
d = new0(LoopDevice, 1);
if (!d)
return -ENOMEM;
if (offset == 0 && IN_SET(size, 0, UINT64_MAX)) {
int copy;
*d = (LoopDevice) {
.fd = copy,
.nr = -1,
.relinquished = true, /* It's not allocated by us, don't destroy it when this object is freed */
};
/* If this is already a block device, store a copy of the fd as it is */
*ret = d;
return d->fd;
copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
if (copy < 0)
return -errno;
d = new(LoopDevice, 1);
if (!d)
return -ENOMEM;
*d = (LoopDevice) {
.fd = copy,
.nr = nr,
.node = TAKE_PTR(loopdev),
.relinquished = true, /* It's not allocated by us, don't destroy it when this object is freed */
};
*ret = d;
return d->fd;
}
} else {
r = stat_verify_regular(&st);
if (r < 0)
return r;
}
r = stat_verify_regular(&st);
if (r < 0)
return r;
control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
if (control < 0)
return -errno;
@ -86,12 +116,23 @@ int loop_device_make(int fd, int open_flags, LoopDevice **ret) {
loop = safe_close(loop);
}
if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0)
return -errno;
info = (struct loop_info64) {
/* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
.lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((loop_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
.lo_offset = offset,
.lo_sizelimit = size == UINT64_MAX ? 0 : size,
};
if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) {
r = -errno;
goto fail;
}
d = new(LoopDevice, 1);
if (!d)
return -ENOMEM;
if (!d) {
r = -ENOMEM;
goto fail;
}
*d = (LoopDevice) {
.fd = TAKE_FD(loop),
@ -101,9 +142,17 @@ int loop_device_make(int fd, int open_flags, LoopDevice **ret) {
*ret = d;
return d->fd;
fail:
if (fd >= 0)
(void) ioctl(fd, LOOP_CLR_FD);
if (d && d->fd >= 0)
(void) ioctl(d->fd, LOOP_CLR_FD);
return r;
}
int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret) {
int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, LoopDevice **ret) {
_cleanup_close_ int fd = -1;
assert(path);
@ -114,7 +163,7 @@ int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret)
if (fd < 0)
return -errno;
return loop_device_make(fd, open_flags, ret);
return loop_device_make(fd, open_flags, loop_flags, ret);
}
LoopDevice* loop_device_unref(LoopDevice *d) {
@ -156,3 +205,190 @@ void loop_device_relinquish(LoopDevice *d) {
d->relinquished = true;
}
int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret) {
_cleanup_close_ int loop_fd = -1;
_cleanup_free_ char *p = NULL;
struct loop_info64 info;
struct stat st;
LoopDevice *d;
int nr;
assert(loop_path);
assert(ret);
loop_fd = open(loop_path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
if (loop_fd < 0)
return -errno;
if (fstat(loop_fd, &st) < 0)
return -errno;
if (!S_ISBLK(st.st_mode))
return -ENOTBLK;
if (ioctl(loop_fd, LOOP_GET_STATUS64, &info) >= 0) {
#if HAVE_VALGRIND_MEMCHECK_H
/* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif
nr = info.lo_number;
} else
nr = -1;
p = strdup(loop_path);
if (!p)
return -ENOMEM;
d = new(LoopDevice, 1);
if (!d)
return -ENOMEM;
*d = (LoopDevice) {
.fd = TAKE_FD(loop_fd),
.nr = nr,
.node = TAKE_PTR(p),
.relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
};
*ret = d;
return d->fd;
}
static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
_cleanup_free_ char *whole = NULL, *buffer = NULL;
uint64_t current_offset, current_size, partno;
_cleanup_close_ int whole_fd = -1;
struct stat st;
dev_t devno;
int r;
assert(partition_fd >= 0);
/* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
* loopback device), and changes the offset, if needed. This is a fancy wrapper around
* BLKPG_RESIZE_PARTITION. */
if (fstat(partition_fd, &st) < 0)
return -errno;
assert(S_ISBLK(st.st_mode));
xsprintf(sysfs, "/sys/dev/block/%u:%u/partition", major(st.st_rdev), minor(st.st_rdev));
r = read_one_line_file(sysfs, &buffer);
if (r == -ENOENT) /* not a partition, cannot resize */
return -ENOTTY;
if (r < 0)
return r;
r = safe_atou64(buffer, &partno);
if (r < 0)
return r;
xsprintf(sysfs, "/sys/dev/block/%u:%u/start", major(st.st_rdev), minor(st.st_rdev));
buffer = mfree(buffer);
r = read_one_line_file(sysfs, &buffer);
if (r < 0)
return r;
r = safe_atou64(buffer, &current_offset);
if (r < 0)
return r;
if (current_offset > UINT64_MAX/512U)
return -EINVAL;
current_offset *= 512U;
if (ioctl(partition_fd, BLKGETSIZE64, &current_size) < 0)
return -EINVAL;
if (size == UINT64_MAX && offset == UINT64_MAX)
return 0;
if (current_size == size && current_offset == offset)
return 0;
xsprintf(sysfs, "/sys/dev/block/%u:%u/../dev", major(st.st_rdev), minor(st.st_rdev));
buffer = mfree(buffer);
r = read_one_line_file(sysfs, &buffer);
if (r < 0)
return r;
r = parse_dev(buffer, &devno);
if (r < 0)
return r;
r = device_path_make_major_minor(S_IFBLK, devno, &whole);
if (r < 0)
return r;
whole_fd = open(whole, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
if (whole_fd < 0)
return -errno;
struct blkpg_partition bp = {
.pno = partno,
.start = offset == UINT64_MAX ? current_offset : offset,
.length = size == UINT64_MAX ? current_size : size,
};
struct blkpg_ioctl_arg ba = {
.op = BLKPG_RESIZE_PARTITION,
.data = &bp,
.datalen = sizeof(bp),
};
if (ioctl(whole_fd, BLKPG, &ba) < 0)
return -errno;
return 0;
}
int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
struct loop_info64 info;
assert(d);
/* Changes the offset/start of the loop device relative to the beginning of the underlying file or
* block device. If this loop device actually refers to a partition and not a loopback device, we'll
* try to adjust the partition offsets instead.
*
* If either offset or size is UINT64_MAX we won't change that parameter. */
if (d->fd < 0)
return -EBADF;
if (d->nr < 0) /* not a loopback device */
return resize_partition(d->fd, offset, size);
if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
return -errno;
#if HAVE_VALGRIND_MEMCHECK_H
/* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif
if (size == UINT64_MAX && offset == UINT64_MAX)
return 0;
if (info.lo_sizelimit == size && info.lo_offset == offset)
return 0;
if (size != UINT64_MAX)
info.lo_sizelimit = size;
if (offset != UINT64_MAX)
info.lo_offset = offset;
if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
return -errno;
return 0;
}
int loop_device_flock(LoopDevice *d, int operation) {
assert(d);
if (d->fd < 0)
return -EBADF;
if (flock(d->fd, operation) < 0)
return -errno;
return 0;
}

View File

@ -14,10 +14,19 @@ struct LoopDevice {
bool relinquished;
};
int loop_device_make(int fd, int open_flags, LoopDevice **ret);
int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret);
int loop_device_make_full(int fd, int open_flags, uint64_t offset, uint64_t size, uint32_t loop_flags, LoopDevice **ret);
static inline int loop_device_make(int fd, int open_flags, uint32_t loop_flags, LoopDevice **ret) {
return loop_device_make_full(fd, open_flags, 0, 0, loop_flags, ret);
}
int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, LoopDevice **ret);
int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret);
LoopDevice* loop_device_unref(LoopDevice *d);
DEFINE_TRIVIAL_CLEANUP_FUNC(LoopDevice*, loop_device_unref);
void loop_device_relinquish(LoopDevice *d);
int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size);
int loop_device_flock(LoopDevice *d, int operation);

View File

@ -2,13 +2,14 @@
#include <errno.h>
#include <fcntl.h>
#include <linux/fs.h>
#include <linux/loop.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/fs.h>
#include "alloc-util.h"
#include "btrfs-util.h"
@ -1166,7 +1167,7 @@ int image_read_metadata(Image *i) {
_cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
_cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
r = loop_device_make_by_path(i->path, O_RDONLY, &d);
r = loop_device_make_by_path(i->path, O_RDONLY, LO_FLAGS_PARTSCAN, &d);
if (r < 0)
return r;

View File

@ -1,6 +1,7 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
#include <fcntl.h>
#include <linux/loop.h>
#include <stdio.h>
#include "dissect-image.h"
@ -21,7 +22,7 @@ int main(int argc, char *argv[]) {
return EXIT_FAILURE;
}
r = loop_device_make_by_path(argv[1], O_RDONLY, &d);
r = loop_device_make_by_path(argv[1], O_RDONLY, LO_FLAGS_PARTSCAN, &d);
if (r < 0) {
log_error_errno(r, "Failed to set up loopback device: %m");
return EXIT_FAILURE;

View File

@ -270,3 +270,7 @@ NetworkEmulatorDelayJitterSec=
NetworkEmulatorLossRate=
NetworkEmulatorDuplicateRate=
NetworkEmulatorPacketLimit=
TokenBufferFilterRate=
TokenBufferFilterBurst=
TokenBufferFilterLatencySec=
StochasticFairnessQueueingPerturbPeriodSec=

View File

@ -0,0 +1,16 @@
[Match]
Name=test1
[Network]
IPv6AcceptRA=no
Address=10.1.2.4/16
[TrafficControlQueueingDiscipline]
Parent=root
TokenBufferFilterRate=0.5M
TokenBufferFilterBurst=5K
TokenBufferFilterLatencySec=70msec
[TrafficControlQueueingDiscipline]
Parent=clsact
StochasticFairnessQueueingPerturbPeriodSec=5sec

View File

@ -1498,7 +1498,8 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
'25-neighbor-ip-dummy.network',
'25-neighbor-ip.network',
'25-nexthop.network',
'25-qdisc.network',
'25-qdisc-netem.network',
'25-qdisc-tbf-and-sfq.network',
'25-route-ipv6-src.network',
'25-route-static.network',
'25-gateway-static.network',
@ -2057,15 +2058,23 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
self.assertRegex(output, '192.168.5.1')
def test_qdisc(self):
copy_unit_to_networkd_unit_path('25-qdisc.network', '12-dummy.netdev')
copy_unit_to_networkd_unit_path('25-qdisc-netem.network', '12-dummy.netdev',
'25-qdisc-tbf-and-sfq.network', '11-dummy.netdev')
start_networkd()
self.wait_online(['dummy98:routable'])
self.wait_online(['dummy98:routable', 'test1:routable'])
output = check_output('tc qdisc show dev dummy98')
print(output)
self.assertRegex(output, 'qdisc netem')
self.assertRegex(output, 'limit 100 delay 50.0ms 10.0ms loss 20%')
self.assertRegex(output, 'limit 200 delay 100.0ms 13.0ms loss 20.5%')
output = check_output('tc qdisc show dev test1')
print(output)
self.assertRegex(output, 'qdisc tbf')
self.assertRegex(output, 'rate 500Kbit burst 5000b lat 70.0ms')
self.assertRegex(output, 'qdisc sfq')
self.assertRegex(output, 'perturb 5sec')
class NetworkdStateFileTests(unittest.TestCase, Utilities):
links = [