Compare commits

...

8 Commits

Author SHA1 Message Date
Yu Watanabe bfb68540d5
Merge pull request #14280 from yuwata/network-tbf-more
network: tc: add more options for TBF
2019-12-11 00:02:43 +09:00
Yu Watanabe 393f783ed3
Merge pull request #14295 from poettering/greedy-alloc-round-up
add new GREEDY_ALLOC_ROUND_UP() to make strv_extend() a bit less slow
2019-12-10 20:17:33 +09:00
Lennart Poettering 6047637645 strv: when growing strv arrays piecemeal actually allocate memory in exponential steps
Let's improve memory allocation for call such as strv_extend() that just
one item to an strv: these are often called in a loop, where they used
to be very ineffecient, since we'd allocate byte-exact space. With this
change let's improve on that, by allocating exponentially by rounding up
to the next exponent of 2. This way we get GREEDY_REALLOC()-like
behaviour without passing around state.

In fact this should be good enough so that we could replace existing
loops around GREEDY_REALLOC() for strv build-up with plain strv_extend()
and get similar behaviour.
2019-12-09 18:36:03 +01:00
Lennart Poettering 47ac31f792 test-util: add more tests for ALIGN_POWER2 2019-12-09 18:36:00 +01:00
Lennart Poettering e49e4c33dc macro: introduce new GREEDY_ALLOC_ROUND_UP() helper 2019-12-09 18:35:10 +01:00
Lennart Poettering 85c267afa7 macro: avoid subtraction overflow in ALIGN_POWER2() 2019-12-09 18:34:05 +01:00
Yu Watanabe 886e07a9cf test-network: add tests for new TBF settings 2019-12-10 01:28:38 +09:00
Yu Watanabe dcfc23ae77 network: tc: add more options for TBF 2019-12-10 01:28:38 +09:00
13 changed files with 262 additions and 17 deletions

View File

@ -2380,6 +2380,15 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>TokenBufferFilterLimitSize=</varname></term>
<listitem>
<para>Takes the number of bytes that can be queued waiting for tokens to become available.
When the size is suffixed with K, M, or G, it is parsed as Kilobytes, Megabytes, or Gigabytes,
respectively, to the base of 1000. Defaults to unset.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>TokenBufferFilterBurst=</varname></term>
<listitem>
@ -2399,6 +2408,33 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>TokenBufferFilterMPUBytes=</varname></term>
<listitem>
<para>The Minimum Packet Unit (MPU) determines the minimal token usage (specified in bytes)
for a packet. When suffixed with K, M, or G, the specified size is parsed as Kilobytes,
Megabytes, or Gigabytes, respectively, to the base of 1000. Defaults to zero.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>TokenBufferFilterPeakRate=</varname></term>
<listitem>
<para>Takes the maximum depletion rate of the bucket. When suffixed with K, M, or G, the
specified size is parsed as Kilobytes, Megabytes, or Gigabytes, respectively, to the base of
1000. Defaults to unset.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>TokenBufferFilterMTUBytes=</varname></term>
<listitem>
<para>Specifies the size of the peakrate bucket. When suffixed with K, M, or G, the specified
size is parsed as Kilobytes, Megabytes, or Gigabytes, respectively, to the base of 1000.
Defaults to unset.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>StochasticFairnessQueueingPerturbPeriodSec=</varname></term>
<listitem>

View File

@ -163,6 +163,11 @@ static inline size_t ALIGN_TO(size_t l, size_t ali) {
/* align to next higher power-of-2 (except for: 0 => 0, overflow => 0) */
static inline unsigned long ALIGN_POWER2(unsigned long u) {
/* Avoid subtraction overflow */
if (u == 0)
return 0;
/* clz(0) is undefined */
if (u == 1)
return 1;
@ -174,6 +179,29 @@ static inline unsigned long ALIGN_POWER2(unsigned long u) {
return 1UL << (sizeof(u) * 8 - __builtin_clzl(u - 1UL));
}
static inline size_t GREEDY_ALLOC_ROUND_UP(size_t l) {
size_t m;
/* Round up allocation sizes a bit to some reasonable, likely larger value. This is supposed to be
* used for cases which are likely called in an allocation loop of some form, i.e. that repetitively
* grow stuff, for example strv_extend() and suchlike.
*
* Note the difference to GREEDY_REALLOC() here, as this helper operates on a single size value only,
* and rounds up to next multiple of 2, needing no further counter.
*
* Note the benefits of direct ALIGN_POWER2() usage: type-safety for size_t, sane handling for very
* small (i.e. <= 2) and safe handling for very large (i.e. > SSIZE_MAX) values. */
if (l <= 2)
return 2; /* Never allocate less than 2 of something. */
m = ALIGN_POWER2(l);
if (m == 0) /* overflow? */
return l;
return m;
}
#ifndef __COVERITY__
# define VOID_0 ((void)0)
#else

View File

@ -193,7 +193,10 @@ int strv_extend_strv(char ***a, char **b, bool filter_duplicates) {
p = strv_length(*a);
q = strv_length(b);
t = reallocarray(*a, p + q + 1, sizeof(char *));
if (p >= SIZE_MAX - q)
return -ENOMEM;
t = reallocarray(*a, GREEDY_ALLOC_ROUND_UP(p + q + 1), sizeof(char *));
if (!t)
return -ENOMEM;
@ -383,19 +386,18 @@ char *strv_join_prefix(char **l, const char *separator, const char *prefix) {
int strv_push(char ***l, char *value) {
char **c;
size_t n, m;
size_t n;
if (!value)
return 0;
n = strv_length(*l);
/* Increase and check for overflow */
m = n + 2;
if (m < n)
/* Check for overflow */
if (n > SIZE_MAX-2)
return -ENOMEM;
c = reallocarray(*l, m, sizeof(char*));
c = reallocarray(*l, GREEDY_ALLOC_ROUND_UP(n + 2), sizeof(char*));
if (!c)
return -ENOMEM;
@ -408,19 +410,19 @@ int strv_push(char ***l, char *value) {
int strv_push_pair(char ***l, char *a, char *b) {
char **c;
size_t n, m;
size_t n;
if (!a && !b)
return 0;
n = strv_length(*l);
/* increase and check for overflow */
m = n + !!a + !!b + 1;
if (m < n)
/* Check for overflow */
if (n > SIZE_MAX-3)
return -ENOMEM;
c = reallocarray(*l, m, sizeof(char*));
/* increase and check for overflow */
c = reallocarray(*l, GREEDY_ALLOC_ROUND_UP(n + !!a + !!b + 1), sizeof(char*));
if (!c)
return -ENOMEM;
@ -846,8 +848,10 @@ int strv_extend_n(char ***l, const char *value, size_t n) {
/* Adds the value n times to l */
k = strv_length(*l);
if (n >= SIZE_MAX - k)
return -ENOMEM;
nl = reallocarray(*l, k + n + 1, sizeof(char *));
nl = reallocarray(*l, GREEDY_ALLOC_ROUND_UP(k + n + 1), sizeof(char *));
if (!nl)
return -ENOMEM;

View File

@ -252,6 +252,10 @@ TrafficControlQueueingDiscipline.NetworkEmulatorDuplicateRate, con
TrafficControlQueueingDiscipline.NetworkEmulatorPacketLimit, config_parse_tc_network_emulator_packet_limit, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterRate, config_parse_tc_token_buffer_filter_size, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterBurst, config_parse_tc_token_buffer_filter_size, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterLimitSize, config_parse_tc_token_buffer_filter_size, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterMTUBytes, config_parse_tc_token_buffer_filter_size, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterMPUBytes, config_parse_tc_token_buffer_filter_size, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterPeakRate, config_parse_tc_token_buffer_filter_size, 0, 0
TrafficControlQueueingDiscipline.TokenBufferFilterLatencySec, config_parse_tc_token_buffer_filter_latency, 0, 0
TrafficControlQueueingDiscipline.StochasticFairnessQueueingPerturbPeriodSec, config_parse_tc_stochastic_fairness_queueing_perturb_period, 0, 0
/* backwards compatibility: do not add new entries to this section */

View File

@ -189,6 +189,7 @@ int qdisc_configure(Link *link, QDisc *qdisc) {
int qdisc_section_verify(QDisc *qdisc, bool *has_root, bool *has_clsact) {
unsigned i;
int r;
assert(qdisc);
assert(has_root);
@ -204,6 +205,12 @@ int qdisc_section_verify(QDisc *qdisc, bool *has_root, bool *has_clsact) {
"Ignoring [TrafficControlQueueingDiscipline] section from line %u.",
qdisc->section->filename, qdisc->section->line);
if (qdisc->has_token_buffer_filter) {
r = token_buffer_filter_section_verify(&qdisc->tbf, qdisc->section);
if (r < 0)
return r;
}
if (qdisc->parent == TC_H_ROOT) {
if (*has_root)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),

View File

@ -12,6 +12,7 @@
#include "parse-util.h"
#include "qdisc.h"
#include "string-util.h"
#include "tc-util.h"
#include "util.h"
int token_buffer_filter_new(TokenBufferFilter **ret) {
@ -27,6 +28,7 @@ int token_buffer_filter_new(TokenBufferFilter **ret) {
}
int token_buffer_filter_fill_message(Link *link, const TokenBufferFilter *tbf, sd_netlink_message *req) {
uint32_t rtab[256], ptab[256];
struct tc_tbf_qopt opt = {};
int r;
@ -35,7 +37,42 @@ int token_buffer_filter_fill_message(Link *link, const TokenBufferFilter *tbf, s
assert(req);
opt.rate.rate = tbf->rate >= (1ULL << 32) ? ~0U : tbf->rate;
opt.limit = tbf->rate * (double) tbf->latency / USEC_PER_SEC + tbf->burst;
opt.peakrate.rate = tbf->peak_rate >= (1ULL << 32) ? ~0U : tbf->peak_rate;
if (tbf->limit > 0)
opt.limit = tbf->limit;
else {
double lim, lim2;
lim = tbf->rate * (double) tbf->latency / USEC_PER_SEC + tbf->burst;
if (tbf->peak_rate > 0) {
lim2 = tbf->peak_rate * (double) tbf->latency / USEC_PER_SEC + tbf->mtu;
lim = MIN(lim, lim2);
}
opt.limit = lim;
}
opt.rate.mpu = tbf->mpu;
r = tc_fill_ratespec_and_table(&opt.rate, rtab, tbf->mtu);
if (r < 0)
return log_link_error_errno(link, r, "Failed to calculate ratespec: %m");
r = tc_transmit_time(opt.rate.rate, tbf->burst, &opt.buffer);
if (r < 0)
return log_link_error_errno(link, r, "Failed to calculate buffer size: %m");
if (opt.peakrate.rate > 0) {
opt.peakrate.mpu = tbf->mpu;
r = tc_fill_ratespec_and_table(&opt.peakrate, ptab, tbf->mtu);
if (r < 0)
return log_link_error_errno(link, r, "Failed to calculate ratespec: %m");
r = tc_transmit_time(opt.peakrate.rate, tbf->mtu, &opt.mtu);
if (r < 0)
return log_link_error_errno(link, r, "Failed to calculate mtu size: %m");
}
r = sd_netlink_message_open_array(req, TCA_OPTIONS);
if (r < 0)
@ -55,6 +92,26 @@ int token_buffer_filter_fill_message(Link *link, const TokenBufferFilter *tbf, s
return log_link_error_errno(link, r, "Could not append TCA_TBF_RATE64 attribute: %m");
}
r = sd_netlink_message_append_data(req, TCA_TBF_RTAB, rtab, sizeof(rtab));
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_TBF_RTAB attribute: %m");
if (opt.peakrate.rate > 0) {
if (tbf->peak_rate >= (1ULL << 32)) {
r = sd_netlink_message_append_data(req, TCA_TBF_PRATE64, &tbf->peak_rate, sizeof(tbf->peak_rate));
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_TBF_PRATE64 attribute: %m");
}
r = sd_netlink_message_append_data(req, TCA_TBF_PBURST, &tbf->mtu, sizeof(tbf->mtu));
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_TBF_PBURST attribute: %m");
r = sd_netlink_message_append_data(req, TCA_TBF_PTAB, ptab, sizeof(ptab));
if (r < 0)
return log_link_error_errno(link, r, "Could not append TCA_TBF_PTAB attribute: %m");
}
r = sd_netlink_message_close_container(req);
if (r < 0)
return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m");
@ -93,6 +150,14 @@ int config_parse_tc_token_buffer_filter_size(
qdisc->tbf.rate = 0;
else if (streq(lvalue, "TokenBufferFilterBurst"))
qdisc->tbf.burst = 0;
else if (streq(lvalue, "TokenBufferFilterLimitSize"))
qdisc->tbf.limit = 0;
else if (streq(lvalue, "TokenBufferFilterMTUBytes"))
qdisc->tbf.mtu = 0;
else if (streq(lvalue, "TokenBufferFilterMPUBytes"))
qdisc->tbf.mpu = 0;
else if (streq(lvalue, "TokenBufferFilterPeakRate"))
qdisc->tbf.peak_rate = 0;
qdisc = NULL;
return 0;
@ -110,6 +175,14 @@ int config_parse_tc_token_buffer_filter_size(
qdisc->tbf.rate = k / 8;
else if (streq(lvalue, "TokenBufferFilterBurst"))
qdisc->tbf.burst = k;
else if (streq(lvalue, "TokenBufferFilterLimitSize"))
qdisc->tbf.limit = k;
else if (streq(lvalue, "TokenBufferFilterMPUBytes"))
qdisc->tbf.mpu = k;
else if (streq(lvalue, "TokenBufferFilterMTUBytes"))
qdisc->tbf.mtu = k;
else if (streq(lvalue, "TokenBufferFilterPeakRate"))
qdisc->tbf.peak_rate = k / 8;
qdisc->has_token_buffer_filter = true;
qdisc = NULL;
@ -165,3 +238,37 @@ int config_parse_tc_token_buffer_filter_latency(
return 0;
}
int token_buffer_filter_section_verify(const TokenBufferFilter *tbf, const NetworkConfigSection *section) {
if (tbf->limit > 0 && tbf->latency > 0)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
"%s: Specifying both TokenBufferFilterLimitSize= and TokenBufferFilterLatencySec= is not allowed. "
"Ignoring [TrafficControlQueueingDiscipline] section from line %u.",
section->filename, section->line);
if (tbf->limit == 0 && tbf->latency == 0)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
"%s: Either TokenBufferFilterLimitSize= or TokenBufferFilterLatencySec= is required. "
"Ignoring [TrafficControlQueueingDiscipline] section from line %u.",
section->filename, section->line);
if (tbf->rate == 0)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
"%s: TokenBufferFilterRate= is mandatory. "
"Ignoring [TrafficControlQueueingDiscipline] section from line %u.",
section->filename, section->line);
if (tbf->burst == 0)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
"%s: TokenBufferFilterBurst= is mandatory. "
"Ignoring [TrafficControlQueueingDiscipline] section from line %u.",
section->filename, section->line);
if (tbf->peak_rate > 0 && tbf->mtu == 0)
return log_warning_errno(SYNTHETIC_ERRNO(EINVAL),
"%s: TokenBufferFilterMTUBytes= is mandatory when TokenBufferFilterPeakRate= is specified. "
"Ignoring [TrafficControlQueueingDiscipline] section from line %u.",
section->filename, section->line);
return 0;
}

View File

@ -6,16 +6,22 @@
#include "conf-parser.h"
#include "networkd-link.h"
#include "networkd-util.h"
#include "tc-util.h"
typedef struct TokenBufferFilter {
uint64_t rate;
uint64_t peak_rate;
uint32_t burst;
uint32_t latency;
uint32_t mtu;
usec_t latency;
size_t limit;
size_t mpu;
} TokenBufferFilter;
int token_buffer_filter_new(TokenBufferFilter **ret);
int token_buffer_filter_fill_message(Link *link, const TokenBufferFilter *tbf, sd_netlink_message *req);
int token_buffer_filter_section_verify(const TokenBufferFilter *tbf, const NetworkConfigSection *section);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_token_buffer_filter_latency);
CONFIG_PARSER_PROTOTYPE(config_parse_tc_token_buffer_filter_size);

View File

@ -61,3 +61,34 @@ int parse_tc_percent(const char *s, uint32_t *percent) {
*percent = (double) r / 1000 * UINT32_MAX;
return 0;
}
int tc_transmit_time(uint64_t rate, uint32_t size, uint32_t *ret) {
return tc_time_to_tick(USEC_PER_SEC * ((double)size / (double)rate), ret);
}
int tc_fill_ratespec_and_table(struct tc_ratespec *rate, uint32_t *rtab, uint32_t mtu) {
uint32_t cell_log = 0;
int r;
if (mtu == 0)
mtu = 2047;
while ((mtu >> cell_log) > 255)
cell_log++;
for (size_t i = 0; i < 256; i++) {
uint32_t sz;
sz = (i + 1) << cell_log;
if (sz < rate->mpu)
sz = rate->mpu;
r = tc_transmit_time(rate->rate, sz, &rtab[i]);
if (r < 0)
return r;
}
rate->cell_align = -1;
rate->cell_log = cell_log;
rate->linklayer = TC_LINKLAYER_ETHERNET;
return 0;
}

View File

@ -2,7 +2,11 @@
* Copyright © 2019 VMware, Inc. */
#pragma once
#include <linux/pkt_sched.h>
#include "time-util.h"
int tc_time_to_tick(usec_t t, uint32_t *ret);
int parse_tc_percent(const char *s, uint32_t *percent);
int tc_transmit_time(uint64_t rate, uint32_t size, uint32_t *ret);
int tc_fill_ratespec_and_table(struct tc_ratespec *rate, uint32_t *rtab, uint32_t mtu);

View File

@ -26,7 +26,19 @@ static void test_align_power2(void) {
assert_se(ALIGN_POWER2(1) == 1);
assert_se(ALIGN_POWER2(2) == 2);
assert_se(ALIGN_POWER2(3) == 4);
assert_se(ALIGN_POWER2(4) == 4);
assert_se(ALIGN_POWER2(5) == 8);
assert_se(ALIGN_POWER2(6) == 8);
assert_se(ALIGN_POWER2(7) == 8);
assert_se(ALIGN_POWER2(9) == 16);
assert_se(ALIGN_POWER2(10) == 16);
assert_se(ALIGN_POWER2(11) == 16);
assert_se(ALIGN_POWER2(12) == 16);
assert_se(ALIGN_POWER2(13) == 16);
assert_se(ALIGN_POWER2(14) == 16);
assert_se(ALIGN_POWER2(15) == 16);
assert_se(ALIGN_POWER2(16) == 16);
assert_se(ALIGN_POWER2(17) == 32);
assert_se(ALIGN_POWER2(ULONG_MAX) == 0);
assert_se(ALIGN_POWER2(ULONG_MAX - 1) == 0);

View File

@ -272,5 +272,9 @@ NetworkEmulatorDuplicateRate=
NetworkEmulatorPacketLimit=
TokenBufferFilterRate=
TokenBufferFilterBurst=
TokenBufferFilterLimitSize=
TokenBufferFilterMTUBytes=
TokenBufferFilterMPUBytes=
TokenBufferFilterPeakRate=
TokenBufferFilterLatencySec=
StochasticFairnessQueueingPerturbPeriodSec=

View File

@ -7,9 +7,11 @@ Address=10.1.2.4/16
[TrafficControlQueueingDiscipline]
Parent=root
TokenBufferFilterRate=0.5M
TokenBufferFilterRate=1G
TokenBufferFilterBurst=5K
TokenBufferFilterLatencySec=70msec
TokenBufferFilterPeakRate=100G
TokenBufferFilterMTUBytes=1M
[TrafficControlQueueingDiscipline]
Parent=clsact

View File

@ -2097,7 +2097,7 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
output = check_output('tc qdisc show dev test1')
print(output)
self.assertRegex(output, 'qdisc tbf')
self.assertRegex(output, 'rate 500Kbit burst 5000b lat 70.0ms')
self.assertRegex(output, 'rate 1Gbit burst 5000b peakrate 100Gbit minburst 987500b lat 70.0ms')
self.assertRegex(output, 'qdisc sfq')
self.assertRegex(output, 'perturb 5sec')