Download raw body.
fq_codel: uncap count
Dave Taht reported a bug of fq_codel count being capped at 400 [0]. Our
implementation uses a lookup table of the first ~400 inverse square
roots, which is used to reach target, instead of a free running
variable.
I would not go as far of calling this a bug, instead I think this is a
design choice (?). However you look at it, it is seemingly a deviation
from RFC 8290, which describes the use of a free running variable, which
is calculated. Per Dave's suggestion I had a look at the implementation
of linux and FreeBSD.
Diff below replaces the lookup table by a calculation based on Newton's
method for finding the inverse root of a square root. This is the same
approach as used by FreeBSD.
I have been running with this diff on amd64 without any noticeable
impact on performance. I'm hoping that someone is willing to review the
bits below, and helps testing.
While here zap a couple of white spaces.
[0] https://marc.info/?l=openbsd-bugs&m=170741059623394&w=2
diff --git sys/net/fq_codel.c sys/net/fq_codel.c
index 297280bf995..bc7601108a1 100644
--- sys/net/fq_codel.c
+++ sys/net/fq_codel.c
@@ -17,16 +17,15 @@
*/
/*
- * Codel - The Controlled-Delay Active Queue Management algorithm
- * IETF draft-ietf-aqm-codel-07
+ * Controlled Delay Active Queue Management, RFC 8289
*
* Based on the algorithm by Kathleen Nichols and Van Jacobson with
* improvements from Dave Taht and Eric Dumazet.
*/
/*
- * The FlowQueue-CoDel Packet Scheduler and Active Queue Management
- * IETF draft-ietf-aqm-fq-codel-06
+ * The Flow Queue CoDel Packet Scheduler and Active Queue Management
+ * Algorithm, RFC 8290
*
* Based on the implementation by Rasool Al-Saadi, Centre for Advanced
* Internet Architectures, Swinburne University of Technology, Melbourne,
@@ -64,24 +63,23 @@ struct codel {
int64_t start; /* The moment queue was above target */
int64_t next; /* Next interval */
int64_t delay; /* Delay incurred by the last packet */
+
+ uint16_t inv_sqrt; /* Inverse square root computation */
};
struct codel_params {
int64_t target;
int64_t interval;
int quantum;
-
- uint32_t *intervals;
};
void codel_initparams(struct codel_params *, unsigned int,
unsigned int, int);
-void codel_freeparams(struct codel_params *);
void codel_enqueue(struct codel *, int64_t, struct mbuf *);
struct mbuf *codel_dequeue(struct codel *, struct codel_params *, int64_t,
struct mbuf_list *, uint64_t *, uint64_t *);
struct mbuf *codel_commit(struct codel *, struct mbuf *);
-void codel_purge(struct codel *, struct mbuf_list *ml);
+void codel_purge(struct codel *, struct mbuf_list *);
struct flow {
struct codel cd;
@@ -113,8 +111,8 @@ struct fqcodel {
#define FQCF_FIXED_QUANTUM 0x1
/* stats */
- struct fqcodel_pktcntr xmit_cnt;
- struct fqcodel_pktcntr drop_cnt;
+ struct fqcodel_pktcntr xmit_cnt;
+ struct fqcodel_pktcntr drop_cnt;
};
unsigned int fqcodel_idx(unsigned int, const struct mbuf *);
@@ -145,15 +143,15 @@ static const struct ifq_ops fqcodel_ops = {
fqcodel_free
};
-const struct ifq_ops * const ifq_fqcodel_ops = &fqcodel_ops;
+const struct ifq_ops *const ifq_fqcodel_ops = &fqcodel_ops;
void *fqcodel_pf_alloc(struct ifnet *);
int fqcodel_pf_addqueue(void *, struct pf_queuespec *);
void fqcodel_pf_free(void *);
int fqcodel_pf_qstats(struct pf_queuespec *, void *, int *);
unsigned int fqcodel_pf_qlength(void *);
-struct mbuf * fqcodel_pf_enqueue(void *, struct mbuf *);
-struct mbuf * fqcodel_pf_deq_begin(void *, void **, struct mbuf_list *);
+struct mbuf *fqcodel_pf_enqueue(void *, struct mbuf *);
+struct mbuf *fqcodel_pf_deq_begin(void *, void **, struct mbuf_list *);
void fqcodel_pf_deq_commit(void *, struct mbuf *, void *);
void fqcodel_pf_purge(void *, struct mbuf_list *);
@@ -173,7 +171,7 @@ static const struct pfq_ops fqcodel_pf_ops = {
fqcodel_pf_purge
};
-const struct pfq_ops * const pfq_fqcodel_ops = &fqcodel_pf_ops;
+const struct pfq_ops *const pfq_fqcodel_ops = &fqcodel_pf_ops;
/* Default aggregate queue depth */
static const unsigned int fqcodel_qlimit = 1024;
@@ -182,116 +180,51 @@ static const unsigned int fqcodel_qlimit = 1024;
* CoDel implementation
*/
-/* Delay target, 5ms */
-static const int64_t codel_target = 5000000;
-
-/* Grace period after last drop, 16 100ms intervals */
-static const int64_t codel_grace = 1600000000;
-
-/* First 399 "100 / sqrt(x)" intervals, ns precision */
-static const uint32_t codel_intervals[] = {
- 100000000, 70710678, 57735027, 50000000, 44721360, 40824829, 37796447,
- 35355339, 33333333, 31622777, 30151134, 28867513, 27735010, 26726124,
- 25819889, 25000000, 24253563, 23570226, 22941573, 22360680, 21821789,
- 21320072, 20851441, 20412415, 20000000, 19611614, 19245009, 18898224,
- 18569534, 18257419, 17960530, 17677670, 17407766, 17149859, 16903085,
- 16666667, 16439899, 16222142, 16012815, 15811388, 15617376, 15430335,
- 15249857, 15075567, 14907120, 14744196, 14586499, 14433757, 14285714,
- 14142136, 14002801, 13867505, 13736056, 13608276, 13483997, 13363062,
- 13245324, 13130643, 13018891, 12909944, 12803688, 12700013, 12598816,
- 12500000, 12403473, 12309149, 12216944, 12126781, 12038585, 11952286,
- 11867817, 11785113, 11704115, 11624764, 11547005, 11470787, 11396058,
- 11322770, 11250879, 11180340, 11111111, 11043153, 10976426, 10910895,
- 10846523, 10783277, 10721125, 10660036, 10599979, 10540926, 10482848,
- 10425721, 10369517, 10314212, 10259784, 10206207, 10153462, 10101525,
- 10050378, 10000000, 9950372, 9901475, 9853293, 9805807, 9759001,
- 9712859, 9667365, 9622504, 9578263, 9534626, 9491580, 9449112,
- 9407209, 9365858, 9325048, 9284767, 9245003, 9205746, 9166985,
- 9128709, 9090909, 9053575, 9016696, 8980265, 8944272, 8908708,
- 8873565, 8838835, 8804509, 8770580, 8737041, 8703883, 8671100,
- 8638684, 8606630, 8574929, 8543577, 8512565, 8481889, 8451543,
- 8421519, 8391814, 8362420, 8333333, 8304548, 8276059, 8247861,
- 8219949, 8192319, 8164966, 8137885, 8111071, 8084521, 8058230,
- 8032193, 8006408, 7980869, 7955573, 7930516, 7905694, 7881104,
- 7856742, 7832604, 7808688, 7784989, 7761505, 7738232, 7715167,
- 7692308, 7669650, 7647191, 7624929, 7602859, 7580980, 7559289,
- 7537784, 7516460, 7495317, 7474351, 7453560, 7432941, 7412493,
- 7392213, 7372098, 7352146, 7332356, 7312724, 7293250, 7273930,
- 7254763, 7235746, 7216878, 7198158, 7179582, 7161149, 7142857,
- 7124705, 7106691, 7088812, 7071068, 7053456, 7035975, 7018624,
- 7001400, 6984303, 6967330, 6950480, 6933752, 6917145, 6900656,
- 6884284, 6868028, 6851887, 6835859, 6819943, 6804138, 6788442,
- 6772855, 6757374, 6741999, 6726728, 6711561, 6696495, 6681531,
- 6666667, 6651901, 6637233, 6622662, 6608186, 6593805, 6579517,
- 6565322, 6551218, 6537205, 6523281, 6509446, 6495698, 6482037,
- 6468462, 6454972, 6441566, 6428243, 6415003, 6401844, 6388766,
- 6375767, 6362848, 6350006, 6337243, 6324555, 6311944, 6299408,
- 6286946, 6274558, 6262243, 6250000, 6237829, 6225728, 6213698,
- 6201737, 6189845, 6178021, 6166264, 6154575, 6142951, 6131393,
- 6119901, 6108472, 6097108, 6085806, 6074567, 6063391, 6052275,
- 6041221, 6030227, 6019293, 6008418, 5997601, 5986843, 5976143,
- 5965500, 5954913, 5944383, 5933908, 5923489, 5913124, 5902813,
- 5892557, 5882353, 5872202, 5862104, 5852057, 5842062, 5832118,
- 5822225, 5812382, 5802589, 5792844, 5783149, 5773503, 5763904,
- 5754353, 5744850, 5735393, 5725983, 5716620, 5707301, 5698029,
- 5688801, 5679618, 5670480, 5661385, 5652334, 5643326, 5634362,
- 5625440, 5616560, 5607722, 5598925, 5590170, 5581456, 5572782,
- 5564149, 5555556, 5547002, 5538488, 5530013, 5521576, 5513178,
- 5504819, 5496497, 5488213, 5479966, 5471757, 5463584, 5455447,
- 5447347, 5439283, 5431254, 5423261, 5415304, 5407381, 5399492,
- 5391639, 5383819, 5376033, 5368281, 5360563, 5352877, 5345225,
- 5337605, 5330018, 5322463, 5314940, 5307449, 5299989, 5292561,
- 5285164, 5277798, 5270463, 5263158, 5255883, 5248639, 5241424,
- 5234239, 5227084, 5219958, 5212860, 5205792, 5198752, 5191741,
- 5184758, 5177804, 5170877, 5163978, 5157106, 5150262, 5143445,
- 5136655, 5129892, 5123155, 5116445, 5109761, 5103104, 5096472,
- 5089866, 5083286, 5076731, 5070201, 5063697, 5057217, 5050763,
- 5044333, 5037927, 5031546, 5025189, 5018856, 5012547, 5006262
-};
+/*
+ * Run a Newton method step:
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
+ */
+static inline void
+codel_step(struct codel *cd)
+{
+ uint32_t invsqrt, invsqrt2;
+ uint64_t val;
+
+/* sizeof_in_bits(inv_sqrt) */
+#define INV_SQRT_BITS (8 * sizeof(uint16_t))
+/* needed shift to get a Q0.32 number from inv_sqrt */
+#define INV_SQRT_SHIFT (32 - INV_SQRT_BITS)
+
+ invsqrt = ((uint32_t)cd->inv_sqrt) << INV_SQRT_SHIFT;
+ invsqrt2 = ((uint64_t)invsqrt * invsqrt) >> 32;
+ val = (3LL << 32) - ((uint64_t)cd->drops * invsqrt2);
+ val >>= 2; /* avoid overflow in following multiply */
+ val = (val * invsqrt) >> (32 - 2 + 1);
+
+ cd->inv_sqrt = val >> INV_SQRT_SHIFT;
+}
void
codel_initparams(struct codel_params *cp, unsigned int target,
unsigned int interval, int quantum)
{
- uint64_t mult;
- unsigned int i;
-
- /*
- * Update observation intervals table according to the configured
- * initial interval value.
- */
- if (interval > codel_intervals[0]) {
- /* Select either specified target or 5% of an interval */
- cp->target = MAX(target, interval / 5);
+ /* Default interval, 100ms (in ns), RFC 8289 section 4.2 */
+ if (interval > 0)
cp->interval = interval;
+ else
+ cp->interval = 100000000;
- /* The coefficient is scaled up by a 1000 */
- mult = ((uint64_t)cp->interval * 1000) / codel_intervals[0];
-
- /* Prepare table of intervals */
- cp->intervals = mallocarray(nitems(codel_intervals),
- sizeof(codel_intervals[0]), M_DEVBUF, M_WAITOK | M_ZERO);
- for (i = 0; i < nitems(codel_intervals); i++)
- cp->intervals[i] = ((uint64_t)codel_intervals[i] *
- mult) / 1000;
- } else {
- cp->target = MAX(target, codel_target);
- cp->interval = codel_intervals[0];
- cp->intervals = (uint32_t *)codel_intervals;
- }
+ /* Default delay target, 5ms (in ns), RFC 8289 section 4.3 */
+ if (target > 0)
+ cp->target = target;
+ else
+ cp->target = 5000000;
cp->quantum = quantum;
}
-void
-codel_freeparams(struct codel_params *cp)
-{
- if (cp->intervals != codel_intervals)
- free(cp->intervals, M_DEVBUF, nitems(codel_intervals) *
- sizeof(codel_intervals[0]));
- cp->intervals = NULL;
-}
-
static inline void
codel_gettime(int64_t *now)
{
@@ -329,16 +262,14 @@ codel_enqueue(struct codel *cd, int64_t now, struct mbuf *m)
}
/*
- * Select the next interval according to the number of drops
+ * Set the next interval according to the number of drops
* in the current one relative to the provided timestamp.
*/
static inline void
control_law(struct codel *cd, struct codel_params *cp, int64_t rts)
{
- unsigned int idx;
-
- idx = min(cd->drops, nitems(codel_intervals) - 1);
- cd->next = rts + cp->intervals[idx];
+ cd->next = rts + (uint32_t)(((uint64_t)cp->interval *
+ (cd->inv_sqrt << INV_SQRT_SHIFT)) >> 32);
}
/*
@@ -442,21 +373,22 @@ codel_dequeue(struct codel *cd, struct codel_params *cp, int64_t now,
cd->dropping = 1;
/*
- * If we're still within the grace period and not
- * meeting our minimal delay target we treat this
- * as a continuation of the previous observation
+ * If we're still within the grace period (16 *
+ * interval, RFC 8289 section 5.5) and not meeting
+ * our minimal delay target we treat this as a
+ * continuation of the previous observation
* interval and shrink it further. Otherwise we
* start from the initial one.
*/
delta = cd->drops - cd->ldrops;
- if (delta > 1) {
- if (now < cd->next ||
- now - cd->next < codel_grace)
- cd->drops = delta;
- else
- cd->drops = 1;
- } else
+ if (delta > 1 && (now < cd->next ||
+ now - cd->next < 16 * cp->interval)) {
+ cd->drops = delta;
+ codel_step(cd);
+ } else {
cd->drops = 1;
+ cd->inv_sqrt = ~0U >> INV_SQRT_SHIFT;
+ }
control_law(cd, cp, now);
cd->ldrops = cd->drops;
@@ -466,6 +398,7 @@ codel_dequeue(struct codel *cd, struct codel_params *cp, int64_t now,
m = codel_commit(cd, m);
ml_enqueue(free_ml, m);
cd->drops++;
+ codel_step(cd);
*dpkts += 1;
*dbytes += m->m_pkthdr.len;
@@ -653,7 +586,7 @@ fqcodel_deq_begin(struct fqcodel *fqc, void **cookiep,
codel_gettime(&now);
for (flow = first_flow(fqc, &fq); flow != NULL;
- flow = next_flow(fqc, flow, &fq)) {
+ flow = next_flow(fqc, flow, &fq)) {
m = codel_dequeue(&flow->cd, &fqc->cparams, now, &ml,
&fqc->drop_cnt.packets, &fqc->drop_cnt.bytes);
@@ -790,7 +723,6 @@ fqcodel_pf_free(void *arg)
{
struct fqcodel *fqc = arg;
- codel_freeparams(&fqc->cparams);
free(fqc->flows, M_DEVBUF, fqc->nflows * sizeof(struct flow));
free(fqc, M_DEVBUF, sizeof(struct fqcodel));
}
fq_codel: uncap count