lots of useless recvfrom() calls
Lennert Buytenhek
buytenh at wantstofly.org
Sun Jun 3 00:52:06 MSK 2018
On Tue, May 08, 2018 at 12:12:54PM +0300, Lennert Buytenhek wrote:
> > > I am using the patch below to avoid one useless recvfrom() call (that
> > > returns EAGAIN) for every call to dns_ioevent(). Under low load, where
> > > every call to dns_ioevent() processes one DNS reply, this saves half of
> > > the total number of recvfrom() calls.
> >
> > Is it really a problem under low load? If the load is low, it doesn't
> > really matter how many recvfrom() system calls are made, I think.
>
> 'low load' here is 'a low expected number of reply packets per POLLIN
> cycle', but even for a large number of queries per second and multiple
> parallel queries, I would not expect there to be more than one reply
> packet per POLLIN cycle very often.
>
> Is it a 'problem' per se, well, probably not, since it's not a
> correctness issue. But it looks ugly in strace, and the difference in
> (system) CPU time used is visible in a microbenchmark (kPTI woohoo),
> and it's basically just stealing some CPU cycles from more useful tasks.
>
> (Unfortunately, my only benchmark is synthetic, but I've attached it
> below. It needs ivykis, which should be in your distro, and iv_udns
> which I CCd you on. Run it as:
>
> $ ./bench <parallel_queries> <seconds_to_run_for>
>
> )
I've been testing with the ugly patch below, which adds dns_ioevent_lt(),
which is an alternative version of dns_ioevent() that only ever polls
the receive fd once. (The naming is horrible, I agree. Maybe something
like dns_ioevent_once() would be better?)
My benchmark is between two Skylake machines, both connected to a
gigabit LAN, one running bind serving an authoritative zone, and the
other running the benchmark program I sent earlier, with 15 second runs
of DNS queries at 1 concurrent outstanding query, and using dns_ioevent()
("stock") and dns_ioevent_lt() ("earlyout") on alternating runs, for a
whole bunch of runs, where I measure the amount of user time used in
every 15 second run. (The local network is slightly noisy, and
sometimes it drops queries, so I discard data for runs where we had to
retransmit.)
The benchmark is still running, but so far I have this:
$ ministat -c 99.5 -w 73 -q stock.user earlyout.user
x stock.user
+ earlyout.user
N Min Max Median Avg Stddev
x 747 0.17 0.94 0.57 0.55676037 0.10852262
+ 743 0.19 0.86 0.54 0.5276716 0.10745821
Difference at 99.5% confidence
-0.0290888 +/- 0.0172899
-5.22465% +/- 3.10545%
(Student's t, pooled s = 0.107993)
$
This shows a small but statistically significant difference in user time
between the recv-until-EAGAIN and recv-only-once approaches, where the
latter uses on average 5.2% less user CPU time (+/- 3.1%). This is on
kernel 4.16.12, with kPTI enabled.
> > Also, if the number of system calls really is a concern, maybe using
> > recvmmsg() instead of recvfrom(), on linux, will be a good idea.
>
> Sounds good! But it's probably worth microbenchmarking to see how
> much slower recvmmsg() is compared to recvfrom() for the case where
> there is one reply, I'll do this if you don't beat me to it.
I'll do this next.
diff --git a/udns.h b/udns.h
index 371e697..e9e283e 100644
--- a/udns.h
+++ b/udns.h
@@ -460,6 +460,8 @@ dns_setstatus(struct dns_ctx *ctx, int status);
/* handle I/O event on UDP socket */
UDNS_API void
dns_ioevent(struct dns_ctx *ctx, time_t now);
+UDNS_API void
+dns_ioevent_lt(struct dns_ctx *ctx, time_t now);
/* process any timeouts, return time in secounds to the
* next timeout (or -1 if none) but not greather than maxwait */
diff --git a/udns_resolver.c b/udns_resolver.c
index b8f899a..a626fa6 100644
--- a/udns_resolver.c
+++ b/udns_resolver.c
@@ -947,18 +947,7 @@ dns_submit_p(struct dns_ctx *ctx,
dns_submit_dn(ctx, ctx->dnsc_pbuf, qcls, qtyp, flags, parse, cbck, data);
}
-/* process readable fd condition.
- * To be usable in edge-triggered environment, the routine
- * should consume all input so it should loop over.
- * Note it isn't really necessary to loop here, because
- * an application may perform the loop just fine by it's own,
- * but in this case we should return some sensitive result,
- * to indicate when to stop calling and error conditions.
- * Note also we may encounter all sorts of recvfrom()
- * errors which aren't fatal, and at the same time we may
- * loop forever if an error IS fatal.
- */
-void dns_ioevent(struct dns_ctx *ctx, time_t now) {
+static int dns_ioevent_internal(struct dns_ctx *ctx, time_t now) {
int r;
unsigned servi;
struct dns_query *q;
@@ -968,16 +957,8 @@ void dns_ioevent(struct dns_ctx *ctx, time_t now) {
union sockaddr_ns sns;
socklen_t slen;
- SETCTX(ctx);
- if (!CTXOPEN(ctx))
- return;
- dns_assert_ctx(ctx);
pbuf = ctx->dnsc_pbuf;
- if (!now) now = time(NULL);
-
-again: /* receive the reply */
-
slen = sizeof(sns);
r = recvfrom(ctx->dnsc_udpsock, (void*)pbuf, ctx->dnsc_udpbuf,
MSG_DONTWAIT, &sns.sa, &slen);
@@ -996,11 +977,8 @@ again: /* receive the reply */
#else
if (errno == EAGAIN)
#endif
- {
- dns_request_utm(ctx, now);
- return;
- }
- goto again;
+ return 0;
+ return 1;
}
pend = pbuf + r;
@@ -1009,7 +987,7 @@ again: /* receive the reply */
/* check reply header */
if (pcur > pend || dns_numqd(pbuf) > 1 || dns_opcode(pbuf) != 0) {
DNS_DBG(ctx, -1/*bad reply*/, &sns.sa, slen, pbuf, r);
- goto again;
+ return 1;
}
/* find the matching query, by qID */
@@ -1017,7 +995,7 @@ again: /* receive the reply */
if (!q) {
/* no more requests: old reply? */
DNS_DBG(ctx, -5/*no matching query*/, &sns.sa, slen, pbuf, r);
- goto again;
+ return 1;
}
if (pbuf[DNS_H_QID1] == q->dnsq_id[0] &&
pbuf[DNS_H_QID2] == q->dnsq_id[1])
@@ -1031,13 +1009,13 @@ again: /* receive the reply */
if (dns_getdn(pbuf, &pcur, pend, dn, sizeof(dn)) < 0 ||
pcur + 4 > pend) {
DNS_DBG(ctx, -1/*bad reply*/, &sns.sa, slen, pbuf, r);
- goto again;
+ return 1;
}
if (!dns_dnequal(dn, q->dnsq_dn) ||
memcmp(pcur, q->dnsq_typcls, 4) != 0) {
/* not this query */
DNS_DBG(ctx, -5/*no matching query*/, &sns.sa, slen, pbuf, r);
- goto again;
+ return 1;
}
/* here, query match, and pcur points past qDN in query section in pbuf */
}
@@ -1045,7 +1023,7 @@ again: /* receive the reply */
else if (dns_rcode(pbuf) != DNS_R_FORMERR) {
/* treat it as bad reply if !FORMERR */
DNS_DBG(ctx, -1/*bad reply*/, &sns.sa, slen, pbuf, r);
- goto again;
+ return 1;
}
else {
/* else it's FORMERR, handled below */
@@ -1072,7 +1050,7 @@ again: /* receive the reply */
* Note we can receive reply from first try if we're already at next */
if (!(q->dnsq_servwait & (1 << servi))) { /* if ever asked this NS */
DNS_DBG(ctx, -2/*wrong server*/, &sns.sa, slen, pbuf, r);
- goto again;
+ return 1;
}
/* we got (some) reply for our query */
@@ -1125,7 +1103,7 @@ again: /* receive the reply */
memcpy(result, pbuf, r);
dns_end_query(ctx, q, r, result);
}
- goto again;
+ return 1;
case DNS_R_NXDOMAIN: /* Non-existing domain. */
if (dns_next_srch(ctx, q))
@@ -1136,7 +1114,7 @@ again: /* receive the reply */
* if we've seen it before, or NXDOMAIN if not. */
dns_end_query(ctx, q,
q->dnsq_flags & DNS_SEEN_NODATA ? DNS_E_NODATA : DNS_E_NXDOMAIN, 0);
- goto again;
+ return 1;
case DNS_R_FORMERR:
case DNS_R_NOTIMPL:
@@ -1155,7 +1133,7 @@ again: /* receive the reply */
*/
q->dnsq_servnEDNS0 |= 1 << servi;
dns_send_this(ctx, q, servi, now);
- goto again;
+ return 1;
}
/* else we handle it the same as SERVFAIL etc */
@@ -1195,8 +1173,44 @@ again: /* receive the reply */
else {
/* else don't do anything - not all servers replied yet */
}
- goto again;
+ return 1;
+}
+
+/* process readable fd condition.
+ * To be usable in edge-triggered environment, the routine
+ * should consume all input so it should loop over.
+ * Note it isn't really necessary to loop here, because
+ * an application may perform the loop just fine by it's own,
+ * but in this case we should return some sensitive result,
+ * to indicate when to stop calling and error conditions.
+ * Note also we may encounter all sorts of recvfrom()
+ * errors which aren't fatal, and at the same time we may
+ * loop forever if an error IS fatal.
+ */
+void dns_ioevent(struct dns_ctx *ctx, time_t now) {
+ SETCTX(ctx);
+ if (!CTXOPEN(ctx))
+ return;
+ dns_assert_ctx(ctx);
+
+ if (!now) now = time(NULL);
+
+ while (dns_ioevent_internal(ctx, now));
+
+ dns_request_utm(ctx, now);
+}
+
+void dns_ioevent_lt(struct dns_ctx *ctx, time_t now) {
+ SETCTX(ctx);
+ if (!CTXOPEN(ctx))
+ return;
+ dns_assert_ctx(ctx);
+
+ if (!now) now = time(NULL);
+
+ dns_ioevent_internal(ctx, now);
+ dns_request_utm(ctx, now);
}
/* handle all timeouts */
More information about the udns
mailing list