Hello,
On Tue, 3 Dec 2024, Florian Westphal wrote:
> Reading is very slow because ->start() performs a linear re-scan of the
> entire hash table until it finds the successor to the last dumped
> element. The current implementation uses 'pos' as the 'number of
> elements to skip', then does linear iteration until it has skipped
> 'pos' entries.
>
> Store the last bucket and the number of elements to skip in that
> bucket instead, so we can resume from bucket b directly.
>
> before this patch, it's possible to read ~35k entries in one second, but
> each read() gets slower as the number of entries to skip grows:
>
> time timeout 60 cat /proc/net/ip_vs_conn > /tmp/all; wc -l /tmp/all
> real 1m0.007s
> user 0m0.003s
> sys 0m59.956s
> 140386 /tmp/all
>
> Only ~100k more got read in the remaining 59s, and got nowhere
> near the 1m entries that are stored at the time.
>
> after this patch, dump completes very quickly:
> time cat /proc/net/ip_vs_conn > /tmp/all; wc -l /tmp/all
> real 0m2.286s
> user 0m0.004s
> sys 0m2.281s
> 1000001 /tmp/all
>
> Signed-off-by: Florian Westphal <fw@xxxxxxxxx>
Nice improvement, thanks!
Acked-by: Julian Anastasov <ja@xxxxxx>
> ---
> net/netfilter/ipvs/ip_vs_conn.c | 50 ++++++++++++++++++---------------
> 1 file changed, 28 insertions(+), 22 deletions(-)
>
> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> index 7aba4760bbff..73f3dac159bb 100644
> --- a/net/netfilter/ipvs/ip_vs_conn.c
> +++ b/net/netfilter/ipvs/ip_vs_conn.c
> @@ -1046,28 +1046,35 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int
> dest_af,
> #ifdef CONFIG_PROC_FS
> struct ip_vs_iter_state {
> struct seq_net_private p;
> - struct hlist_head *l;
> + unsigned int bucket;
> + unsigned int skip_elems;
> };
>
> -static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
> +static void *ip_vs_conn_array(struct ip_vs_iter_state *iter)
> {
> int idx;
> struct ip_vs_conn *cp;
> - struct ip_vs_iter_state *iter = seq->private;
>
> - for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
> + for (idx = iter->bucket; idx < ip_vs_conn_tab_size; idx++) {
> + unsigned int skip = 0;
> +
> hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
> /* __ip_vs_conn_get() is not needed by
> * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show
> */
> - if (pos-- == 0) {
> - iter->l = &ip_vs_conn_tab[idx];
> + if (skip >= iter->skip_elems) {
> + iter->bucket = idx;
> return cp;
> }
> +
> + ++skip;
> }
> +
> + iter->skip_elems = 0;
> cond_resched_rcu();
> }
>
> + iter->bucket = idx;
> return NULL;
> }
>
> @@ -1076,9 +1083,14 @@ static void *ip_vs_conn_seq_start(struct seq_file
> *seq, loff_t *pos)
> {
> struct ip_vs_iter_state *iter = seq->private;
>
> - iter->l = NULL;
> rcu_read_lock();
> - return *pos ? ip_vs_conn_array(seq, *pos - 1) : SEQ_START_TOKEN;
> + if (*pos == 0) {
> + iter->skip_elems = 0;
> + iter->bucket = 0;
> + return SEQ_START_TOKEN;
> + }
> +
> + return ip_vs_conn_array(iter);
> }
>
> static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> @@ -1086,28 +1098,22 @@ static void *ip_vs_conn_seq_next(struct seq_file
> *seq, void *v, loff_t *pos)
> struct ip_vs_conn *cp = v;
> struct ip_vs_iter_state *iter = seq->private;
> struct hlist_node *e;
> - struct hlist_head *l = iter->l;
> - int idx;
>
> ++*pos;
> if (v == SEQ_START_TOKEN)
> - return ip_vs_conn_array(seq, 0);
> + return ip_vs_conn_array(iter);
>
> /* more on same hash chain? */
> e = rcu_dereference(hlist_next_rcu(&cp->c_list));
> - if (e)
> + if (e) {
> + iter->skip_elems++;
> return hlist_entry(e, struct ip_vs_conn, c_list);
> -
> - idx = l - ip_vs_conn_tab;
> - while (++idx < ip_vs_conn_tab_size) {
> - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
> - iter->l = &ip_vs_conn_tab[idx];
> - return cp;
> - }
> - cond_resched_rcu();
> }
> - iter->l = NULL;
> - return NULL;
> +
> + iter->skip_elems = 0;
> + iter->bucket++;
> +
> + return ip_vs_conn_array(iter);
> }
>
> static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
> --
> 2.45.2
Regards
--
Julian Anastasov <ja@xxxxxx>
|