LVS
lvs-devel
Google
 
Web LinuxVirtualServer.org

[PATCHv2 net 1/3] ipvs: fixes for the new ip_vs_status info

To: Simon Horman <horms@xxxxxxxxxxxx>
Subject: [PATCHv2 net 1/3] ipvs: fixes for the new ip_vs_status info
Cc: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>, Florian Westphal <fw@xxxxxxxxx>, lvs-devel@xxxxxxxxxxxxxxx, netfilter-devel@xxxxxxxxxxxxxxx
From: Julian Anastasov <ja@xxxxxx>
Date: Mon, 20 Apr 2026 19:55:37 +0300
Sashiko reports some problems for the recently added
/proc/net/ip_vs_status:

* ip_vs_status_show() as a table reader may run long after the
conn_tab and svc_table tables are released. While ip_vs_conn_flush()
properly changes the conn_tab_changes counter when conn_tab is removed,
ip_vs_del_service() and ip_vs_flush() were missing such a change for
the svc_table_changes counter. As a result, readers like
ip_vs_dst_event() and ip_vs_status_show() may continue to use
a freed table after a cond_resched_rcu() call.

* While counting the buckets in ip_vs_status_show(), make sure we
traverse only the needed number of entries in the chain. This also
prevents possible overflow of the 'count' variable.

* Add check for 'loops' to prevent infinite loops while restarting
the traversal on table change.

* While IP_VS_CONN_TAB_MAX_BITS is 20 on 32-bit platforms and
there is no risk of overflow when multiplying the number of
conn_tab buckets by 100, prefer the div_u64() helper to make
the following division safer.

* Use 0440 permissions for ip_vs_status to restrict the
info only to root due to the exported information for hash
distribution.

Link: https://sashiko.dev/#/patchset/20260410112352.23599-1-fw%40strlen.de
Signed-off-by: Julian Anastasov <ja@xxxxxx>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 51 ++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6632daa87ded..27e50afe9a54 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2032,6 +2032,9 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
                cancel_delayed_work_sync(&ipvs->svc_resize_work);
                if (t) {
                        rcu_assign_pointer(ipvs->svc_table, NULL);
+                       /* Inform readers that table is removed */
+                       smp_mb__before_atomic();
+                       atomic_inc(&ipvs->svc_table_changes);
                        while (1) {
                                p = rcu_dereference_protected(t->new_tbl, 1);
                                call_rcu(&t->rcu_head, ip_vs_rht_rcu_free);
@@ -2078,6 +2081,9 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool 
cleanup)
        t = rcu_dereference_protected(ipvs->svc_table, 1);
        if (t) {
                rcu_assign_pointer(ipvs->svc_table, NULL);
+               /* Inform readers that table is removed */
+               smp_mb__before_atomic();
+               atomic_inc(&ipvs->svc_table_changes);
                while (1) {
                        p = rcu_dereference_protected(t->new_tbl, 1);
                        call_rcu(&t->rcu_head, ip_vs_rht_rcu_free);
@@ -3004,7 +3010,8 @@ static int ip_vs_status_show(struct seq_file *seq, void 
*v)
        int old_gen, new_gen;
        u32 counts[8];
        u32 bucket;
-       int count;
+       u32 count;
+       int loops;
        u32 sum1;
        u32 sum;
        int i;
@@ -3020,6 +3027,7 @@ static int ip_vs_status_show(struct seq_file *seq, void 
*v)
        if (!atomic_read(&ipvs->conn_count))
                goto after_conns;
        old_gen = atomic_read(&ipvs->conn_tab_changes);
+       loops = 0;
 
 repeat_conn:
        smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */
@@ -3032,8 +3040,11 @@ static int ip_vs_status_show(struct seq_file *seq, void 
*v)
                        resched_score++;
                        ip_vs_rht_walk_bucket_rcu(t, bucket, head) {
                                count = 0;
-                               hlist_bl_for_each_entry_rcu(hn, e, head, node)
+                               hlist_bl_for_each_entry_rcu(hn, e, head, node) {
                                        count++;
+                                       if (count >= ARRAY_SIZE(counts) - 1)
+                                               break;
+                               }
                        }
                        resched_score += count;
                        if (resched_score >= 100) {
@@ -3042,37 +3053,41 @@ static int ip_vs_status_show(struct seq_file *seq, void 
*v)
                                new_gen = atomic_read(&ipvs->conn_tab_changes);
                                /* New table installed ? */
                                if (old_gen != new_gen) {
+                                       /* Too many changes? */
+                                       if (++loops >= 5)
+                                               goto after_conns;
                                        old_gen = new_gen;
                                        goto repeat_conn;
                                }
                        }
-                       counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++;
+                       counts[count]++;
                }
        }
        for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++)
                sum += counts[i];
        sum1 = sum - counts[0];
-       seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n",
-                  counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U));
+       seq_printf(seq, "Conn buckets empty:\t%u (%llu%%)\n",
+                  counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U)));
        for (i = 1; i < ARRAY_SIZE(counts); i++) {
                if (!counts[i])
                        continue;
-               seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n",
+               seq_printf(seq, "Conn buckets len-%d:\t%u (%llu%%)\n",
                           i, counts[i],
-                          (unsigned long)counts[i] * 100 / max(sum1, 1U));
+                          div_u64((u64)counts[i] * 100U, max(sum1, 1U)));
        }
 
 after_conns:
        t = rcu_dereference(ipvs->svc_table);
 
        count = ip_vs_get_num_services(ipvs);
-       seq_printf(seq, "Services:\t%d\n", count);
+       seq_printf(seq, "Services:\t%u\n", count);
        seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n",
                   t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0);
 
        if (!count)
                goto after_svc;
        old_gen = atomic_read(&ipvs->svc_table_changes);
+       loops = 0;
 
 repeat_svc:
        smp_rmb(); /* ipvs->svc_table and svc_table_changes */
@@ -3086,8 +3101,11 @@ static int ip_vs_status_show(struct seq_file *seq, void 
*v)
                        ip_vs_rht_walk_bucket_rcu(t, bucket, head) {
                                count = 0;
                                hlist_bl_for_each_entry_rcu(svc, e, head,
-                                                           s_list)
+                                                           s_list) {
                                        count++;
+                                       if (count >= ARRAY_SIZE(counts) - 1)
+                                               break;
+                               }
                        }
                        resched_score += count;
                        if (resched_score >= 100) {
@@ -3096,24 +3114,27 @@ static int ip_vs_status_show(struct seq_file *seq, void 
*v)
                                new_gen = atomic_read(&ipvs->svc_table_changes);
                                /* New table installed ? */
                                if (old_gen != new_gen) {
+                                       /* Too many changes? */
+                                       if (++loops >= 5)
+                                               goto after_svc;
                                        old_gen = new_gen;
                                        goto repeat_svc;
                                }
                        }
-                       counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++;
+                       counts[count]++;
                }
        }
        for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++)
                sum += counts[i];
        sum1 = sum - counts[0];
-       seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n",
-                  counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U));
+       seq_printf(seq, "Service buckets empty:\t%u (%llu%%)\n",
+                  counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U)));
        for (i = 1; i < ARRAY_SIZE(counts); i++) {
                if (!counts[i])
                        continue;
-               seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n",
+               seq_printf(seq, "Service buckets len-%d:\t%u (%llu%%)\n",
                           i, counts[i],
-                          (unsigned long)counts[i] * 100 / max(sum1, 1U));
+                          div_u64((u64)counts[i] * 100U, max(sum1, 1U)));
        }
 
 after_svc:
@@ -5039,7 +5060,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs 
*ipvs)
                                    ipvs->net->proc_net,
                                    ip_vs_stats_percpu_show, NULL))
                goto err_percpu;
-       if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net,
+       if (!proc_create_net_single("ip_vs_status", 0440, ipvs->net->proc_net,
                                    ip_vs_status_show, NULL))
                goto err_status;
 #endif
-- 
2.53.0




<Prev in Thread] Current Thread [Next in Thread>