LVS
lvs-devel
Google
 
Web LinuxVirtualServer.org

[PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable

To: ja@xxxxxx
Subject: [PATCH v2] net/netfilter/ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable
Cc: coreteam@xxxxxxxxxxxxx, davem@xxxxxxxxxxxxx, dsahern@xxxxxxxxxx, edumazet@xxxxxxxxxx, fw@xxxxxxxxx, horms@xxxxxxxxxxxx, kadlec@xxxxxxxxxxxxx, kuba@xxxxxxxxxx, lvs-devel@xxxxxxxxxxxxxxx, netfilter-devel@xxxxxxxxxxxxxxx, pabeni@xxxxxxxxxx, pablo@xxxxxxxxxxxxx, syzbot+1651b5234028c294c339@xxxxxxxxxxxxxxxxxxxxxxxxx, zhtfdev@xxxxxxxxx
From: Zhang Tengfei <zhtfdev@xxxxxxxxx>
Date: Thu, 28 Aug 2025 06:33:23 +0800
KCSAN reported a data-race on the `ipvs->enable` flag, which is
written in the control path and read concurrently from many other
contexts.

Following a suggestion by Julian, this patch fixes the race by
converting all accesses to use `WRITE_ONCE()/READ_ONCE()`.
This lightweight approach ensures atomic access and acts as a
compiler barrier, preventing unsafe optimizations where the flag
is checked in loops (e.g., in ip_vs_est.c).

Additionally, the `enable` checks in the fast-path hooks
(`ip_vs_in_hook`, `ip_vs_out_hook`, `ip_vs_forward_icmp`) are
removed. They are considered unnecessary because the `enable=0`
condition they check for can only occur in two rare and non-fatal
scenarios: 1) after hooks are registered but before the flag is set,
and 2) after hooks are unregistered on cleanup_net. In the worst
case, a single packet might be mishandled (e.g., dropped), which
does not lead to a system crash or data corruption. Adding a check
in the performance-critical fast-path to handle this harmless
condition is not a worthwhile trade-off.

Reported-by: syzbot+1651b5234028c294c339@xxxxxxxxxxxxxxxxxxxxxxxxx
Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
Suggested-by: Julian Anastasov <ja@xxxxxx>
Link: 
https://lore.kernel.org/lvs-devel/2189fc62-e51e-78c9-d1de-d35b8e3657e3@xxxxxx/
Signed-off-by: Zhang Tengfei <zhtfdev@xxxxxxxxx>

---
v2:
- Switched from atomic_t to the suggested READ_ONCE()/WRITE_ONCE().
- Removed obsolete checks from the packet processing hooks.
- Polished commit message based on feedback from maintainers.
---
 net/netfilter/ipvs/ip_vs_conn.c |  4 ++--
 net/netfilter/ipvs/ip_vs_core.c | 11 ++++-------
 net/netfilter/ipvs/ip_vs_ctl.c  |  6 +++---
 net/netfilter/ipvs/ip_vs_est.c  | 16 ++++++++--------
 4 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 965f3c8e5..37ebb0cb6 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
                         * conntrack cleanup for the net.
                         */
                        smp_rmb();
-                       if (ipvs->enable)
+                       if (READ_ONCE(ipvs->enable))
                                ip_vs_conn_drop_conntrack(cp);
                }
 
@@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs 
*ipvs)
                cond_resched_rcu();
 
                /* netns clean up started, abort delayed work */
-               if (!ipvs->enable)
+               if (!READ_ONCE(ipvs->enable))
                        break;
        }
        rcu_read_unlock();
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c7a8a08b7..5ea7ab8bf 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1353,9 +1353,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const 
struct nf_hook_state *stat
        if (unlikely(!skb_dst(skb)))
                return NF_ACCEPT;
 
-       if (!ipvs->enable)
-               return NF_ACCEPT;
-
        ip_vs_fill_iph_skb(af, skb, false, &iph);
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6) {
@@ -1940,7 +1937,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const 
struct nf_hook_state *state
                return NF_ACCEPT;
        }
        /* ipvs enabled in this netns ? */
-       if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+       if (unlikely(sysctl_backup_only(ipvs)))
                return NF_ACCEPT;
 
        ip_vs_fill_iph_skb(af, skb, false, &iph);
@@ -2108,7 +2105,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
        int r;
 
        /* ipvs enabled in this netns ? */
-       if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
+       if (unlikely(sysctl_backup_only(ipvs)))
                return NF_ACCEPT;
 
        if (state->pf == NFPROTO_IPV4) {
@@ -2295,7 +2292,7 @@ static int __net_init __ip_vs_init(struct net *net)
                return -ENOMEM;
 
        /* Hold the beast until a service is registered */
-       ipvs->enable = 0;
+       WRITE_ONCE(ipvs->enable, 0);
        ipvs->net = net;
        /* Counters used for creating unique names */
        ipvs->gen = atomic_read(&ipvs_netns_cnt);
@@ -2367,7 +2364,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct 
list_head *net_list)
                ipvs = net_ipvs(net);
                ip_vs_unregister_hooks(ipvs, AF_INET);
                ip_vs_unregister_hooks(ipvs, AF_INET6);
-               ipvs->enable = 0;       /* Disable packet reception */
+               WRITE_ONCE(ipvs->enable, 0);    /* Disable packet reception */
                smp_wmb();
                ip_vs_sync_net_cleanup(ipvs);
        }
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6a6fc4478..4c8fa22be 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct 
*work)
                struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
 
                /* netns clean up started, abort delayed work */
-               if (!ipvs->enable)
+               if (!READ_ONCE(ipvs->enable))
                        goto unlock;
                if (!kd)
                        continue;
@@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct 
ip_vs_service_user_kern *u,
 
        *svc_p = svc;
 
-       if (!ipvs->enable) {
+       if (!READ_ONCE(ipvs->enable)) {
                /* Now there is a service - full throttle */
-               ipvs->enable = 1;
+               WRITE_ONCE(ipvs->enable, 1);
 
                /* Start estimation for first time */
                ip_vs_est_reload_start(ipvs);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 15049b826..93a925f1e 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
 void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
 {
        /* Ignore reloads before first service is added */
-       if (!ipvs->enable)
+       if (!READ_ONCE(ipvs->enable))
                return;
        ip_vs_est_stopped_recalc(ipvs);
        /* Bump the kthread configuration genid */
@@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
        int i;
 
        if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
-           ipvs->enable && ipvs->est_max_threads)
+           READ_ONCE(ipvs->enable) && ipvs->est_max_threads)
                return -EINVAL;
 
        mutex_lock(&ipvs->est_mutex);
@@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
        }
 
        /* Start kthread tasks only when services are present */
-       if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
+       if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
                ret = ip_vs_est_kthread_start(ipvs, kd);
                if (ret < 0)
                        goto out;
@@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct 
ip_vs_stats *stats)
        struct ip_vs_estimator *est = &stats->est;
        int ret;
 
-       if (!ipvs->est_max_threads && ipvs->enable)
+       if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))
                ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
 
        est->ktid = -1;
@@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, 
int *chain_max)
                        /* Wait for cpufreq frequency transition */
                        wait_event_idle_timeout(wq, kthread_should_stop(),
                                                HZ / 50);
-                       if (!ipvs->enable || kthread_should_stop())
+                       if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
                                goto stop;
                }
 
@@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, 
int *chain_max)
                rcu_read_unlock();
                local_bh_enable();
 
-               if (!ipvs->enable || kthread_should_stop())
+               if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
                        goto stop;
                cond_resched();
 
@@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
        mutex_lock(&ipvs->est_mutex);
        for (id = 1; id < ipvs->est_kt_count; id++) {
                /* netns clean up started, abort */
-               if (!ipvs->enable)
+               if (!READ_ONCE(ipvs->enable))
                        goto unlock2;
                kd = ipvs->est_kt_arr[id];
                if (!kd)
@@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
        id = ipvs->est_kt_count;
 
 next_kt:
-       if (!ipvs->enable || kthread_should_stop())
+       if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
                goto unlock;
        id--;
        if (id < 0)
-- 
2.34.1



<Prev in Thread] Current Thread [Next in Thread>