LVS
lvs-devel
Google
 
Web LinuxVirtualServer.org

Re: [PATCH v3 nf-next] ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable

To: Zhang Tengfei <zhtfdev@xxxxxxxxx>
Subject: Re: [PATCH v3 nf-next] ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable
Cc: coreteam@xxxxxxxxxxxxx, davem@xxxxxxxxxxxxx, dsahern@xxxxxxxxxx, edumazet@xxxxxxxxxx, fw@xxxxxxxxx, horms@xxxxxxxxxxxx, kadlec@xxxxxxxxxxxxx, kuba@xxxxxxxxxx, lvs-devel@xxxxxxxxxxxxxxx, netfilter-devel@xxxxxxxxxxxxxxx, pabeni@xxxxxxxxxx, pablo@xxxxxxxxxxxxx, syzbot+1651b5234028c294c339@xxxxxxxxxxxxxxxxxxxxxxxxx
From: Julian Anastasov <ja@xxxxxx>
Date: Wed, 3 Sep 2025 20:31:22 +0300 (EEST)
        Hello,

On Mon, 1 Sep 2025, Zhang Tengfei wrote:

> KCSAN reported a data-race on the `ipvs->enable` flag, which is
> written in the control path and read concurrently from many other
> contexts.
> 
> Following a suggestion by Julian, this patch fixes the race by
> converting all accesses to use `WRITE_ONCE()/READ_ONCE()`.
> This lightweight approach ensures atomic access and acts as a
> compiler barrier, preventing unsafe optimizations where the flag
> is checked in loops (e.g., in ip_vs_est.c).
> 
> Additionally, the `enable` checks in the fast-path hooks
> (`ip_vs_in_hook`, `ip_vs_out_hook`, `ip_vs_forward_icmp`) are
> removed. These are unnecessary since commit 857ca89711de
> ("ipvs: register hooks only with services"). The `enable=0`
> condition they check for can only occur in two rare and non-fatal
> scenarios: 1) after hooks are registered but before the flag is set,
> and 2) after hooks are unregistered on cleanup_net. In the worst
> case, a single packet might be mishandled (e.g., dropped), which
> does not lead to a system crash or data corruption. Adding a check
> in the performance-critical fast-path to handle this harmless
> condition is not a worthwhile trade-off.
> 
> Fixes: 857ca89711de ("ipvs: register hooks only with services")
> Reported-by: syzbot+1651b5234028c294c339@xxxxxxxxxxxxxxxxxxxxxxxxx
> Closes: https://syzkaller.appspot.com/bug?extid=1651b5234028c294c339
> Suggested-by: Julian Anastasov <ja@xxxxxx>
> Link: 
> https://lore.kernel.org/lvs-devel/2189fc62-e51e-78c9-d1de-d35b8e3657e3@xxxxxx/
> Signed-off-by: Zhang Tengfei <zhtfdev@xxxxxxxxx>

        Looks good to me, thanks!

Acked-by: Julian Anastasov <ja@xxxxxx>

> ---
> v3:
> - Restore reference to commit 857ca89711de in commit message.
> - Add corresponding Fixes tag.
> v2:
> - Switched from atomic_t to the suggested READ_ONCE()/WRITE_ONCE().
> - Removed obsolete checks from the packet processing hooks.
> - Polished commit message based on feedback from maintainers.
> ---
>  net/netfilter/ipvs/ip_vs_conn.c |  4 ++--
>  net/netfilter/ipvs/ip_vs_core.c | 11 ++++-------
>  net/netfilter/ipvs/ip_vs_ctl.c  |  6 +++---
>  net/netfilter/ipvs/ip_vs_est.c  | 16 ++++++++--------
>  4 files changed, 17 insertions(+), 20 deletions(-)
> 
> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> index 965f3c8e508..37ebb0cb62b 100644
> --- a/net/netfilter/ipvs/ip_vs_conn.c
> +++ b/net/netfilter/ipvs/ip_vs_conn.c
> @@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
>                        * conntrack cleanup for the net.
>                        */
>                       smp_rmb();
> -                     if (ipvs->enable)
> +                     if (READ_ONCE(ipvs->enable))
>                               ip_vs_conn_drop_conntrack(cp);
>               }
>  
> @@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs 
> *ipvs)
>               cond_resched_rcu();
>  
>               /* netns clean up started, abort delayed work */
> -             if (!ipvs->enable)
> +             if (!READ_ONCE(ipvs->enable))
>                       break;
>       }
>       rcu_read_unlock();
> diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
> index c7a8a08b730..5ea7ab8bf4d 100644
> --- a/net/netfilter/ipvs/ip_vs_core.c
> +++ b/net/netfilter/ipvs/ip_vs_core.c
> @@ -1353,9 +1353,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const 
> struct nf_hook_state *stat
>       if (unlikely(!skb_dst(skb)))
>               return NF_ACCEPT;
>  
> -     if (!ipvs->enable)
> -             return NF_ACCEPT;
> -
>       ip_vs_fill_iph_skb(af, skb, false, &iph);
>  #ifdef CONFIG_IP_VS_IPV6
>       if (af == AF_INET6) {
> @@ -1940,7 +1937,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const 
> struct nf_hook_state *state
>               return NF_ACCEPT;
>       }
>       /* ipvs enabled in this netns ? */
> -     if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
> +     if (unlikely(sysctl_backup_only(ipvs)))
>               return NF_ACCEPT;
>  
>       ip_vs_fill_iph_skb(af, skb, false, &iph);
> @@ -2108,7 +2105,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
>       int r;
>  
>       /* ipvs enabled in this netns ? */
> -     if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
> +     if (unlikely(sysctl_backup_only(ipvs)))
>               return NF_ACCEPT;
>  
>       if (state->pf == NFPROTO_IPV4) {
> @@ -2295,7 +2292,7 @@ static int __net_init __ip_vs_init(struct net *net)
>               return -ENOMEM;
>  
>       /* Hold the beast until a service is registered */
> -     ipvs->enable = 0;
> +     WRITE_ONCE(ipvs->enable, 0);
>       ipvs->net = net;
>       /* Counters used for creating unique names */
>       ipvs->gen = atomic_read(&ipvs_netns_cnt);
> @@ -2367,7 +2364,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct 
> list_head *net_list)
>               ipvs = net_ipvs(net);
>               ip_vs_unregister_hooks(ipvs, AF_INET);
>               ip_vs_unregister_hooks(ipvs, AF_INET6);
> -             ipvs->enable = 0;       /* Disable packet reception */
> +             WRITE_ONCE(ipvs->enable, 0);    /* Disable packet reception */
>               smp_wmb();
>               ip_vs_sync_net_cleanup(ipvs);
>       }
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index 6a6fc447853..4c8fa22be88 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct 
> *work)
>               struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
>  
>               /* netns clean up started, abort delayed work */
> -             if (!ipvs->enable)
> +             if (!READ_ONCE(ipvs->enable))
>                       goto unlock;
>               if (!kd)
>                       continue;
> @@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct 
> ip_vs_service_user_kern *u,
>  
>       *svc_p = svc;
>  
> -     if (!ipvs->enable) {
> +     if (!READ_ONCE(ipvs->enable)) {
>               /* Now there is a service - full throttle */
> -             ipvs->enable = 1;
> +             WRITE_ONCE(ipvs->enable, 1);
>  
>               /* Start estimation for first time */
>               ip_vs_est_reload_start(ipvs);
> diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
> index 15049b82673..93a925f1ed9 100644
> --- a/net/netfilter/ipvs/ip_vs_est.c
> +++ b/net/netfilter/ipvs/ip_vs_est.c
> @@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
>  void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
>  {
>       /* Ignore reloads before first service is added */
> -     if (!ipvs->enable)
> +     if (!READ_ONCE(ipvs->enable))
>               return;
>       ip_vs_est_stopped_recalc(ipvs);
>       /* Bump the kthread configuration genid */
> @@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
>       int i;
>  
>       if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
> -         ipvs->enable && ipvs->est_max_threads)
> +         READ_ONCE(ipvs->enable) && ipvs->est_max_threads)
>               return -EINVAL;
>  
>       mutex_lock(&ipvs->est_mutex);
> @@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
>       }
>  
>       /* Start kthread tasks only when services are present */
> -     if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
> +     if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
>               ret = ip_vs_est_kthread_start(ipvs, kd);
>               if (ret < 0)
>                       goto out;
> @@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct 
> ip_vs_stats *stats)
>       struct ip_vs_estimator *est = &stats->est;
>       int ret;
>  
> -     if (!ipvs->est_max_threads && ipvs->enable)
> +     if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))
>               ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
>  
>       est->ktid = -1;
> @@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, 
> int *chain_max)
>                       /* Wait for cpufreq frequency transition */
>                       wait_event_idle_timeout(wq, kthread_should_stop(),
>                                               HZ / 50);
> -                     if (!ipvs->enable || kthread_should_stop())
> +                     if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
>                               goto stop;
>               }
>  
> @@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, 
> int *chain_max)
>               rcu_read_unlock();
>               local_bh_enable();
>  
> -             if (!ipvs->enable || kthread_should_stop())
> +             if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
>                       goto stop;
>               cond_resched();
>  
> @@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
>       mutex_lock(&ipvs->est_mutex);
>       for (id = 1; id < ipvs->est_kt_count; id++) {
>               /* netns clean up started, abort */
> -             if (!ipvs->enable)
> +             if (!READ_ONCE(ipvs->enable))
>                       goto unlock2;
>               kd = ipvs->est_kt_arr[id];
>               if (!kd)
> @@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
>       id = ipvs->est_kt_count;
>  
>  next_kt:
> -     if (!ipvs->enable || kthread_should_stop())
> +     if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
>               goto unlock;
>       id--;
>       if (id < 0)
> -- 
> 2.34.1

Regards

--
Julian Anastasov <ja@xxxxxx>



<Prev in Thread] Current Thread [Next in Thread>