LVS
lvs-devel
Google
 
Web LinuxVirtualServer.org

[PATCH nf-next] ipvs: add conn_max sysctl to limit connections

To: Simon Horman <horms@xxxxxxxxxxxx>
Subject: [PATCH nf-next] ipvs: add conn_max sysctl to limit connections
Cc: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx>, Florian Westphal <fw@xxxxxxxxx>, lvs-devel@xxxxxxxxxxxxxxx, netfilter-devel@xxxxxxxxxxxxxxx
From: Julian Anastasov <ja@xxxxxx>
Date: Fri, 22 May 2026 13:55:45 +0300
Currently, we are using atomic_t to track the number of
connections. On 64-bit setups with large memory there is
a risk of this counter overflowing. Also, setups with many
containers may need to tune the limit for connections.

Add a sysctl control to limit the number of connections, up to
1,073,741,824 (64-bit) or 16,777,216 (32-bit).
Depending on the admin's privilege, the written value is
used to change either both the soft and hard limits or only
the soft limit, allowing unprivileged admins to change the
soft limit within the range determined by privileged admins.

Signed-off-by: Julian Anastasov <ja@xxxxxx>
---
 Documentation/networking/ipvs-sysctl.rst | 35 ++++++++++++++++++
 include/net/ip_vs.h                      | 22 +++++++++++
 net/netfilter/ipvs/ip_vs_conn.c          | 10 ++++-
 net/netfilter/ipvs/ip_vs_ctl.c           | 47 ++++++++++++++++++++++++
 4 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst
index a556439f8be7..b6bac2612420 100644
--- a/Documentation/networking/ipvs-sysctl.rst
+++ b/Documentation/networking/ipvs-sysctl.rst
@@ -56,6 +56,41 @@ conn_lfactor - INTEGER
        -4: grow if load goes above 6% (buckets = nodes * 16)
        2: grow if load goes above 400% (buckets = nodes / 4)
 
+conn_max - INTEGER
+       Limit for number of connections, per netns.
+
+       Controls the soft and hard limits for the number of connections.
+       Initially, the platform-specific limit is assigned for init_net.
+       The value can be changed, and the soft limit of init_net is then
+       inherited by newly created networking namespaces.
+
+       A privileged admin can change both limits up to the value of the
+       platform limit, while an unprivileged admin can change only the
+       soft limit up to the value of the hard limit.
+
+       For setups using conntrack=1 (CONFIG_IP_VS_NFCT for
+       Netfilter connection tracking) the connections can be
+       limited also by nf_conntrack_max.
+
+                               soft limit      hard limit
+       =====================================================
+       init_net:
+       create netns            platform        platform
+       priv admin              0 .. platform   0 .. platform
+       =====================================================
+       new netns:
+       create netns            init_net:soft   init_net:soft
+       priv admin              0 .. platform   0 .. platform
+       unpriv admin            0 .. hard       N/A
+
+       Limits per platform:
+       1,073,741,824 (2^30 for 64-bit)
+          16,777,216 (2^24 for 32-bit)
+
+       Possible values: 0 .. platform limit
+
+       Default: platform limit
+
 conn_reuse_mode - INTEGER
        1 - default
 
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a02e569813d2..5b3d1c681231 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -44,6 +44,14 @@
 #define IP_VS_CONN_TAB_MAX_BITS        20
 #endif
 
+/* conn_max limits */
+#if BITS_PER_LONG > 32
+/* Limit of atomic_t but restricted by roundup_pow_of_two() in ip_vs_core.c */
+#define IP_VS_CONN_MAX (1 << 30)
+#else
+#define IP_VS_CONN_MAX (1 << 24)
+#endif
+
 /* svc_table limits */
 #define IP_VS_SVC_TAB_MIN_BITS 4
 #define IP_VS_SVC_TAB_MAX_BITS 20
@@ -1220,6 +1228,10 @@ struct netns_ipvs {
        /* sysctl variables */
        int                     sysctl_amemthresh;
        int                     sysctl_am_droprate;
+#ifdef CONFIG_SYSCTL
+       int                     sysctl_conn_max;/* soft limit for conns */
+       int                     conn_max_limit; /* hard limit for conn_max */
+#endif
        int                     sysctl_drop_entry;
        int                     sysctl_drop_packet;
        int                     sysctl_secure_tcp;
@@ -1317,6 +1329,11 @@ struct netns_ipvs {
 
 #ifdef CONFIG_SYSCTL
 
+static inline int sysctl_conn_max(struct netns_ipvs *ipvs)
+{
+       return READ_ONCE(ipvs->sysctl_conn_max);
+}
+
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
 {
        return ipvs->sysctl_sync_threshold[0];
@@ -1436,6 +1453,11 @@ static inline int sysctl_est_nice(struct netns_ipvs *ipvs)
 
 #else
 
+static inline int sysctl_conn_max(struct netns_ipvs *ipvs)
+{
+       return IP_VS_CONN_MAX;
+}
+
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
 {
        return DEFAULT_SYNC_THRESHOLD;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9ea6b4fa78bf..e76a73d183d5 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1358,9 +1358,18 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
        struct netns_ipvs *ipvs = p->ipvs;
        struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs,
                                                           p->protocol);
+       /* Increment conn_count up to conn_max */
+       int count = atomic_read(&ipvs->conn_count);
+       int max = sysctl_conn_max(ipvs);
+
+       do {
+               if (count >= max)
+                       return NULL;
+       } while (!atomic_try_cmpxchg(&ipvs->conn_count, &count, count + 1));
 
        cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
        if (cp == NULL) {
+               atomic_dec(&ipvs->conn_count);
                IP_VS_ERR_RL("%s(): no memory\n", __func__);
                return NULL;
        }
@@ -1414,7 +1423,6 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
        cp->in_seq.delta = 0;
        cp->out_seq.delta = 0;
 
-       atomic_inc(&ipvs->conn_count);
        if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) {
                int af_id = ip_vs_af_index(cp->af);
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index bd9cae44d214..bd9d494b208a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2319,6 +2319,39 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
 
 #ifdef CONFIG_SYSCTL
 
+static int
+proc_do_conn_max(const struct ctl_table *table, int write,
+                void *buffer, size_t *lenp, loff_t *ppos)
+{
+       int *valp = table->data;
+       int val = *valp;
+       int rc;
+
+       const struct ctl_table tmp = {
+               .data = &val,
+               .maxlen = sizeof(int),
+               .mode = table->mode,
+       };
+
+       rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+       if (write && (*valp != val)) {
+               struct netns_ipvs *ipvs = table->extra2;
+               bool priv = capable(CAP_NET_ADMIN);
+               /* Unprivileged admins can not go above the hard limit */
+               int max = priv ? IP_VS_CONN_MAX : ipvs->conn_max_limit;
+
+               if (val < 0 || val > max) {
+                       rc = -EINVAL;
+               } else {
+                       /* Privileged admin changes both limits */
+                       if (priv)
+                               ipvs->conn_max_limit = val;
+                       WRITE_ONCE(*valp, val);
+               }
+       }
+       return rc;
+}
+
 static int
 proc_do_defense_mode(const struct ctl_table *table, int write,
                     void *buffer, size_t *lenp, loff_t *ppos)
@@ -2623,6 +2656,12 @@ static struct ctl_table vs_vars[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "conn_max",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_do_conn_max,
+       },
        {
                .procname       = "drop_entry",
                .maxlen         = sizeof(int),
@@ -4977,6 +5016,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
        tbl[idx++].data = &ipvs->sysctl_amemthresh;
        ipvs->sysctl_am_droprate = 10;
        tbl[idx++].data = &ipvs->sysctl_am_droprate;
+
+       /* Inherit both limits from init_net:conn_max */
+       ipvs->conn_max_limit = net_eq(net, &init_net) ? IP_VS_CONN_MAX :
+                              READ_ONCE(*(int *)vs_vars[idx].data);
+       ipvs->sysctl_conn_max = ipvs->conn_max_limit;
+       tbl[idx].extra2 = ipvs;
+       tbl[idx++].data = &ipvs->sysctl_conn_max;
+
        tbl[idx++].data = &ipvs->sysctl_drop_entry;
        tbl[idx++].data = &ipvs->sysctl_drop_packet;
 #ifdef CONFIG_IP_VS_NFCT
-- 
2.54.0




<Prev in Thread] Current Thread [Next in Thread>