|
Currently, we are using atomic_t to track the number of
connections. On 64-bit setups with large memory there is
a risk this counter to overflow. Also, setups with many
containers may need to tune the limit for connections.
Add sysctl control to limit the number of connections to
1,073,741,824 (64-bit) and 16,777,216 (32-bit).
Depending on the admin's privilege, the value is
used to change a soft or hard limit allowing
unprivileged admins to change the soft limit in
range determined by privileged admins.
Signed-off-by: Julian Anastasov <ja@xxxxxx>
---
Documentation/networking/ipvs-sysctl.rst | 35 ++++++++++++++++++
include/net/ip_vs.h | 22 +++++++++++
net/netfilter/ipvs/ip_vs_conn.c | 10 ++++-
net/netfilter/ipvs/ip_vs_ctl.c | 47 ++++++++++++++++++++++++
4 files changed, 113 insertions(+), 1 deletion(-)
diff --git a/Documentation/networking/ipvs-sysctl.rst
b/Documentation/networking/ipvs-sysctl.rst
index a556439f8be7..b6bac2612420 100644
--- a/Documentation/networking/ipvs-sysctl.rst
+++ b/Documentation/networking/ipvs-sysctl.rst
@@ -56,6 +56,41 @@ conn_lfactor - INTEGER
-4: grow if load goes above 6% (buckets = nodes * 16)
2: grow if load goes above 400% (buckets = nodes / 4)
+conn_max - INTEGER
+ Limit for number of connections, per netns.
+
+ Controls the soft and hard limit for number of connections.
+ Initially, the platform specific limit is assigned for init_net.
+ The value can be changed and later the soft limit propagated
+ to other networking namespaces.
+
+ Privileged admin can change both limits up to the value of the
+ platform limit while the unprivileged admin can change only the
+ soft limit up to the value of the hard limit.
+
+ For setups using conntrack=1 (CONFIG_IP_VS_NFCT for
+ Netfilter connection tracking) the connections can be
+ limited also by nf_conntrack_max.
+
+ soft limit hard limit
+ =====================================================
+ init_net:
+ create netns platform platform
+ priv admin 0 .. platform 0 .. platform
+ =====================================================
+ new netns:
+ create netns init_net:soft init_net:soft
+ priv admin 0 .. platform 0 .. platform
+ unpriv admin 0 .. hard N/A
+
+ Limits per platform:
+ 1,073,741,824 (2^30 for 64-bit)
+ 16,777,216 (2^24 for 32-bit)
+
+ Possible values: 0 .. platform limit
+
+ Default: platform limit
+
conn_reuse_mode - INTEGER
1 - default
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a02e569813d2..5b3d1c681231 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -44,6 +44,14 @@
#define IP_VS_CONN_TAB_MAX_BITS 20
#endif
+/* conn_max limits */
+#if BITS_PER_LONG > 32
+/* Limit of atomic_t but restricted by roundup_pow_of_two() in ip_vs_core.c */
+#define IP_VS_CONN_MAX (1 << 30)
+#else
+#define IP_VS_CONN_MAX (1 << 24)
+#endif
+
/* svc_table limits */
#define IP_VS_SVC_TAB_MIN_BITS 4
#define IP_VS_SVC_TAB_MAX_BITS 20
@@ -1220,6 +1228,10 @@ struct netns_ipvs {
/* sysctl variables */
int sysctl_amemthresh;
int sysctl_am_droprate;
+#ifdef CONFIG_SYSCTL
+ int sysctl_conn_max;/* soft limit for conns */
+ int conn_max_limit; /* hard limit for conn_max */
+#endif
int sysctl_drop_entry;
int sysctl_drop_packet;
int sysctl_secure_tcp;
@@ -1317,6 +1329,11 @@ struct netns_ipvs {
#ifdef CONFIG_SYSCTL
+static inline int sysctl_conn_max(struct netns_ipvs *ipvs)
+{
+ return READ_ONCE(ipvs->sysctl_conn_max);
+}
+
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_threshold[0];
@@ -1436,6 +1453,11 @@ static inline int sysctl_est_nice(struct netns_ipvs
*ipvs)
#else
+static inline int sysctl_conn_max(struct netns_ipvs *ipvs)
+{
+ return IP_VS_CONN_MAX;
+}
+
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
{
return DEFAULT_SYNC_THRESHOLD;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9ea6b4fa78bf..e76a73d183d5 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1358,9 +1358,18 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int
dest_af,
struct netns_ipvs *ipvs = p->ipvs;
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs,
p->protocol);
+ /* Increment conn_count up to conn_max */
+ int count = atomic_read(&ipvs->conn_count);
+ int max = sysctl_conn_max(ipvs);
+
+ do {
+ if (count >= max)
+ return NULL;
+ } while (!atomic_try_cmpxchg(&ipvs->conn_count, &count, count + 1));
cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
+ atomic_dec(&ipvs->conn_count);
IP_VS_ERR_RL("%s(): no memory\n", __func__);
return NULL;
}
@@ -1414,7 +1423,6 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int
dest_af,
cp->in_seq.delta = 0;
cp->out_seq.delta = 0;
- atomic_inc(&ipvs->conn_count);
if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) {
int af_id = ip_vs_af_index(cp->af);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index bd9cae44d214..bd9d494b208a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2319,6 +2319,39 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
#ifdef CONFIG_SYSCTL
+static int
+proc_do_conn_max(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = table->data;
+ int val = *valp;
+ int rc;
+
+ const struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ };
+
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write && (*valp != val)) {
+ struct netns_ipvs *ipvs = table->extra2;
+ bool priv = capable(CAP_NET_ADMIN);
+ /* Unprivileged admins can not go above the hard limit */
+ int max = priv ? IP_VS_CONN_MAX : ipvs->conn_max_limit;
+
+ if (val < 0 || val > max) {
+ rc = -EINVAL;
+ } else {
+ /* Privileged admin changes both limits */
+ if (priv)
+ ipvs->conn_max_limit = val;
+ WRITE_ONCE(*valp, val);
+ }
+ }
+ return rc;
+}
+
static int
proc_do_defense_mode(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -2623,6 +2656,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "conn_max",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_conn_max,
+ },
{
.procname = "drop_entry",
.maxlen = sizeof(int),
@@ -4977,6 +5016,14 @@ static int __net_init
ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_amemthresh;
ipvs->sysctl_am_droprate = 10;
tbl[idx++].data = &ipvs->sysctl_am_droprate;
+
+ /* Inherit both limits from init_net:conn_max */
+ ipvs->conn_max_limit = net_eq(net, &init_net) ? IP_VS_CONN_MAX :
+ READ_ONCE(*(int *)vs_vars[idx].data);
+ ipvs->sysctl_conn_max = ipvs->conn_max_limit;
+ tbl[idx].extra2 = ipvs;
+ tbl[idx++].data = &ipvs->sysctl_conn_max;
+
tbl[idx++].data = &ipvs->sysctl_drop_entry;
tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
--
2.54.0
|