Hello,
Today is LVS day and so I should like to share some of the work done on
the threshold limitation for 2.4.x kernels (2.6.x will follow). The
patch addresses the following problems:
o If an RS is overloaded and quiesced and afterwards taken out
administratively by a healthcheck, the avail_dests counter is decremented
twice, although being overloaded already means one destination less. The
problem is present because avail_dests implements two different
semantics: the availability of a server and the overflow state of a
server. This has been addressed in a way that decrementing is only
done if dest was not previously in overload or overflow mode. It's
an incremental patch on top of my previous work in this area.
o Added proc-fs tunables threshold_factor and enable_threshold_factor.
enable_threshold_factor is to enable the functionality and
threshold_factor is a constant being used in the new calculation
formula for active and inactive connections regarding threshold
limitation.
The notion of using ac + ic (active plus inactive connections) as the
total connection count is considered to be a bad approach, so a new
formula to express the total conns has been implemented:
totalconns = 10 * ac + c * ic
This is a non-linear behaviour and can be used to instantly tune the
threshold behaviour. It's mostly used in conjunction with http services
where incoming requests vary in HTTP/1.0 and HTTP/1.1 and pipelining and
keepalive settings. If threshold limitation is used to protect the real
servers this allows one to quickly react upon wrong quiescing of
destinations. c is the value which can be set using threshold_factor.
I'm in the process of designing a simulation using agent-based modeling
for a web based dynamic behaviour, but initial tests showed that this
formula is already improving the situation. There is absolutely no
mathematical background behind this, it's also believed to be impossible
to express in a mathematical form.
Please discuss if you find time ;).
Regards,
Roberto Nibali, ratz
--
-------------------------------------------------------------
addr://Kasinostrasse 30, CH-5001 Aarau tel://++41 62 823 9355
http://www.terreactive.com fax://++41 62 823 9356
-------------------------------------------------------------
terreActive AG Wir sichern Ihren Erfolg
-------------------------------------------------------------
diff -X dontdiff -Nur linux-2.4.32-orig/include/net/ip_vs.h
linux-2.4.32-pab2/include/net/ip_vs.h
--- linux-2.4.32-orig/include/net/ip_vs.h 2005-11-21 12:06:21 +0100
+++ linux-2.4.32-pab2/include/net/ip_vs.h 2006-01-09 17:31:31 +0100
@@ -731,6 +731,8 @@
extern int sysctl_ip_vs_expire_quiescent_template;
extern int sysctl_ip_vs_sync_threshold;
extern int sysctl_ip_vs_nat_icmp_send;
+extern int sysctl_ip_vs_enable_threshold_factor;
+extern int sysctl_ip_vs_threshold_factor;
extern struct ip_vs_stats ip_vs_stats;
extern struct ip_vs_service *ip_vs_service_get(__u32 fwmark,
diff -X dontdiff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_conn.c
linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_conn.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_conn.c 2005-11-21 12:06:21
+0100
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_conn.c 2006-01-09 18:23:46
+0100
@@ -1081,8 +1080,22 @@
static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
{
- return atomic_read(&dest->activeconns)
- + atomic_read(&dest->inactconns);
+ /* 2006/01/09: ratz
+ The notion of having ac + ic for a total connection is considered
+ to be a bad approach, so a new formula to express the total conns
+ has been implemented:
+
+ totalconns = 10 * ac + c * ic
+
+ c is settable via proc-fs (threshold_factor) and defaults to 10.
+ */
+ if (sysctl_ip_vs_enable_threshold_factor > 0) {
+ return 10 * atomic_read(&dest->activeconns) +
+ sysctl_ip_vs_threshold_factor *
atomic_read(&dest->inactconns);
+ } else {
+ return atomic_read(&dest->activeconns) +
+ atomic_read(&dest->inactconns);
+ }
}
/*
@@ -1129,6 +1142,7 @@
IP_VS_DBG(3, "Bind-dest: Threshold handling: avail_dests=%d\n",
atomic_read(&dest->svc->avail_dests));
if (dest->u_threshold != 0
+ && dest->flags & IP_VS_DEST_F_AVAILABLE
&& !(dest->flags & IP_VS_DEST_F_OVERLOAD)
&& ip_vs_dest_totalconns(dest) >= dest->u_threshold) {
dest->flags |= IP_VS_DEST_F_OVERLOAD;
@@ -1183,7 +1197,8 @@
atomic_read(&dest->svc->avail_dests));
if (dest->l_threshold != 0) {
/* This implies that the upper threshold is != 0 as well */
- if ((dest->flags & IP_VS_DEST_F_OVERLOAD)
+ if ((dest->flags & IP_VS_DEST_F_AVAILABLE)
+ && (dest->flags & IP_VS_DEST_F_OVERLOAD)
&& ip_vs_dest_totalconns(dest) <= dest->l_threshold) {
dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
atomic_inc(&dest->svc->avail_dests);
@@ -1192,7 +1207,8 @@
} else {
/* We drop in here if the upper threshold is != 0 and the
lower threshold is ==0. */
- if (dest->flags & IP_VS_DEST_F_OVERLOAD) {
+ if ((dest->flags & IP_VS_DEST_F_AVAILABLE)
+ && (dest->flags & IP_VS_DEST_F_OVERLOAD)) {
dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
atomic_inc(&dest->svc->avail_dests);
dest->svc->flags &= ~IP_VS_SVC_F_OVERLOAD;
diff -X dontdiff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_ctl.c
linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_ctl.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_ctl.c 2005-11-21 12:06:21 +0100
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_ctl.c 2006-01-09 18:13:57 +0100
@@ -80,6 +80,8 @@
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold = 3;
int sysctl_ip_vs_nat_icmp_send = 0;
+int sysctl_ip_vs_threshold_factor = 10;
+int sysctl_ip_vs_enable_threshold_factor = 0;
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;
@@ -844,7 +846,8 @@
list_add(&dest->n_list, &svc->destinations);
svc->num_dests++;
- if (!(dest->flags & IP_VS_DEST_F_OVERFLOW)) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERFLOW) &&
+ !(dest->flags & IP_VS_DEST_F_OVERLOAD)) {
atomic_inc(&svc->avail_dests);
}
@@ -988,7 +991,19 @@
*/
list_del(&dest->n_list);
svc->num_dests--;
- if (!(dest->flags & IP_VS_DEST_F_OVERFLOW)) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERFLOW) &&
+ !(dest->flags & IP_VS_DEST_F_OVERLOAD)) {
+ IP_VS_DBG(3, "Decrementing avail_dest[%d/%d] (f:%d/%d/%d)\n",
+ atomic_read(&svc->avail_dests),
+ atomic_read(&dest->svc->avail_dests),
+ svc->flags,
+ dest->svc->flags,
+ IP_VS_SVC_F_OVERLOAD
+ );
+ /* Only decrement avail_dests if the dest is not overloaded
+ because avail_dests has been decremented already when dest
+ became overloaded in ip_vs_conn.
+ */
atomic_dec(&svc->avail_dests);
}
if (svcupd) {
@@ -1474,6 +1489,12 @@
{NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send",
&sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL,
&proc_dointvec},
+ {NET_IPV4_VS_NAT_ICMP_SEND, "threshold_factor",
+ &sysctl_ip_vs_threshold_factor, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_VS_NAT_ICMP_SEND, "enable_threshold_factor",
+ &sysctl_ip_vs_enable_threshold_factor, sizeof(int), 0644, NULL,
+ &proc_dointvec},
{NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template",
&sysctl_ip_vs_expire_quiescent_template, sizeof(int), 0644, NULL,
&proc_dointvec},
|