Hello,
Here is the backported and slightly enhanced version of the 2.6.x
feature. Since it deviates a lot from the 2.2.x functionality, we need to
discuss how to properly extend the threshold feature to make it usable
for people.
Generally it is an unsolvable problem to handle L7 issues (for example
session handling using cookies and persistence) with L4 technology (a
threshold limitation trying in vain to map an HTTP/S session onto an L7
session) during a hype situation, but introducing a technology to limit
the hits per dest, and later per template, could give us a means to
address such issues in the best possible way.
This patch, among other things, adds the dest threshold limitation. Only
the hprio, rr and wrr schedulers have been tested. Further changes:
o Introduce the ip_vs_is_overloaded() function to enhance legibility in
the schedulers.
o Set the IP_VS_S_FIN_WAIT timeout to 10s. There is absolutely no reason
to have it that high in LVS_DR mode. We need to find a better way to
address this, but having it at 120s (non-defense mode) is an invitation
to local resource starvation.
o ip_vs_dest_totalconns() is introduced to count the active + inactive
sessions. Since this is only half of the puzzle (it is not weighted and
thus does not really tell us anything about the load induced on the RS),
I thought of adding a second threshold limitation which would be
template based in the case of persistence; a rough sketch of the
weighted idea follows after this list.
o I wonder if the following code is really intended that way:
+ if (dest->l_threshold != 0) {
+ if (ip_vs_dest_totalconns(cp, dest) < dest->l_threshold) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ } else if (dest->u_threshold != 0) {
+ // I'm not so sure if this is a good idea. --ratz
+ if (ip_vs_dest_totalconns(cp, dest) * 4 < dest->u_threshold * 3) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ } else {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ }
+
Also, this is not documented anywhere (the 75% OVERLOAD reset, I mean);
the arithmetic is spelled out in a note after this list.
o The is_overloaded() function in the sh and dh schedulers looks kind of
fishy to me. Either I have completely missed the point over all those
years or we definitely need a better means to control the overload of
the dest for the sh and dh schedulers; one possible direction is
sketched after this list.
o Minor cleanup
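
Regarding the weighting of ip_vs_dest_totalconns() above: to make the
idea a bit more concrete, here is a rough sketch of how a
weight-normalized check could look, so that the same u_threshold would
express roughly the same relative load on RS with different weights.
This is not part of the patch and the helper name is made up.

/*
 * Hypothetical sketch only: scale the comparison by the destination
 * weight, so that u_threshold means "connections per unit of weight"
 * instead of an absolute connection count.
 */
static inline int ip_vs_dest_overloaded_weighted(struct ip_vs_conn *cp,
						 struct ip_vs_dest *dest)
{
	__u32 total = ip_vs_dest_totalconns(cp, dest);
	int weight = atomic_read(&dest->weight);

	if (dest->u_threshold == 0 || weight <= 0)
		return 0;
	/* no division needed; same as total / weight >= u_threshold */
	return total >= dest->u_threshold * (__u32) weight;
}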
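
Regarding the undocumented 75% reset above, here is the arithmetic
spelled out (the helper name is made up and this is not in the patch,
it just restates the condition):

/*
 * total * 4 < u_threshold * 3 is the integer form of
 * total < 0.75 * u_threshold: when only u_threshold is set, the
 * OVERLOAD flag is cleared once the connection count has dropped below
 * 75% of the upper threshold (e.g. with u_threshold = 1000 the flag is
 * cleared at 749 total connections or fewer).
 */
static inline int ip_vs_below_three_quarters(__u32 total, __u32 u_threshold)
{
	return total * 4 < u_threshold * 3;
}

If that hysteresis is intended, it should at least be documented.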
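
Regarding the sh/dh overload check above, one possible direction,
purely as a sketch for discussion (the helper name is made up, nothing
like this is in the patch): fall back to the old activeconns versus
weight heuristic only when no upper threshold has been configured, so
that the OVERLOAD flag and the heuristic cannot fight each other.

/*
 * Sketch only: honour an administrator-set threshold via the OVERLOAD
 * flag when one exists, otherwise keep the old "more than twice the
 * weight in active connections" heuristic as a last-resort safety
 * valve for the sh and dh schedulers.
 */
static inline int ip_vs_dh_sh_overloaded(struct ip_vs_dest *dest)
{
	if (dest->u_threshold != 0)
		return ip_vs_is_overloaded(dest);
	return atomic_read(&dest->activeconns) > atomic_read(&dest->weight) * 2;
}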
I just realised that I've mixed up a chunk of the hprio patch with this
patch, namely the Config.in and Makefile parts. Since this patch is not
finished yet anyway, I hope this does not matter too much.
Please discuss.
Thanks,
Roberto Nibali, ratz
--
-------------------------------------------------------------
addr://Kasinostrasse 30, CH-5001 Aarau tel://++41 62 823 9355
http://www.terreactive.com fax://++41 62 823 9356
-------------------------------------------------------------
terreActive AG Wir sichern Ihren Erfolg
-------------------------------------------------------------
diff -Nur linux-2.4.32-orig/include/net/ip_vs.h linux-2.4.32-pab2/include/net/ip_vs.h
--- linux-2.4.32-orig/include/net/ip_vs.h 2005-10-27 15:55:15 +0200
+++ linux-2.4.32-pab2/include/net/ip_vs.h 2005-10-27 17:01:37 +0200
@@ -24,6 +24,7 @@
* Destination Server Flags
*/
#define IP_VS_DEST_F_AVAILABLE 0x0001 /* Available tag */
+#define IP_VS_DEST_F_OVERLOAD 0x0002 /* server is overloaded */
/*
* IPVS sync daemon states
@@ -115,6 +116,8 @@
u_int16_t dport;
unsigned conn_flags; /* destination flags */
int weight; /* destination weight */
+ u_int32_t u_threshold; /* upper threshold */
+ u_int32_t l_threshold; /* lower threshold */
};
@@ -177,6 +180,9 @@
int weight; /* destination weight */
u_int32_t activeconns; /* active connections */
u_int32_t inactconns; /* inactive connections */
+ u_int32_t u_threshold; /* upper threshold */
+ u_int32_t l_threshold; /* lower threshold */
+
/* statistics */
struct ip_vs_stats_user stats;
@@ -486,11 +492,15 @@
unsigned flags; /* dest status flags */
atomic_t weight; /* server weight */
atomic_t conn_flags; /* flags to copy to conn */
- atomic_t activeconns; /* active connections */
- atomic_t inactconns; /* inactive connections */
atomic_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */
+ /* connection counters and thresholds */
+ atomic_t activeconns; /* active connections */
+ atomic_t inactconns; /* inactive connections */
+ __u32 u_threshold; /* upper threshold */
+ __u32 l_threshold; /* lower threshold */
+
/* for destination cache */
spinlock_t dst_lock; /* lock dst_cache */
struct dst_entry *dst_cache; /* destination cache entry */
@@ -935,6 +945,13 @@
return 0;
}
+/*
+ * Server overloaded?
+ */
+static inline int ip_vs_is_overloaded(struct ip_vs_dest *dest) {
+ return dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
#endif /* __KERNEL__ */
#endif /* _IP_VS_H */
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/Config.in linux-2.4.32-pab2/net/ipv4/ipvs/Config.in
--- linux-2.4.32-orig/net/ipv4/ipvs/Config.in 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/Config.in 2005-10-27 17:01:37 +0200
@@ -19,6 +19,7 @@
dep_tristate ' source hashing scheduling' CONFIG_IP_VS_SH $CONFIG_IP_VS
dep_tristate ' shortest expected delay scheduling' CONFIG_IP_VS_SED $CONFIG_IP_VS
dep_tristate ' never queue scheduling' CONFIG_IP_VS_NQ $CONFIG_IP_VS
+ dep_tristate ' highest weight round-robin scheduling' CONFIG_IP_VS_HPRIO $CONFIG_IP_VS
comment 'IPVS application helper'
dep_tristate ' FTP protocol helper' CONFIG_IP_VS_FTP $CONFIG_IP_VS
fi
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/Makefile linux-2.4.32-pab2/net/ipv4/ipvs/Makefile
--- linux-2.4.32-orig/net/ipv4/ipvs/Makefile 2003-11-28 19:26:21 +0100
+++ linux-2.4.32-pab2/net/ipv4/ipvs/Makefile 2005-10-27 17:01:37 +0200
@@ -33,6 +33,7 @@
obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+obj-$(CONFIG_IP_VS_HPRIO) += ip_vs_hprio.o
# IPVS application helpers
obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_conn.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_conn.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_conn.c 2005-10-27 16:00:42 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_conn.c 2005-10-27 17:01:37 +0200
@@ -21,6 +21,7 @@
* and others. Many code here is taken from IP MASQ code of kernel 2.2.
*
* Changes:
+ * Roberto Nibali, ratz: backported per RS threshold limitation from 2.5.x
*
*/
@@ -336,6 +337,7 @@
/*
* Timeout table[state]
+ * [IP_VS_S_FIN_WAIT] = 2*60*HZ,
*/
struct ip_vs_timeout_table vs_timeout_table = {
ATOMIC_INIT(0), /* refcnt */
@@ -345,7 +347,7 @@
[IP_VS_S_ESTABLISHED] = 15*60*HZ,
[IP_VS_S_SYN_SENT] = 2*60*HZ,
[IP_VS_S_SYN_RECV] = 1*60*HZ,
- [IP_VS_S_FIN_WAIT] = 2*60*HZ,
+ [IP_VS_S_FIN_WAIT] = 10*HZ,
[IP_VS_S_TIME_WAIT] = 2*60*HZ,
[IP_VS_S_CLOSE] = 10*HZ,
[IP_VS_S_CLOSE_WAIT] = 60*HZ,
@@ -1077,6 +1079,15 @@
}
}
+static inline int ip_vs_dest_totalconns(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
+{
+ IP_VS_DBG(3, "ip_vs_dest_totalconns: %d(act+inact) %d(dest->refcnt)
%d(cp->refcnt)\n",
+ atomic_read(&dest->activeconns)+atomic_read(&dest->inactconns),
+ atomic_read(&dest->refcnt),
+ atomic_read(&cp->refcnt));
+ return 1 + atomic_read(&dest->activeconns)
+ + atomic_read(&dest->inactconns);
+}
/*
* Bind a connection entry with a virtual service destination
@@ -1096,7 +1107,7 @@
cp->flags |= atomic_read(&dest->conn_flags);
cp->dest = dest;
- IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+ IP_VS_DBG(3, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
"d:%u.%u.%u.%u:%d fwd:%c s:%s flg:%X cnt:%d destcnt:%d\n",
ip_vs_proto_name(cp->protocol),
NIPQUAD(cp->caddr), ntohs(cp->cport),
@@ -1105,6 +1116,14 @@
ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
+ IP_VS_DBG(3, "ip_vs_bind_dest: Checking overload: u:%d t:%d\n",
+ dest->u_threshold, ip_vs_dest_totalconns(cp, dest));
+ if (dest->u_threshold != 0 &&
+ ip_vs_dest_totalconns(cp, dest) >= dest->u_threshold) {
+ IP_VS_DBG(3, "ip_vs_bind_dest: Overload (d:%u.%u.%u.%u:%d)\n",
+ NIPQUAD(cp->daddr), ntohs(cp->dport));
+ dest->flags |= IP_VS_DEST_F_OVERLOAD;
+ }
}
@@ -1143,6 +1162,21 @@
}
}
+ if (dest->l_threshold != 0) {
+ if (ip_vs_dest_totalconns(cp, dest) < dest->l_threshold) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ } else if (dest->u_threshold != 0) {
+ // I'm not so sure if this is a good idea. --ratz
+ if (ip_vs_dest_totalconns(cp, dest) * 4 < dest->u_threshold * 3) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ } else {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ }
+
/*
* Simply decrease the refcnt of the dest, because the
* dest will be either in service's destination list
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_ctl.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_ctl.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_ctl.c 2005-06-01 02:56:56 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_ctl.c 2005-10-27 17:01:37 +0200
@@ -17,6 +17,7 @@
* 2 of the License, or (at your option) any later version.
*
* Changes:
+ * Roberto Nibali, ratz: backported per RS threshold limitation from 2.5.x
*
*/
@@ -713,6 +714,12 @@
/* set the dest status flags */
dest->flags |= IP_VS_DEST_F_AVAILABLE;
+
+ if (ur->u_threshold == 0 || ur->u_threshold > dest->u_threshold) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ dest->u_threshold = ur->u_threshold;
+ dest->l_threshold = ur->l_threshold;
}
@@ -1913,6 +1920,8 @@
entry.port = dest->port;
entry.flags = atomic_read(&dest->conn_flags);
entry.weight = atomic_read(&dest->weight);
+ entry.u_threshold = dest->u_threshold;
+ entry.l_threshold = dest->l_threshold;
entry.activeconns = atomic_read(&dest->activeconns);
entry.inactconns = atomic_read(&dest->inactconns);
__ip_vs_copy_stats(&entry.stats, &dest->stats);
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_dh.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_dh.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_dh.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_dh.c 2005-10-27 17:01:37 +0200
@@ -185,19 +185,6 @@
/*
- * If the number of active connections is twice larger than its weight,
- * consider that the server is overloaded here.
- */
-static inline int is_overloaded(struct ip_vs_dest *dest)
-{
- if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)*2) {
- return 1;
- }
- return 0;
-}
-
-
-/*
* Destination hashing scheduling
*/
static struct ip_vs_dest *
@@ -213,7 +200,7 @@
if (!dest
|| !(dest->flags & IP_VS_DEST_F_AVAILABLE)
|| atomic_read(&dest->weight) <= 0
- || is_overloaded(dest)) {
+ || ip_vs_is_overloaded(dest)) {
return NULL;
}
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lblc.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lblc.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lblc.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lblc.c 2005-10-27 17:01:37 +0200
@@ -473,6 +473,8 @@
l = &svc->destinations;
for (e=l->next; e!=l; e=e->next) {
least = list_entry(e, struct ip_vs_dest, n_list);
+ if (ip_vs_is_overloaded(least))
+ continue;
if (atomic_read(&least->weight) > 0) {
loh = atomic_read(&least->activeconns) * 50
+ atomic_read(&least->inactconns);
@@ -487,6 +489,8 @@
nextstage:
for (e=e->next; e!=l; e=e->next) {
dest = list_entry(e, struct ip_vs_dest, n_list);
+ if (ip_vs_is_overloaded(dest))
+ continue;
doh = atomic_read(&dest->activeconns) * 50
+ atomic_read(&dest->inactconns);
if (loh * atomic_read(&dest->weight) >
@@ -558,7 +562,7 @@
ip_vs_lblc_hash(tbl, en);
} else {
dest = en->dest;
- if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
+ if (!(dest->flags & IP_VS_DEST_F_AVAILABLE) || ip_vs_is_overloaded(dest)
|| atomic_read(&dest->weight) <= 0
|| is_overloaded(dest, svc)) {
dest = __ip_vs_wlc_schedule(svc, iph);
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lblcr.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lblcr.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lblcr.c 2005-10-27 17:01:37 +0200
@@ -178,6 +178,8 @@
/* select the first destination server, whose weight > 0 */
for (e=set->list; e!=NULL; e=e->next) {
least = e->dest;
+ if (ip_vs_is_overloaded(least))
+ continue;
if ((atomic_read(&least->weight) > 0)
&& (least->flags & IP_VS_DEST_F_AVAILABLE)) {
loh = atomic_read(&least->activeconns) * 50
@@ -192,6 +194,8 @@
nextstage:
for (e=e->next; e!=NULL; e=e->next) {
dest = e->dest;
+ if (ip_vs_is_overloaded(dest))
+ continue;
doh = atomic_read(&dest->activeconns) * 50
+ atomic_read(&dest->inactconns);
if ((loh * atomic_read(&dest->weight) >
@@ -723,6 +727,8 @@
l = &svc->destinations;
for (e=l->next; e!=l; e=e->next) {
least = list_entry(e, struct ip_vs_dest, n_list);
+ if (ip_vs_is_overloaded(least))
+ continue;
if (atomic_read(&least->weight) > 0) {
loh = atomic_read(&least->activeconns) * 50
+ atomic_read(&least->inactconns);
@@ -737,6 +743,8 @@
nextstage:
for (e=e->next; e!=l; e=e->next) {
dest = list_entry(e, struct ip_vs_dest, n_list);
+ if (ip_vs_is_overloaded(dest))
+ continue;
doh = atomic_read(&dest->activeconns) * 50
+ atomic_read(&dest->inactconns);
if (loh * atomic_read(&dest->weight) >
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lc.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lc.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lc.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lc.c 2005-10-27 17:01:37 +0200
@@ -79,6 +79,8 @@
l = &svc->destinations;
for (e=l->next; e!=l; e=e->next) {
least = list_entry (e, struct ip_vs_dest, n_list);
+ if (ip_vs_is_overloaded(least))
+ continue;
if (atomic_read(&least->weight) > 0) {
loh = ip_vs_lc_dest_overhead(least);
goto nextstage;
@@ -92,7 +94,8 @@
nextstage:
for (e=e->next; e!=l; e=e->next) {
dest = list_entry(e, struct ip_vs_dest, n_list);
- if (atomic_read(&dest->weight) == 0)
+ if (ip_vs_is_overloaded(dest)
+ || atomic_read(&dest->weight) == 0)
continue;
doh = ip_vs_lc_dest_overhead(dest);
if (doh < loh) {
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_nq.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_nq.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_nq.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_nq.c 2005-10-27 17:01:37 +0200
@@ -99,6 +99,8 @@
l = &svc->destinations;
for (e=l->next; e!=l; e=e->next) {
least = list_entry(e, struct ip_vs_dest, n_list);
+ if (ip_vs_is_overloaded(least))
+ continue;
if (atomic_read(&least->weight) > 0) {
loh = ip_vs_nq_dest_overhead(least);
@@ -117,6 +119,8 @@
nextstage:
for (e=e->next; e!=l; e=e->next) {
dest = list_entry(e, struct ip_vs_dest, n_list);
+ if (ip_vs_is_overloaded(dest))
+ continue;
doh = ip_vs_nq_dest_overhead(dest);
/* return the server directly if it is idle */
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_rr.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_rr.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_rr.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_rr.c 2005-10-27 17:01:37 +0200
@@ -68,7 +68,8 @@
continue;
}
dest = list_entry(q, struct ip_vs_dest, n_list);
- if (atomic_read(&dest->weight) > 0)
+ if (!ip_vs_is_overloaded(dest)
+ && atomic_read(&dest->weight) > 0)
/* HIT */
goto out;
q = q->next;
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_sed.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_sed.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_sed.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_sed.c 2005-10-27 17:01:37 +0200
@@ -103,6 +103,8 @@
l = &svc->destinations;
for (e=l->next; e!=l; e=e->next) {
least = list_entry(e, struct ip_vs_dest, n_list);
+ if (ip_vs_is_overloaded(least))
+ continue;
if (atomic_read(&least->weight) > 0) {
loh = ip_vs_sed_dest_overhead(least);
goto nextstage;
@@ -116,6 +118,8 @@
nextstage:
for (e=e->next; e!=l; e=e->next) {
dest = list_entry(e, struct ip_vs_dest, n_list);
+ if (ip_vs_is_overloaded(dest))
+ continue;
doh = ip_vs_sed_dest_overhead(dest);
if (loh * atomic_read(&dest->weight) >
doh * atomic_read(&least->weight)) {
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_sh.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_sh.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_sh.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_sh.c 2005-10-27 17:01:37 +0200
@@ -182,19 +182,6 @@
/*
- * If the number of active connections is twice larger than its weight,
- * consider that the server is overloaded here.
- */
-static inline int is_overloaded(struct ip_vs_dest *dest)
-{
- if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)*2) {
- return 1;
- }
- return 0;
-}
-
-
-/*
* Source Hashing scheduling
*/
static struct ip_vs_dest *
@@ -210,7 +197,7 @@
if (!dest
|| !(dest->flags & IP_VS_DEST_F_AVAILABLE)
|| atomic_read(&dest->weight) <= 0
- || is_overloaded(dest)) {
+ || ip_vs_is_overloaded(dest)) {
return NULL;
}
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_wlc.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_wlc.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_wlc.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_wlc.c 2005-10-27 17:01:37 +0200
@@ -91,6 +91,8 @@
l = &svc->destinations;
for (e=l->next; e!=l; e=e->next) {
least = list_entry(e, struct ip_vs_dest, n_list);
+ if (ip_vs_is_overloaded(least))
+ continue;
if (atomic_read(&least->weight) > 0) {
loh = ip_vs_wlc_dest_overhead(least);
goto nextstage;
@@ -104,7 +106,8 @@
nextstage:
for (e=e->next; e!=l; e=e->next) {
dest = list_entry(e, struct ip_vs_dest, n_list);
-
+ if (ip_vs_is_overloaded(dest))
+ continue;
doh = ip_vs_wlc_dest_overhead(dest);
if (loh * atomic_read(&dest->weight) >
doh * atomic_read(&least->weight)) {
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_wrr.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_wrr.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_wrr.c 2005-04-04 03:42:20 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_wrr.c 2005-10-27 17:01:37 +0200
@@ -154,14 +154,16 @@
{
struct ip_vs_dest *dest;
struct ip_vs_wrr_mark *mark = svc->sched_data;
+ struct list_head *p;
IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n");
/*
- * This loop will always terminate, because 0<mark->cw<max_weight,
+ * This loop will always terminate, because 0 < mark->cw < max_weight,
* and at least one server has its weight equal to max_weight.
*/
write_lock(&svc->sched_lock);
+ p = mark->cl;
while (1) {
if (mark->cl == &svc->destinations) {
/* it is at the head of the destination list */
@@ -187,17 +189,29 @@
return NULL;
}
}
- }
- else mark->cl = mark->cl->next;
+ } else
+ mark->cl = mark->cl->next;
if (mark->cl != &svc->destinations) {
/* not at the head of the list */
dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
- if (atomic_read(&dest->weight) >= mark->cw) {
+ if (!ip_vs_is_overloaded(dest)
+ && atomic_read(&dest->weight) >= mark->cw) {
write_unlock(&svc->sched_lock);
break;
}
}
+
+ if (mark->cl == p) {
+ /*
+ We're back to the start and no dest has been found.
+ It's only possible if all dests are OVERLOADED. This
+ while loop in ip_vs_wrr_schedule() should be adapted
+ to match the 2.6.x kernel function, using goto.
+ */
+ write_unlock(&svc->sched_lock);
+ return NULL;
+ }
}
IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u "
@@ -206,8 +220,7 @@
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt),
atomic_read(&dest->weight));
-
- return dest;
+ return dest;
}