[PATCH 2.4] backported and enhanced per real server threshold limitation

To: "LinuxVirtualServer.org users mailing list." <lvs-users@xxxxxxxxxxxxxxxxxxxxxx>
Subject: [PATCH 2.4] backported and enhanced per real server threshold limitation against 2.4-git
From: Roberto Nibali <ratz@xxxxxx>
Date: Thu, 27 Oct 2005 19:20:16 +0200
Hello,

Here is the backported and slightly enhanced version of the 2.6.x
feature. Since it deviates a lot from the 2.2.x functionality, we need to
discuss how to properly extend the threshold feature to make it usable
for people.

Generally, it is an unsolvable problem to handle L7 issues (for example
session handling using cookies and persistence) with L4 technology during
a hype situation, since threshold limitation tries, in vain, to map an
HTTP/S session onto an L7 session. Still, introducing a mechanism to limit
the hits per dest, and later per template, would give us a means to
address such issues in the best possible way.

This patch, among other things, adds the per-dest threshold limitation.
Only the hprio, rr and wrr schedulers have been tested. Further changes:

o Introduce the ip_vs_is_overloaded() function to enhance legibility in
  the schedulers.
o Set the IP_VS_S_FIN_WAIT timeout to 10s. There is absolutely no reason
  to have it that high in LVS_DR mode. We need to find a better way to
  address this, but having it at 120s (non-defense mode) is an invitation
  to local resource starvation: at, say, 1000 connections per second
  entering FIN_WAIT, a 120s timeout keeps roughly 120,000 stale entries
  in the connection table, versus about 10,000 with 10s.
o ip_vs_dest_totalconns() is introduced to count the active + inactive
  sessions. Since this is only half of the puzzle (it is not weighted and
  thus does not really tell us anything about the load induced on the RS),
  I thought of adding a second threshold limitation which would be
  template-based in the case of persistence.
o I wonder if the following code is really intended that way:

+       if (dest->l_threshold != 0) {
+               if (ip_vs_dest_totalconns(cp, dest) < dest->l_threshold) {
+                       dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+               }
+       } else if (dest->u_threshold != 0) {
+               // I'm not so sure if this is a good idea. --ratz
+               if (ip_vs_dest_totalconns(cp, dest) * 4 < dest->u_threshold * 3) {
+                       dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+               }
+       } else {
+               if (dest->flags & IP_VS_DEST_F_OVERLOAD) {
+                       dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+               }
+       }
+

  Also, this is not documented at all (the 75% OVERLOAD reset, I mean);
  see the small userspace sketch right after this list.
o The is_overloaded() functions in the sh and dh schedulers look kind of
  fishy to me. Either I have completely missed the point over all those
  years, or we definitely need a better means to control the overload of
  the dest for the sh and dh schedulers.
o Minor cleanup
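
To see what the quoted threshold logic amounts to in numbers, here is a
minimal, self-contained userspace sketch of the same hysteresis; the
kernel code naturally operates on the atomic counters and the dest->flags
bit instead, and the sample values below are made up:

#include <stdio.h>

/* Userspace mirror of the per-dest overload hysteresis added around
 * ip_vs_bind_dest() in ip_vs_conn.c: set OVERLOAD once the connection
 * count reaches u_threshold, clear it below l_threshold, or below 3/4
 * of u_threshold when no l_threshold is configured. */
struct dest {
        unsigned int conns;        /* active + inactive connections */
        unsigned int u_threshold;  /* upper threshold, 0 = unlimited */
        unsigned int l_threshold;  /* lower threshold, 0 = unset */
        int overloaded;
};

static void update_overload(struct dest *d)
{
        if (d->u_threshold != 0 && d->conns >= d->u_threshold)
                d->overloaded = 1;

        if (d->l_threshold != 0) {
                if (d->conns < d->l_threshold)
                        d->overloaded = 0;
        } else if (d->u_threshold != 0) {
                /* the undocumented 75% reset questioned above */
                if (d->conns * 4 < d->u_threshold * 3)
                        d->overloaded = 0;
        } else {
                d->overloaded = 0;
        }
}

int main(void)
{
        /* example: u_threshold = 1000, no l_threshold configured */
        struct dest d = { .conns = 0, .u_threshold = 1000, .l_threshold = 0 };
        unsigned int samples[] = { 999, 1000, 900, 760, 750, 749 };
        unsigned int i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                d.conns = samples[i];
                update_overload(&d);
                printf("conns=%4u overloaded=%d\n", d.conns, d.overloaded);
        }
        /* the flag is set at 1000 and only cleared below 750, i.e. 75% */
        return 0;
}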

I just realised that I've mixed up a chunk of the hprio patch with this
patch, namely the Config.in and Makefile part. Since this patch is not
finished yet anyway, I hope this does not matter too much.

Please discuss.

Thanks,
Roberto Nibali, ratz
-- 
-------------------------------------------------------------
addr://Kasinostrasse 30, CH-5001 Aarau tel://++41 62 823 9355
http://www.terreactive.com             fax://++41 62 823 9356
-------------------------------------------------------------
terreActive AG                       Wir sichern Ihren Erfolg
-------------------------------------------------------------
diff -Nur linux-2.4.32-orig/include/net/ip_vs.h linux-2.4.32-pab2/include/net/ip_vs.h
--- linux-2.4.32-orig/include/net/ip_vs.h       2005-10-27 15:55:15 +0200
+++ linux-2.4.32-pab2/include/net/ip_vs.h       2005-10-27 17:01:37 +0200
@@ -24,6 +24,7 @@
  *      Destination Server Flags
  */
 #define IP_VS_DEST_F_AVAILABLE        0x0001    /* Available tag */
+#define IP_VS_DEST_F_OVERLOAD         0x0002    /* server is overloaded */
 
 /*
  *      IPVS sync daemon states
@@ -115,6 +116,8 @@
        u_int16_t       dport;
        unsigned        conn_flags;     /* destination flags */
        int             weight;         /* destination weight */
+       u_int32_t       u_threshold;    /* upper threshold */
+       u_int32_t       l_threshold;    /* lower threshold */
 };
 
 
@@ -177,6 +180,9 @@
        int             weight;         /* destination weight */
        u_int32_t       activeconns;    /* active connections */
        u_int32_t       inactconns;     /* inactive connections */
+       u_int32_t       u_threshold;    /* upper threshold */
+       u_int32_t       l_threshold;    /* lower threshold */
+
 
        /* statistics */
        struct ip_vs_stats_user stats;
@@ -486,11 +492,15 @@
        unsigned                flags;    /* dest status flags */
        atomic_t                weight;   /* server weight */
        atomic_t                conn_flags;     /* flags to copy to conn */
-       atomic_t                activeconns;    /* active connections */
-       atomic_t                inactconns;     /* inactive connections */
        atomic_t                refcnt;         /* reference counter */
        struct ip_vs_stats      stats;          /* statistics */
 
+       /* connection counters and thresholds */
+       atomic_t                activeconns;    /* active connections */
+       atomic_t                inactconns;     /* inactive connections */
+       __u32                   u_threshold;    /* upper threshold */
+       __u32                   l_threshold;    /* lower threshold */
+
        /* for destination cache */
        spinlock_t              dst_lock;       /* lock dst_cache */
        struct dst_entry        *dst_cache;     /* destination cache entry */
@@ -935,6 +945,13 @@
        return 0;
 }
 
+/*
+ *     Server overloaded? 
+ */
+static inline int ip_vs_is_overloaded(struct ip_vs_dest *dest) {
+       return dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _IP_VS_H */
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/Config.in linux-2.4.32-pab2/net/ipv4/ipvs/Config.in
--- linux-2.4.32-orig/net/ipv4/ipvs/Config.in   2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/Config.in   2005-10-27 17:01:37 +0200
@@ -19,6 +19,7 @@
   dep_tristate '  source hashing scheduling' CONFIG_IP_VS_SH $CONFIG_IP_VS
  dep_tristate '  shortest expected delay scheduling' CONFIG_IP_VS_SED $CONFIG_IP_VS
   dep_tristate '  never queue scheduling' CONFIG_IP_VS_NQ $CONFIG_IP_VS
+  dep_tristate '  highest weight round-robin scheduling' CONFIG_IP_VS_HPRIO $CONFIG_IP_VS
   comment 'IPVS application helper'
   dep_tristate '  FTP protocol helper' CONFIG_IP_VS_FTP $CONFIG_IP_VS
 fi
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/Makefile linux-2.4.32-pab2/net/ipv4/ipvs/Makefile
--- linux-2.4.32-orig/net/ipv4/ipvs/Makefile    2003-11-28 19:26:21 +0100
+++ linux-2.4.32-pab2/net/ipv4/ipvs/Makefile    2005-10-27 17:01:37 +0200
@@ -33,6 +33,7 @@
 obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
 obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
 obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+obj-$(CONFIG_IP_VS_HPRIO) += ip_vs_hprio.o
 
 # IPVS application helpers
 obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_conn.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_conn.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_conn.c        2005-10-27 16:00:42 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_conn.c        2005-10-27 17:01:37 +0200
@@ -21,6 +21,7 @@
  * and others. Many code here is taken from IP MASQ code of kernel 2.2.
  *
  * Changes:
+ *     Roberto Nibali, ratz: backported per RS threshold limitation from 2.5.x
  *
  */
 
@@ -336,6 +337,7 @@
 
 /*
  *     Timeout table[state]
+ *             [IP_VS_S_FIN_WAIT]      =       2*60*HZ,
  */
 struct ip_vs_timeout_table vs_timeout_table = {
        ATOMIC_INIT(0), /* refcnt */
@@ -345,7 +347,7 @@
                [IP_VS_S_ESTABLISHED]   =       15*60*HZ,
                [IP_VS_S_SYN_SENT]      =       2*60*HZ,
                [IP_VS_S_SYN_RECV]      =       1*60*HZ,
-               [IP_VS_S_FIN_WAIT]      =       2*60*HZ,
+               [IP_VS_S_FIN_WAIT]      =       10*HZ,
                [IP_VS_S_TIME_WAIT]     =       2*60*HZ,
                [IP_VS_S_CLOSE]         =       10*HZ,
                [IP_VS_S_CLOSE_WAIT]    =       60*HZ,
@@ -1077,6 +1079,15 @@
        }
 }
 
+static inline int ip_vs_dest_totalconns(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
+{
+       IP_VS_DBG(3, "ip_vs_dest_totalconns: %d(act+inact) %d(dest->refcnt) %d(cp->refcnt)\n",
+               atomic_read(&dest->activeconns)+atomic_read(&dest->inactconns),
+               atomic_read(&dest->refcnt),
+               atomic_read(&cp->refcnt));
+       return 1 + atomic_read(&dest->activeconns)
+               + atomic_read(&dest->inactconns);
+}
 
 /*
  *  Bind a connection entry with a virtual service destination
@@ -1096,7 +1107,7 @@
        cp->flags |= atomic_read(&dest->conn_flags);
        cp->dest = dest;
 
-       IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
+       IP_VS_DBG(3, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
                  "d:%u.%u.%u.%u:%d fwd:%c s:%s flg:%X cnt:%d destcnt:%d\n",
                  ip_vs_proto_name(cp->protocol),
                  NIPQUAD(cp->caddr), ntohs(cp->cport),
@@ -1105,6 +1116,14 @@
                  ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
                  cp->flags, atomic_read(&cp->refcnt),
                  atomic_read(&dest->refcnt));
+       IP_VS_DBG(3, "ip_vs_bind_dest: Checking overload: u:%d t:%d\n",
+               dest->u_threshold, ip_vs_dest_totalconns(cp, dest));
+       if (dest->u_threshold != 0 &&
+           ip_vs_dest_totalconns(cp, dest) >= dest->u_threshold) {
+               IP_VS_DBG(3, "ip_vs_bind_dest: Overload (d:%u.%u.%u.%u:%d)\n",
+                       NIPQUAD(cp->daddr), ntohs(cp->dport));
+               dest->flags |= IP_VS_DEST_F_OVERLOAD;
+       }
 }
 
 
@@ -1143,6 +1162,21 @@
                }
        }
 
+       if (dest->l_threshold != 0) {
+               if (ip_vs_dest_totalconns(cp, dest) < dest->l_threshold) {
+                       dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+               }
+       } else if (dest->u_threshold != 0) {
+               // I'm not so sure if this is a good idea. --ratz
+               if (ip_vs_dest_totalconns(cp, dest) * 4 < dest->u_threshold * 3) {
+                       dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+               }
+       } else {
+               if (dest->flags & IP_VS_DEST_F_OVERLOAD) {
+                       dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+               }
+       }
+
        /*
         * Simply decrease the refcnt of the dest, because the
         * dest will be either in service's destination list
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_ctl.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_ctl.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_ctl.c 2005-06-01 02:56:56 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_ctl.c 2005-10-27 17:01:37 +0200
@@ -17,6 +17,7 @@
  *              2 of the License, or (at your option) any later version.
  *
  * Changes:
+ *     Roberto Nibali, ratz: backported per RS threshold limitation from 2.5.x
  *
  */
 
@@ -713,6 +714,12 @@
 
        /* set the dest status flags */
        dest->flags |= IP_VS_DEST_F_AVAILABLE;
+
+       if (ur->u_threshold == 0 || ur->u_threshold > dest->u_threshold) {
+               dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+       }
+       dest->u_threshold = ur->u_threshold;
+       dest->l_threshold = ur->l_threshold;
 }
 
 
@@ -1913,6 +1920,8 @@
                        entry.port = dest->port;
                        entry.flags = atomic_read(&dest->conn_flags);
                        entry.weight = atomic_read(&dest->weight);
+                       entry.u_threshold = dest->u_threshold;
+                       entry.l_threshold = dest->l_threshold;
                        entry.activeconns = atomic_read(&dest->activeconns);
                        entry.inactconns = atomic_read(&dest->inactconns);
                        __ip_vs_copy_stats(&entry.stats, &dest->stats);
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_dh.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_dh.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_dh.c  2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_dh.c  2005-10-27 17:01:37 +0200
@@ -185,19 +185,6 @@
 
 
 /*
- *      If the number of active connections is twice larger than its weight,
- *      consider that the server is overloaded here.
- */
-static inline int is_overloaded(struct ip_vs_dest *dest)
-{
-       if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)*2) {
-               return 1;
-       }
-       return 0;
-}
-
-
-/*
  *      Destination hashing scheduling
  */
 static struct ip_vs_dest *
@@ -213,7 +200,7 @@
        if (!dest
            || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
            || atomic_read(&dest->weight) <= 0
-           || is_overloaded(dest)) {
+           || ip_vs_is_overloaded(dest)) {
                return NULL;
        }
 
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lblc.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lblc.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lblc.c        2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lblc.c        2005-10-27 17:01:37 +0200
@@ -473,6 +473,8 @@
        l = &svc->destinations;
        for (e=l->next; e!=l; e=e->next) {
                least = list_entry(e, struct ip_vs_dest, n_list);
+               if (ip_vs_is_overloaded(least))
+                       continue;
                if (atomic_read(&least->weight) > 0) {
                        loh = atomic_read(&least->activeconns) * 50
                                + atomic_read(&least->inactconns);
@@ -487,6 +489,8 @@
   nextstage:
        for (e=e->next; e!=l; e=e->next) {
                dest = list_entry(e, struct ip_vs_dest, n_list);
+               if (ip_vs_is_overloaded(dest))
+                       continue;
                doh = atomic_read(&dest->activeconns) * 50
                        + atomic_read(&dest->inactconns);
                if (loh * atomic_read(&dest->weight) >
@@ -558,7 +562,7 @@
                ip_vs_lblc_hash(tbl, en);
        } else {
                dest = en->dest;
-               if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
+               if (!(ip_vs_is_overloaded(dest))
                    || atomic_read(&dest->weight) <= 0
                    || is_overloaded(dest, svc)) {
                        dest = __ip_vs_wlc_schedule(svc, iph);
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lblcr.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lblcr.c       2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lblcr.c       2005-10-27 17:01:37 +0200
@@ -178,6 +178,8 @@
        /* select the first destination server, whose weight > 0 */
        for (e=set->list; e!=NULL; e=e->next) {
                least = e->dest;
+               if (ip_vs_is_overloaded(least))
+                       continue;
                if ((atomic_read(&least->weight) > 0)
                    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
                        loh = atomic_read(&least->activeconns) * 50
@@ -192,6 +194,8 @@
   nextstage:
        for (e=e->next; e!=NULL; e=e->next) {
                dest = e->dest;
+               if (ip_vs_is_overloaded(dest))
+                       continue;
                doh = atomic_read(&dest->activeconns) * 50
                        + atomic_read(&dest->inactconns);
                if ((loh * atomic_read(&dest->weight) >
@@ -723,6 +727,8 @@
        l = &svc->destinations;
        for (e=l->next; e!=l; e=e->next) {
                least = list_entry(e, struct ip_vs_dest, n_list);
+               if (ip_vs_is_overloaded(least))
+                       continue;
                if (atomic_read(&least->weight) > 0) {
                        loh = atomic_read(&least->activeconns) * 50
                                + atomic_read(&least->inactconns);
@@ -737,6 +743,8 @@
   nextstage:
        for (e=e->next; e!=l; e=e->next) {
                dest = list_entry(e, struct ip_vs_dest, n_list);
+               if (ip_vs_is_overloaded(dest))
+                       continue;
                doh = atomic_read(&dest->activeconns) * 50
                        + atomic_read(&dest->inactconns);
                if (loh * atomic_read(&dest->weight) >
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lc.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lc.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_lc.c  2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_lc.c  2005-10-27 17:01:37 +0200
@@ -79,6 +79,8 @@
        l = &svc->destinations;
        for (e=l->next; e!=l; e=e->next) {
                least = list_entry (e, struct ip_vs_dest, n_list);
+               if (ip_vs_is_overloaded(least))
+                       continue;
                if (atomic_read(&least->weight) > 0) {
                        loh = ip_vs_lc_dest_overhead(least);
                        goto nextstage;
@@ -92,7 +94,8 @@
   nextstage:
        for (e=e->next; e!=l; e=e->next) {
                dest = list_entry(e, struct ip_vs_dest, n_list);
-               if (atomic_read(&dest->weight) == 0)
+               if (ip_vs_is_overloaded(dest)
+                   || atomic_read(&dest->weight) == 0)
                        continue;
                doh = ip_vs_lc_dest_overhead(dest);
                if (doh < loh) {
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_nq.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_nq.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_nq.c  2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_nq.c  2005-10-27 17:01:37 +0200
@@ -99,6 +99,8 @@
        l = &svc->destinations;
        for (e=l->next; e!=l; e=e->next) {
                least = list_entry(e, struct ip_vs_dest, n_list);
+               if (ip_vs_is_overloaded(least))
+                       continue;
                if (atomic_read(&least->weight) > 0) {
                        loh = ip_vs_nq_dest_overhead(least);
 
@@ -117,6 +119,8 @@
   nextstage:
        for (e=e->next; e!=l; e=e->next) {
                dest = list_entry(e, struct ip_vs_dest, n_list);
+               if (ip_vs_is_overloaded(dest))
+                       continue;
                doh = ip_vs_nq_dest_overhead(dest);
 
                /* return the server directly if it is idle */
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_rr.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_rr.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_rr.c  2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_rr.c  2005-10-27 17:01:37 +0200
@@ -68,7 +68,8 @@
                        continue;
                }
                dest = list_entry(q, struct ip_vs_dest, n_list);
-               if (atomic_read(&dest->weight) > 0)
+               if (!ip_vs_is_overloaded(dest)
+                   && atomic_read(&dest->weight) > 0)
                        /* HIT */
                        goto out;
                q = q->next;
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_sed.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_sed.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_sed.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_sed.c 2005-10-27 17:01:37 +0200
@@ -103,6 +103,8 @@
        l = &svc->destinations;
        for (e=l->next; e!=l; e=e->next) {
                least = list_entry(e, struct ip_vs_dest, n_list);
+               if (ip_vs_is_overloaded(least))
+                       continue;
                if (atomic_read(&least->weight) > 0) {
                        loh = ip_vs_sed_dest_overhead(least);
                        goto nextstage;
@@ -116,6 +118,8 @@
   nextstage:
        for (e=e->next; e!=l; e=e->next) {
                dest = list_entry(e, struct ip_vs_dest, n_list);
+               if (ip_vs_is_overloaded(dest))
+                       continue;
                doh = ip_vs_sed_dest_overhead(dest);
                if (loh * atomic_read(&dest->weight) >
                    doh * atomic_read(&least->weight)) {
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_sh.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_sh.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_sh.c  2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_sh.c  2005-10-27 17:01:37 +0200
@@ -182,19 +182,6 @@
 
 
 /*
- *      If the number of active connections is twice larger than its weight,
- *      consider that the server is overloaded here.
- */
-static inline int is_overloaded(struct ip_vs_dest *dest)
-{
-       if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)*2) {
-               return 1;
-       }
-       return 0;
-}
-
-
-/*
  *      Source Hashing scheduling
  */
 static struct ip_vs_dest *
@@ -210,7 +197,7 @@
        if (!dest
            || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
            || atomic_read(&dest->weight) <= 0
-           || is_overloaded(dest)) {
+           || ip_vs_is_overloaded(dest)) {
                return NULL;
        }
 
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_wlc.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_wlc.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_wlc.c 2004-04-14 15:05:41 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_wlc.c 2005-10-27 17:01:37 +0200
@@ -91,6 +91,8 @@
        l = &svc->destinations;
        for (e=l->next; e!=l; e=e->next) {
                least = list_entry(e, struct ip_vs_dest, n_list);
+               if (ip_vs_is_overloaded(least))
+                       continue;
                if (atomic_read(&least->weight) > 0) {
                        loh = ip_vs_wlc_dest_overhead(least);
                        goto nextstage;
@@ -104,7 +106,8 @@
   nextstage:
        for (e=e->next; e!=l; e=e->next) {
                dest = list_entry(e, struct ip_vs_dest, n_list);
-
+               if (ip_vs_is_overloaded(dest))
+                       continue;
                doh = ip_vs_wlc_dest_overhead(dest);
                if (loh * atomic_read(&dest->weight) >
                    doh * atomic_read(&least->weight)) {
diff -Nur linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_wrr.c linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_wrr.c
--- linux-2.4.32-orig/net/ipv4/ipvs/ip_vs_wrr.c 2005-04-04 03:42:20 +0200
+++ linux-2.4.32-pab2/net/ipv4/ipvs/ip_vs_wrr.c 2005-10-27 17:01:37 +0200
@@ -154,14 +154,16 @@
 {
        struct ip_vs_dest *dest;
        struct ip_vs_wrr_mark *mark = svc->sched_data;
+       struct list_head *p;
 
        IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n");
 
        /*
-        * This loop will always terminate, because 0<mark->cw<max_weight,
+        * This loop will always terminate, because 0 < mark->cw < max_weight,
         * and at least one server has its weight equal to max_weight.
         */
        write_lock(&svc->sched_lock);
+       p = mark->cl;
        while (1) {
                if (mark->cl == &svc->destinations) {
                        /* it is at the head of the destination list */
@@ -187,17 +189,29 @@
                                        return NULL;
                                }
                        }
-               }
-               else mark->cl = mark->cl->next;
+               } else
+                       mark->cl = mark->cl->next;
 
                if (mark->cl != &svc->destinations) {
                        /* not at the head of the list */
                        dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
-                       if (atomic_read(&dest->weight) >= mark->cw) {
+                       if (!ip_vs_is_overloaded(dest)
+                           && atomic_read(&dest->weight) >= mark->cw) {
                                write_unlock(&svc->sched_lock);
                                break;
                        }
                }
+
+               if (mark->cl == p) {
+                       /*
+                          We're back to the start and no dest has been found.
+                          It's only possible if all dests are OVERLOADED. This
+                          whole ip_vs_wrr_schedule() section should be adapted
+                          to match the 2.6.x kernel function, using goto.
+                        */
+                       write_unlock(&svc->sched_lock);
+                       return NULL;
+               }
        }
 
        IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u "
@@ -206,8 +220,7 @@
                  atomic_read(&dest->activeconns),
                  atomic_read(&dest->refcnt),
                  atomic_read(&dest->weight));
-
-       return  dest;
+       return dest;
 }
 
 



