On Wed, 19 Jan 2005, Wensong Zhang wrote:
> Yes, please do it, 64-bit based multiply is fine.
Hey Wensong,
I have put up a new version of the wlib scheduler with 64-bit multiply
instead of 32-bit divide and also with an option for using incoming packet
rate instead of incoming byte rate.
http://www.caputo.com/foss/ipvsadm-1.24-wlib.patch
http://www.caputo.com/foss/lvs_wlib-2.6.10.patch
(also below)
Please let me know of any issues.
Chris
--- patch against ipvsadm-1.24 ---
--- patch against ipvsadm-1.24 ---
diff -upr ipvsadm-1.24/SCHEDULERS ipvsadm-1.24-wlib/SCHEDULERS
--- ipvsadm-1.24/SCHEDULERS 2003-05-10 03:05:26.000000000 +0000
+++ ipvsadm-1.24-wlib/SCHEDULERS 2005-01-13 22:17:23.744596405 +0000
@@ -1 +1 @@
-rr|wrr|lc|wlc|lblc|lblcr|dh|sh|sed|nq
+rr|wrr|lc|wlc|lblc|lblcr|dh|sh|sed|nq|wlib
diff -upr ipvsadm-1.24/ipvsadm.8 ipvsadm-1.24-wlib/ipvsadm.8
--- ipvsadm-1.24/ipvsadm.8 2003-07-05 05:32:38.000000000 +0000
+++ ipvsadm-1.24-wlib/ipvsadm.8 2005-01-13 22:17:23.745596281 +0000
@@ -255,6 +255,10 @@ fixed service rate (weight) of the ith s
\fBnq\fR - Never Queue: assigns an incoming job to an idle server if
there is, instead of waiting for a fast one; if all the servers are
busy, it adopts the Shortest Expected Delay policy to assign the job.
+.sp
+\fBwlib\fR - Weighted Least Incoming Bandwidth: directs network
+connections to the real server with the least incoming bandwidth
+normalized by the server weight.
.TP
.B -p, --persistent [\fItimeout\fP]
Specify that a virtual service is persistent. If this option is
--- patch against linux kernel 2.6.10 ---
--- patch against linux kernel 2.6.10 ---
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
--- linux-2.6.10-stock/net/ipv4/ipvs/Kconfig 2005-01-10 03:57:58.000000000 +0000
+++ linux/net/ipv4/ipvs/Kconfig 2005-01-23 22:39:40.000000000 +0000
@@ -224,6 +224,25 @@ config IP_VS_NQ
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+config IP_VS_WLIB
+ tristate "weighted least incoming bandwidth scheduling"
+ depends on IP_VS
+ ---help---
+ The weighted least incoming bandwidth scheduling algorithm directs
+ network connections to the server with the least incoming bandwidth
+ normalized by the server weight.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_WLIB_PPS
+ bool "Use incoming packet rate rather than incoming byte rate"
+ depends on IP_VS_WLIB
+ ---help---
+ By default the weighted least incoming bandwidth scheduler uses the
+ incoming byte rate in its algorithm. Selecting this option will
+ cause the scheduler to use the incoming packet rate instead.
+
comment 'IPVS application helper'
depends on IP_VS
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
--- linux-2.6.10-stock/net/ipv4/ipvs/Makefile 2005-01-10 03:58:08.000000000 +0000
+++ linux/net/ipv4/ipvs/Makefile 2005-01-23 22:31:56.000000000 +0000
@@ -29,6 +29,7 @@ obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+obj-$(CONFIG_IP_VS_WLIB) += ip_vs_wlib.o
# IPVS application helpers
obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/ip_vs_wlib.c linux/net/ipv4/ipvs/ip_vs_wlib.c
--- linux-2.6.10-stock/net/ipv4/ipvs/ip_vs_wlib.c 2005-01-10 03:59:54.000000000 +0000
+++ linux/net/ipv4/ipvs/ip_vs_wlib.c 2005-01-23 23:46:21.000000000 +0000
@@ -0,0 +1,165 @@
+/*
+ * IPVS: Weighted Least Incoming Bandwidth Scheduling module
+ *
+ * Version: ip_vs_wlib.c 1.01 2005/01/23 ccaputo
+ *
+ * Authors: Chris Caputo <ccaputo@xxxxxxx> based on code by:
+ *
+ * Wensong Zhang <wensong@xxxxxxxxxxxxxxxxxxxxxx>
+ * Peter Kese <peter.kese@xxxxxx>
+ * Julian Anastasov <ja@xxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Chris Caputo: Based code on ip_vs_wlc.c ip_vs_rr.c.
+ *
+ */
+
+/*
+ * The WLIB algorithm uses the results of the estimator's inbps (or inpps)
+ * calculations to determine which real server has the lowest incoming
+ * byterate (or packetrate).
+ *
+ * Real server weight is factored into the calculation. An example way to
+ * use this is if you have one server that can handle 100 Mbps of input and
+ * another that can handle 1 Gbps you could set the weights to be 100 and 1000
+ * respectively.
+ *
+ * If CONFIG_IP_VS_WLIB_PPS is set then inpps (packet rate) is used instead of
+ * inbps (byte rate).
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * Attach the wlib scheduler to a virtual service: point the saved
+ * round-robin position at the head of the destination list.
+ */
+static int
+ip_vs_wlib_init_svc(struct ip_vs_service *svc)
+{
+ svc->sched_data = &svc->destinations;
+ return 0;
+}
+
+
+/* Detach from a service: no per-service state to release. */
+static int
+ip_vs_wlib_done_svc(struct ip_vs_service *svc)
+{
+ return 0;
+}
+
+
+/*
+ * The destination list has changed: reset the saved round-robin
+ * position to the list head so it cannot point at a removed entry.
+ */
+static int
+ip_vs_wlib_update_svc(struct ip_vs_service *svc)
+{
+ svc->sched_data = &svc->destinations;
+ return 0;
+}
+
+
+/*
+ * Weighted Least Incoming Bandwidth scheduling
+ */
+/*
+ * Weighted Least Incoming Bandwidth scheduling
+ *
+ * Select the real server whose measured incoming rate, normalized by
+ * its weight, is lowest.  Returns NULL when no destination is usable
+ * (all are quiesced with weight 0 or marked overloaded).
+ */
+static struct ip_vs_dest *
+ip_vs_wlib_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+ struct list_head *p, *q;
+ struct ip_vs_dest *dest, *least = NULL;
+ u32 dr, lr = -1;
+ int dwgt, lwgt = 0;
+
+ IP_VS_DBG(6, "ip_vs_wlib_schedule(): Scheduling...\n");
+
+ /*
+ * We calculate the load of each dest server as follows:
+ * (dest inbps or inpps rate) / dest->weight
+ *
+ * The comparison of dr*lwgt < lr*dwgt is equivalent to that of
+ * dr/dwgt < lr/lwgt if every weight is larger than zero.
+ *
+ * A server with weight=0 is quiesced and will not receive any
+ * new connections.
+ *
+ * In case of ties, highest weight is winner. And if that still makes
+ * for a tie, round robin is used (which is why we remember our last
+ * starting location in the linked list).
+ */
+
+ write_lock(&svc->sched_lock);
+ p = (struct list_head *)svc->sched_data;
+ p = p->next;
+ q = p;
+ do {
+ /* skip list head */
+ if (q == &svc->destinations) {
+ q = q->next;
+ continue;
+ }
+
+ dest = list_entry(q, struct ip_vs_dest, n_list);
+ dwgt = atomic_read(&dest->weight);
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && dwgt > 0) {
+ /* rate is updated by the estimator under stats.lock */
+ spin_lock(&dest->stats.lock);
+#ifdef CONFIG_IP_VS_WLIB_PPS
+ dr = dest->stats.inpps;
+#else /* CONFIG_IP_VS_WLIB_PPS */
+ dr = dest->stats.inbps;
+#endif /* CONFIG_IP_VS_WLIB_PPS */
+ spin_unlock(&dest->stats.lock);
+
+ /*
+ * Compare normalized loads via 64-bit cross
+ * multiplication. The tie test must also use the
+ * cross products: dr/dwgt == lr/lwgt does not imply
+ * dr == lr (e.g. 2/2 vs 1/1), so comparing the raw
+ * rates would miss ties between servers of unequal
+ * weight and fail to prefer the heavier one.
+ */
+ if (least == NULL ||
+ (u64)dr * (u64)lwgt < (u64)lr * (u64)dwgt ||
+ ((u64)dr * (u64)lwgt == (u64)lr * (u64)dwgt &&
+ dwgt > lwgt)) {
+ least = dest;
+ lr = dr;
+ lwgt = dwgt;
+ svc->sched_data = q;
+ }
+ }
+ q = q->next;
+ } while (q != p);
+ write_unlock(&svc->sched_lock);
+
+ if (least != NULL)
+ IP_VS_DBG(6, "WLIB: server %u.%u.%u.%u:%u "
+ "activeconns %d refcnt %d weight %d\n",
+ NIPQUAD(least->addr), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->refcnt),
+ atomic_read(&least->weight));
+
+ return least;
+}
+
+
+/* Registration record hooking wlib into the IPVS scheduler framework. */
+static struct ip_vs_scheduler ip_vs_wlib_scheduler =
+{
+ .name = "wlib",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .init_service = ip_vs_wlib_init_svc,
+ .done_service = ip_vs_wlib_done_svc,
+ .update_service = ip_vs_wlib_update_svc,
+ .schedule = ip_vs_wlib_schedule,
+};
+
+
+/* Module load: register the scheduler with IPVS. */
+static int __init ip_vs_wlib_init(void)
+{
+ INIT_LIST_HEAD(&ip_vs_wlib_scheduler.n_list);
+ return register_ip_vs_scheduler(&ip_vs_wlib_scheduler);
+}
+
+/* Module unload: unregister the scheduler from IPVS. */
+static void __exit ip_vs_wlib_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_wlib_scheduler);
+}
+
+module_init(ip_vs_wlib_init);
+module_exit(ip_vs_wlib_cleanup);
+MODULE_LICENSE("GPL");
|