On Wed, 19 Jan 2005, Wensong Zhang wrote:
> Yes, please do it, 64-bit based multiply is fine.
Hey Wensong,
I have put up a new version of the wlib scheduler with 64-bit multiply
instead of 32-bit divide and also with an option for using incoming packet
rate instead of incoming byte rate.
http://www.caputo.com/foss/ipvsadm-1.24-wlib.patch
http://www.caputo.com/foss/lvs_wlib-2.6.10.patch
(also below)
Please let me know of any issues.
Chris
--- patch against ipvsadm-1.24 ---
--- patch against ipvsadm-1.24 ---
diff -upr ipvsadm-1.24/SCHEDULERS ipvsadm-1.24-wlib/SCHEDULERS
--- ipvsadm-1.24/SCHEDULERS 2003-05-10 03:05:26.000000000 +0000
+++ ipvsadm-1.24-wlib/SCHEDULERS 2005-01-13 22:17:23.744596405 +0000
@@ -1 +1 @@
-rr|wrr|lc|wlc|lblc|lblcr|dh|sh|sed|nq
+rr|wrr|lc|wlc|lblc|lblcr|dh|sh|sed|nq|wlib
diff -upr ipvsadm-1.24/ipvsadm.8 ipvsadm-1.24-wlib/ipvsadm.8
--- ipvsadm-1.24/ipvsadm.8 2003-07-05 05:32:38.000000000 +0000
+++ ipvsadm-1.24-wlib/ipvsadm.8 2005-01-13 22:17:23.745596281 +0000
@@ -255,6 +255,10 @@ fixed service rate (weight) of the ith s
\fBnq\fR - Never Queue: assigns an incoming job to an idle server if
there is, instead of waiting for a fast one; if all the servers are
busy, it adopts the Shortest Expected Delay policy to assign the job.
+.sp
+\fBwlib\fR - Weighted Least Incoming Bandwidth: directs network
+connections to the real server with the least incoming bandwidth
+normalized by the server weight.
.TP
.B -p, --persistent [\fItimeout\fP]
Specify that a virtual service is persistent. If this option is
--- patch against linux kernel 2.6.10 ---
--- patch against linux kernel 2.6.10 ---
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
--- linux-2.6.10-stock/net/ipv4/ipvs/Kconfig 2005-01-10 03:57:58.000000000 +0000
+++ linux/net/ipv4/ipvs/Kconfig 2005-01-23 22:39:40.000000000 +0000
@@ -224,6 +224,25 @@ config IP_VS_NQ
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+config IP_VS_WLIB
+ tristate "weighted least incoming bandwidth scheduling"
+ depends on IP_VS
+ ---help---
+ The weighted least incoming bandwidth scheduling algorithm directs
+ network connections to the server with the least incoming bandwidth
+ normalized by the server weight.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_WLIB_PPS
+ bool "Use incoming packet rate rather than incoming byte rate"
+ depends on IP_VS_WLIB
+ ---help---
+ By default the weighted least incoming bandwidth scheduler uses the
+ incoming byte rate in its algorithm. Selecting this option will
+ cause the scheduler to use the incoming packet rate instead.
+
comment 'IPVS application helper'
depends on IP_VS
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
--- linux-2.6.10-stock/net/ipv4/ipvs/Makefile 2005-01-10 03:58:08.000000000 +0000
+++ linux/net/ipv4/ipvs/Makefile 2005-01-23 22:31:56.000000000 +0000
@@ -29,6 +29,7 @@ obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+obj-$(CONFIG_IP_VS_WLIB) += ip_vs_wlib.o
# IPVS application helpers
obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/ip_vs_wlib.c linux/net/ipv4/ipvs/ip_vs_wlib.c
--- linux-2.6.10-stock/net/ipv4/ipvs/ip_vs_wlib.c 2005-01-10 03:59:54.000000000 +0000
+++ linux/net/ipv4/ipvs/ip_vs_wlib.c 2005-01-23 23:46:21.000000000 +0000
@@ -0,0 +1,165 @@
+/*
+ * IPVS: Weighted Least Incoming Bandwidth Scheduling module
+ *
+ * Version: ip_vs_wlib.c 1.01 2005/01/23 ccaputo
+ *
+ * Authors: Chris Caputo <ccaputo@xxxxxxx> based on code by:
+ *
+ * Wensong Zhang <wensong@xxxxxxxxxxxxxxxxxxxxxx>
+ * Peter Kese <peter.kese@xxxxxx>
+ * Julian Anastasov <ja@xxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Chris Caputo: Based code on ip_vs_wlc.c ip_vs_rr.c.
+ *
+ */
+
+/*
+ * The WLIB algorithm uses the results of the estimator's inbps (or inpps)
+ * calculations to determine which real server has the lowest incoming
+ * byterate (or packetrate).
+ *
+ * Real server weight is factored into the calculation. An example way to
+ * use this is if you have one server that can handle 100 Mbps of input and
+ * another that can handle 1 Gbps you could set the weights to be 100 and 1000
+ * respectively.
+ *
+ * If CONFIG_IP_VS_WLIB_PPS is set then inpps (packet rate) is used instead of
+ * inbps (byte rate).
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+/*
+ * Attach the wlib scheduler to a virtual service: point the saved
+ * round-robin position at the head of the destination list.
+ */
+static int
+ip_vs_wlib_init_svc(struct ip_vs_service *svc)
+{
+ svc->sched_data = &svc->destinations;
+ return 0;
+}
+
+
+/* Detach from a service: no per-service state to release. */
+static int
+ip_vs_wlib_done_svc(struct ip_vs_service *svc)
+{
+ return 0;
+}
+
+
+/*
+ * The destination list has changed: reset the saved round-robin
+ * position to the list head so it cannot point at a removed entry.
+ */
+static int
+ip_vs_wlib_update_svc(struct ip_vs_service *svc)
+{
+ svc->sched_data = &svc->destinations;
+ return 0;
+}
+
+
+/*
+ * Weighted Least Incoming Bandwidth scheduling
+ */
+/*
+ * Weighted Least Incoming Bandwidth scheduling
+ *
+ * Select the real server whose measured incoming rate, normalized by
+ * its weight, is lowest.  Returns NULL when no destination is usable
+ * (all are quiesced with weight 0 or marked overloaded).
+ */
+static struct ip_vs_dest *
+ip_vs_wlib_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+ struct list_head *p, *q;
+ struct ip_vs_dest *dest, *least = NULL;
+ u32 dr, lr = -1;
+ int dwgt, lwgt = 0;
+
+ IP_VS_DBG(6, "ip_vs_wlib_schedule(): Scheduling...\n");
+
+ /*
+ * We calculate the load of each dest server as follows:
+ * (dest inbps or inpps rate) / dest->weight
+ *
+ * The comparison of dr*lwgt < lr*dwgt is equivalent to that of
+ * dr/dwgt < lr/lwgt if every weight is larger than zero.
+ *
+ * A server with weight=0 is quiesced and will not receive any
+ * new connections.
+ *
+ * In case of ties, highest weight is winner. And if that still makes
+ * for a tie, round robin is used (which is why we remember our last
+ * starting location in the linked list).
+ */
+
+ write_lock(&svc->sched_lock);
+ p = (struct list_head *)svc->sched_data;
+ p = p->next;
+ q = p;
+ do {
+ /* skip list head */
+ if (q == &svc->destinations) {
+ q = q->next;
+ continue;
+ }
+
+ dest = list_entry(q, struct ip_vs_dest, n_list);
+ dwgt = atomic_read(&dest->weight);
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && dwgt > 0) {
+ /* rate is updated by the estimator under stats.lock */
+ spin_lock(&dest->stats.lock);
+#ifdef CONFIG_IP_VS_WLIB_PPS
+ dr = dest->stats.inpps;
+#else /* CONFIG_IP_VS_WLIB_PPS */
+ dr = dest->stats.inbps;
+#endif /* CONFIG_IP_VS_WLIB_PPS */
+ spin_unlock(&dest->stats.lock);
+
+ /*
+ * Compare normalized loads via 64-bit cross
+ * multiplication. The tie test must also use the
+ * cross products: dr/dwgt == lr/lwgt does not imply
+ * dr == lr (e.g. 2/2 vs 1/1), so comparing the raw
+ * rates would miss ties between servers of unequal
+ * weight and fail to prefer the heavier one.
+ */
+ if (least == NULL ||
+ (u64)dr * (u64)lwgt < (u64)lr * (u64)dwgt ||
+ ((u64)dr * (u64)lwgt == (u64)lr * (u64)dwgt &&
+ dwgt > lwgt)) {
+ least = dest;
+ lr = dr;
+ lwgt = dwgt;
+ svc->sched_data = q;
+ }
+ }
+ q = q->next;
+ } while (q != p);
+ write_unlock(&svc->sched_lock);
+
+ if (least != NULL)
+ IP_VS_DBG(6, "WLIB: server %u.%u.%u.%u:%u "
+ "activeconns %d refcnt %d weight %d\n",
+ NIPQUAD(least->addr), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->refcnt),
+ atomic_read(&least->weight));
+
+ return least;
+}
+
+
+/* Registration record hooking wlib into the IPVS scheduler framework. */
+static struct ip_vs_scheduler ip_vs_wlib_scheduler =
+{
+ .name = "wlib",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .init_service = ip_vs_wlib_init_svc,
+ .done_service = ip_vs_wlib_done_svc,
+ .update_service = ip_vs_wlib_update_svc,
+ .schedule = ip_vs_wlib_schedule,
+};
+
+
+/* Module load: register the scheduler with IPVS. */
+static int __init ip_vs_wlib_init(void)
+{
+ INIT_LIST_HEAD(&ip_vs_wlib_scheduler.n_list);
+ return register_ip_vs_scheduler(&ip_vs_wlib_scheduler);
+}
+
+/* Module unload: unregister the scheduler from IPVS. */
+static void __exit ip_vs_wlib_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_wlib_scheduler);
+}
+
+module_init(ip_vs_wlib_init);
+module_exit(ip_vs_wlib_cleanup);
+MODULE_LICENSE("GPL");
|