Hi Chris,
Thanks for the patch.
It looks like recording the current pointer in svc->sched_data is probably
not necessary, because the scheduler iterates over the servers from
beginning to end every time. Could you make two separate schedulers, such as
Weighted Least Incoming Byterate and Weighted Least Incoming Packetrate?
Selecting packet rate or byte rate at compile time is not good for
distributing a built kernel.
Thanks,
Wensong
On Sun, 23 Jan 2005, Chris Caputo wrote:
On Wed, 19 Jan 2005, Wensong Zhang wrote:
Yes, please do it, 64-bit based multiply is fine.
Hey Wensong,
I have put up a new version of the wlib scheduler with 64-bit multiply
instead of 32-bit divide and also with an option for using incoming packet
rate instead of incoming byte rate.
http://www.caputo.com/foss/ipvsadm-1.24-wlib.patch
http://www.caputo.com/foss/lvs_wlib-2.6.10.patch
(also below)
Please let me know of any issues.
Chris
--- patch against ipvsadm-1.24 ---
--- patch against ipvsadm-1.24 ---
diff -upr ipvsadm-1.24/SCHEDULERS ipvsadm-1.24-wlib/SCHEDULERS
--- ipvsadm-1.24/SCHEDULERS 2003-05-10 03:05:26.000000000 +0000
+++ ipvsadm-1.24-wlib/SCHEDULERS 2005-01-13 22:17:23.744596405 +0000
@@ -1 +1 @@
-rr|wrr|lc|wlc|lblc|lblcr|dh|sh|sed|nq
+rr|wrr|lc|wlc|lblc|lblcr|dh|sh|sed|nq|wlib
diff -upr ipvsadm-1.24/ipvsadm.8 ipvsadm-1.24-wlib/ipvsadm.8
--- ipvsadm-1.24/ipvsadm.8 2003-07-05 05:32:38.000000000 +0000
+++ ipvsadm-1.24-wlib/ipvsadm.8 2005-01-13 22:17:23.745596281 +0000
@@ -255,6 +255,10 @@ fixed service rate (weight) of the ith s
\fBnq\fR - Never Queue: assigns an incoming job to an idle server if
there is, instead of waiting for a fast one; if all the servers are
busy, it adopts the Shortest Expected Delay policy to assign the job.
+.sp
+\fBwlib\fR - Weighted Least Incoming Bandwidth: directs network
+connections to the real server with the least incoming bandwidth
+normalized by the server weight.
.TP
.B -p, --persistent [\fItimeout\fP]
Specify that a virtual service is persistent. If this option is
--- patch against linux kernel 2.6.10 ---
--- patch against linux kernel 2.6.10 ---
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
--- linux-2.6.10-stock/net/ipv4/ipvs/Kconfig 2005-01-10 03:57:58.000000000 +0000
+++ linux/net/ipv4/ipvs/Kconfig 2005-01-23 22:39:40.000000000 +0000
@@ -224,6 +224,25 @@ config IP_VS_NQ
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+config IP_VS_WLIB
+ tristate "weighted least incoming bandwidth scheduling"
+ depends on IP_VS
+ ---help---
+ The weighted least incoming bandwidth scheduling algorithm directs
+ network connections to the server with the least incoming bandwidth
+ normalized by the server weight.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
+config IP_VS_WLIB_PPS
+ bool "Use incoming packet rate rather than incoming byte rate"
+ depends on IP_VS_WLIB
+ ---help---
+ By default the weighted least incoming bandwidth scheduler uses the
+ incoming byte rate in its algorithm. Selecting this option will
+ cause the scheduler to use the incoming packet rate instead.
+
comment 'IPVS application helper'
depends on IP_VS
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
--- linux-2.6.10-stock/net/ipv4/ipvs/Makefile 2005-01-10 03:58:08.000000000 +0000
+++ linux/net/ipv4/ipvs/Makefile 2005-01-23 22:31:56.000000000 +0000
@@ -29,6 +29,7 @@ obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+obj-$(CONFIG_IP_VS_WLIB) += ip_vs_wlib.o
# IPVS application helpers
obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/ip_vs_wlib.c linux/net/ipv4/ipvs/ip_vs_wlib.c
--- linux-2.6.10-stock/net/ipv4/ipvs/ip_vs_wlib.c 2005-01-10 03:59:54.000000000 +0000
+++ linux/net/ipv4/ipvs/ip_vs_wlib.c 2005-01-23 23:46:21.000000000 +0000
@@ -0,0 +1,165 @@
+/*
+ * IPVS: Weighted Least Incoming Bandwidth Scheduling module
+ *
+ * Version: ip_vs_wlib.c 1.01 2005/01/23 ccaputo
+ *
+ * Authors: Chris Caputo <ccaputo@xxxxxxx> based on code by:
+ *
+ * Wensong Zhang <wensong@xxxxxxxxxxxxxxxxxxxxxx>
+ * Peter Kese <peter.kese@xxxxxx>
+ * Julian Anastasov <ja@xxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Chris Caputo: Based code on ip_vs_wlc.c and ip_vs_rr.c.
+ *
+ */
+
+/*
+ * The WLIB algorithm uses the results of the estimator's inbps (or inpps)
+ * calculations to determine which real server has the lowest incoming
+ * byterate (or packetrate).
+ *
+ * Real server weight is factored into the calculation. An example way to
+ * use this is if you have one server that can handle 100 Mbps of input and
+ * another that can handle 1 Gbps you could set the weights to be 100 and 1000
+ * respectively.
+ *
+ * If CONFIG_IP_VS_WLIB_PPS is set then inpps (packet rate) is used instead of
+ * inbps (byte rate).
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+
+static int
+ip_vs_wlib_init_svc(struct ip_vs_service *svc)
+{
+ svc->sched_data = &svc->destinations; /* init_service hook: scan cursor starts at the list head */
+ return 0; /* nothing here can fail */
+}
+
+
+static int
+ip_vs_wlib_done_svc(struct ip_vs_service *svc)
+{
+ return 0; /* done_service hook: init_svc allocated nothing, so nothing to free */
+}
+
+
+static int
+ip_vs_wlib_update_svc(struct ip_vs_service *svc)
+{
+ svc->sched_data = &svc->destinations; /* rewind the scan cursor to the list head */
+ return 0; /* update_service hook always reports success */
+}
+
+
+/*
+ * WLIB scheduling: return the usable dest with the least rate/weight, or NULL.
+ */
+static struct ip_vs_dest *
+ip_vs_wlib_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+ struct list_head *p, *q; /* p: where this scan started, q: scan cursor */
+ struct ip_vs_dest *dest, *least = NULL; /* least: best candidate so far */
+ u32 dr, lr = -1; /* dr: rate of dest, lr: rate of least */
+ int dwgt, lwgt = 0; /* dwgt: weight of dest, lwgt: weight of least */
+
+ IP_VS_DBG(6, "ip_vs_wlib_schedule(): Scheduling...\n");
+
+ /*
+ * We calculate the load of each dest server as follows:
+ * (dest inbps or inpps rate) / dest->weight
+ *
+ * The comparison of dr*lwgt < lr*dwgt is equivalent to that of
+ * dr/dwgt < lr/lwgt if every weight is larger than zero.
+ *
+ * A server with weight=0 is quiesced and will not receive any
+ * new connections.
+ *
+ * In case of ties, highest weight is winner. And if that still makes
+ * for a tie, round robin is used (which is why we remember our last
+ * starting location in the linked list).
+ */
+
+ write_lock(&svc->sched_lock); /* held while sched_data is read and updated */
+ p = (struct list_head *)svc->sched_data;
+ p = p->next; /* begin one node past the recorded position */
+ q = p;
+ do {
+ /* skip list head */
+ if (q == &svc->destinations) {
+ q = q->next;
+ continue;
+ }
+
+ dest = list_entry(q, struct ip_vs_dest, n_list);
+ dwgt = atomic_read(&dest->weight);
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && dwgt > 0) {
+ spin_lock(&dest->stats.lock); /* taken around the rate read */
+#ifdef CONFIG_IP_VS_WLIB_PPS
+ dr = dest->stats.inpps;
+#else /* CONFIG_IP_VS_WLIB_PPS */
+ dr = dest->stats.inbps;
+#endif /* CONFIG_IP_VS_WLIB_PPS */
+ spin_unlock(&dest->stats.lock);
+
+ if (least == NULL ||
+ (u64)dr * (u64)lwgt < (u64)lr * (u64)dwgt || /* cross-multiplied in 64 bits: no divide, no overflow */
+ (dr == lr && dwgt > lwgt)) { /* equal rates: prefer the heavier dest */
+ least = dest;
+ lr = dr;
+ lwgt = dwgt;
+ svc->sched_data = q; /* the next call starts scanning after this node */
+ }
+ }
+ q = q->next;
+ } while (q != p); /* stop after one full lap of the list */
+ write_unlock(&svc->sched_lock);
+
+ if (least != NULL)
+ IP_VS_DBG(6, "WLIB: server %u.%u.%u.%u:%u "
+ "activeconns %d refcnt %d weight %d\n",
+ NIPQUAD(least->addr), ntohs(least->port),
+ atomic_read(&least->activeconns),
+ atomic_read(&least->refcnt),
+ atomic_read(&least->weight));
+
+ return least; /* NULL when no dest was eligible */
+}
+
+
+static struct ip_vs_scheduler ip_vs_wlib_scheduler =
+{
+ .name = "wlib", /* same name the ipvsadm patch adds to SCHEDULERS */
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .init_service = ip_vs_wlib_init_svc, /* sets the scan cursor */
+ .done_service = ip_vs_wlib_done_svc, /* no-op */
+ .update_service = ip_vs_wlib_update_svc, /* resets the scan cursor */
+ .schedule = ip_vs_wlib_schedule, /* selects the destination server */
+};
+
+
+static int __init ip_vs_wlib_init(void)
+{
+ INIT_LIST_HEAD(&ip_vs_wlib_scheduler.n_list); /* initialise the list node before registering */
+ return register_ip_vs_scheduler(&ip_vs_wlib_scheduler); /* registration result is the init status */
+}
+
+static void __exit ip_vs_wlib_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_wlib_scheduler); /* undo the registration made in ip_vs_wlib_init() */
+}
+
+module_init(ip_vs_wlib_init); /* entry point: registers the wlib scheduler */
+module_exit(ip_vs_wlib_cleanup); /* exit point: unregisters it */
+MODULE_LICENSE("GPL"); /* matches the GPL notice in the file header */
|