Hi,
I'm back again with rather good news :)
I have to disappoint you as the per RS threshold patch is only available
for 2.2.x and 2.5.x kernels. I'll have a look at how trivial it is to
backport it to 2.4.x. Maybe not today because I have to finish other
programming tasks for a project.
I magically lost the sources of the project I had to do, so I was forced
to work on a port to 2.4.x :). Attached is the patch against the
ipvs-1.0.9 tar-ball. It compiles and the output looks ok but I haven't
tested it.
I call for brave testers here or for some good eyes (Horms? Julian? Lars?).
Wensong, I think I will ask you some questions about the current 2.5.x
implementation, maybe even some patches. I am not sure about the 1/3
rule :). Otherwise, if people test it and like it, I suggest we include
it. I will definitely work on it next week when I do another kernel round
for our company internal kernels.
To compile you might need to patch your kernel with the usual symbol
patches from the tar-ball. Then you patch with my threshold patch and
then you make the bloody thing.
A sample test output (didn't even crash the kernel) of my session:
root@laphish /data/lvs/ipvs-1.0.9-ratz # ./ipvsadm -L -n --thresholds
IP Virtual Server version 1.0.16 (size=32768)
Prot LocalAddress:Port Uthreshold Lthreshold ActiveConn InActConn
-> RemoteAddress:Port
root@laphish /data/lvs/ipvs-1.0.9-ratz # ./ipvsadm -A -t 10.10.10.10:80
root@laphish /data/lvs/ipvs-1.0.9-ratz # ./ipvsadm -a -t 10.10.10.10:80 -r 10.10.10.1:80 -x 59 -y 234
root@laphish /data/lvs/ipvs-1.0.9-ratz # ./ipvsadm -L -n --thresholds
IP Virtual Server version 1.0.16 (size=32768)
Prot LocalAddress:Port Uthreshold Lthreshold ActiveConn InActConn
-> RemoteAddress:Port
TCP 10.10.10.10:80 wlc
-> 10.10.10.1:80 59 234 0 0
root@laphish /data/lvs/ipvs-1.0.9-ratz # uname -a
Linux laphish.drugphish.ch 2.4.20-gentoo-r5 #2 Tue Jun 10 16:14:44 CEST 2003 i686 Pentium III (Coppermine) GenuineIntel GNU/Linux
root@laphish /data/lvs/ipvs-1.0.9-ratz #
Happiness rules until the stuff breaks the kernel!
Have fun,
Roberto Nibali, ratz
--
echo '[q]sa[ln0=aln256%Pln256/snlbx]sb3135071790101768542287578439snlbxq'|dc
diff -ur ipvs-1.0.9/ipvs/ip_vs.h ipvs-1.0.9-ratz/ipvs/ip_vs.h
--- ipvs-1.0.9/ipvs/ip_vs.h 2003-05-20 18:06:31.000000000 +0200
+++ ipvs-1.0.9-ratz/ipvs/ip_vs.h 2003-06-18 03:12:20.000000000 +0200
@@ -24,6 +24,7 @@
* Destination Server Flags
*/
#define IP_VS_DEST_F_AVAILABLE 0x0001 /* Available tag */
+#define IP_VS_DEST_F_OVERLOAD 0x0002 /* server is overloaded */
/*
* IPVS sync daemon states
@@ -113,6 +114,8 @@
u_int16_t dport;
unsigned conn_flags; /* destination flags */
int weight; /* destination weight */
+ u_int32_t u_threshold; /* upper threshold */
+ u_int32_t l_threshold; /* lower threshold */
};
@@ -175,6 +178,9 @@
int weight; /* destination weight */
u_int32_t activeconns; /* active connections */
u_int32_t inactconns; /* inactive connections */
+ u_int32_t u_threshold; /* upper threshold */
+ u_int32_t l_threshold; /* lower threshold */
+
/* statistics */
struct ip_vs_stats_user stats;
@@ -482,11 +488,15 @@
unsigned flags; /* dest status flags */
atomic_t weight; /* server weight */
atomic_t conn_flags; /* flags to copy to conn */
- atomic_t activeconns; /* active connections */
- atomic_t inactconns; /* inactive connections */
atomic_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */
+ /* connection counters and thresholds */
+ atomic_t activeconns; /* active connections */
+ atomic_t inactconns; /* inactive connections */
+ __u32 u_threshold; /* upper threshold */
+ __u32 l_threshold; /* lower threshold */
+
/* for destination cache */
spinlock_t dst_lock; /* lock dst_cache */
struct dst_entry *dst_cache; /* destination cache entry */
diff -ur ipvs-1.0.9/ipvs/ip_vs_conn.c ipvs-1.0.9-ratz/ipvs/ip_vs_conn.c
--- ipvs-1.0.9/ipvs/ip_vs_conn.c 2003-04-11 16:02:35.000000000 +0200
+++ ipvs-1.0.9-ratz/ipvs/ip_vs_conn.c 2003-06-18 03:10:46.000000000 +0200
@@ -21,6 +21,7 @@
* and others. Many code here is taken from IP MASQ code of kernel 2.2.
*
* Changes:
+ * Roberto Nibali, ratz: backported per RS threshold limitation from 2.5.x
*
*/
@@ -1048,6 +1049,11 @@
}
}
+static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
+{
+ return atomic_read(&dest->activeconns)
+ + atomic_read(&dest->inactconns);
+}
/*
* Bind a connection entry with a virtual service destination
@@ -1078,6 +1084,9 @@
ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
+ if (dest->u_threshold != 0 &&
+ ip_vs_dest_totalconns(dest) >= dest->u_threshold)
+ dest->flags |= IP_VS_DEST_F_OVERLOAD;
}
@@ -1114,6 +1123,21 @@
}
}
+ if (dest->l_threshold != 0) {
+ if (ip_vs_dest_totalconns(dest) < dest->l_threshold) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ } else if (dest->u_threshold != 0) {
+ /* I'm not so sure if this is a good idea. --ratz */
+ if (ip_vs_dest_totalconns(dest) * 4 < dest->u_threshold * 3) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ } else {
+ if (dest->flags & IP_VS_DEST_F_OVERLOAD) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ }
+
/*
* Simply decrease the refcnt of the dest, because the
* dest will be either in service's destination list
diff -ur ipvs-1.0.9/ipvs/ip_vs_ctl.c ipvs-1.0.9-ratz/ipvs/ip_vs_ctl.c
--- ipvs-1.0.9/ipvs/ip_vs_ctl.c 2002-11-14 11:05:23.000000000 +0100
+++ ipvs-1.0.9-ratz/ipvs/ip_vs_ctl.c 2003-06-18 03:11:03.000000000 +0200
@@ -17,6 +17,7 @@
* 2 of the License, or (at your option) any later version.
*
* Changes:
+ * Roberto Nibali, ratz: backported per RS threshold limitation from 2.5.x
*
*/
@@ -684,6 +685,12 @@
/* set the dest status flags */
dest->flags |= IP_VS_DEST_F_AVAILABLE;
+
+ if (ur->u_threshold == 0 || ur->u_threshold > dest->u_threshold) {
+ dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+ }
+ dest->u_threshold = ur->u_threshold;
+ dest->l_threshold = ur->l_threshold;
}
@@ -1880,6 +1887,8 @@
entry.port = dest->port;
entry.flags = atomic_read(&dest->conn_flags);
entry.weight = atomic_read(&dest->weight);
+ entry.u_threshold = dest->u_threshold;
+ entry.l_threshold = dest->l_threshold;
entry.activeconns = atomic_read(&dest->activeconns);
entry.inactconns = atomic_read(&dest->inactconns);
__ip_vs_copy_stats(&entry.stats, &dest->stats);
diff -ur ipvs-1.0.9/ipvs/ipvsadm/ipvsadm.c ipvs-1.0.9-ratz/ipvs/ipvsadm/ipvsadm.c
--- ipvs-1.0.9/ipvs/ipvsadm/ipvsadm.c 2003-04-11 16:02:38.000000000 +0200
+++ ipvs-1.0.9-ratz/ipvs/ipvsadm/ipvsadm.c 2003-06-18 02:58:41.000000000 +0200
@@ -52,6 +52,7 @@
* Horms : added -v option
* Wensong Zhang : rewrite most code of parsing options and
* processing options.
+ * Roberto Nibali, ratz: Added support for threshold limitation
*
*
* ippfvsadm - Port Fowarding & Virtual Server ADMinistration program
@@ -168,7 +169,10 @@
#define OPT_STATS 0x01000
#define OPT_RATE 0x02000
#define OPT_SORT 0x04000
-#define NUMBER_OF_OPT 15
+#define OPT_UTHRESHOLD 0x08000
+#define OPT_LTHRESHOLD 0x10000
+#define OPT_THRESHOLDS 0x20000
+#define NUMBER_OF_OPT 18
static const char* optnames[] = {
"numeric",
@@ -186,6 +190,9 @@
"stats",
"rate",
"sort",
+ "u-threshold",
+ "l-threshold",
+ "thresholds",
};
/*
@@ -198,22 +205,22 @@
*/
static const char commands_v_options[NUMBER_OF_CMD][NUMBER_OF_OPT] =
{
- /* -n -c svc -s -p -M -r fwd -w -mc -to dmn -st
-rt srt */
-/*INSERT*/ {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'x', 'x',
'x', 'x', 'x'},
-/*ADD*/ {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*EDIT*/ {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*DEL*/ {'x', 'x', '+', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*FLUSH*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*LIST*/ {' ', '1', '1', 'x', 'x', 'x', 'x', 'x', 'x', 'x', '1', '1', '
', ' ', ' '},
-/*ADD-SERVER*/{'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*DEL-SERVER*/{'x', 'x', '+', 'x', 'x', 'x', '+', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*EDIT-SRV*/ {'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*TIMEOUT*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*START-D*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x',
'x', 'x', 'x'},
-/*STOP-D*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*RESTORE*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*SAVE*/ {' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x'},
-/*ZERO*/ {'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x'},
+ /* -n -c svc -s -p -M -r fwd -w -mc -to dmn -st
-rt srt -x -y thr */
+/*INSERT*/ {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*ADD*/ {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*EDIT*/ {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*DEL*/ {'x', 'x', '+', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*FLUSH*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*LIST*/ {' ', '1', '1', 'x', 'x', 'x', 'x', 'x', 'x', 'x', '1', '1', '
', ' ', ' ', 'x', 'x', ' '},
+/*ADD-SERVER*/{'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', 'x', 'x', 'x',
'x', 'x', 'x', ' ', ' ', 'x'},
+/*DEL-SERVER*/{'x', 'x', '+', 'x', 'x', 'x', '+', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*EDIT-SRV*/ {'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', 'x', 'x', 'x',
'x', 'x', 'x', ' ', ' ', 'x'},
+/*TIMEOUT*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*START-D*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*STOP-D*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*RESTORE*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*SAVE*/ {' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
+/*ZERO*/ {'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', 'x', 'x', 'x'},
};
/* printing format flags */
@@ -223,6 +230,7 @@
#define FMT_STATS 0x0004
#define FMT_RATE 0x0008
#define FMT_SORT 0x0010
+#define FMT_THRESHOLDS 0x0020
#define SERVICE_NONE 0x0
#define SERVICE_ADDR 0x1
@@ -355,6 +363,9 @@
{"stats", '\0', POPT_ARG_NONE, NULL, '7'},
{"rate", '\0', POPT_ARG_NONE, NULL, '8'},
{"sort", '\0', POPT_ARG_NONE, NULL, '9'},
+ {"u-threshold", 'x', POPT_ARG_STRING, &optarg, 'x'},
+ {"l-threshold", 'y', POPT_ARG_STRING, &optarg, 'y'},
+ {"thresholds", '\0', POPT_ARG_NONE, NULL, '0'},
{NULL, 0, 0, NULL, 0}
};
@@ -493,6 +504,18 @@
string_to_number(optarg, 0, 65535)) == -1)
fail(2, "illegal weight specified");
break;
+ case 'x':
+ set_option(options, OPT_UTHRESHOLD);
+ if ((ur->u_threshold =
+ string_to_number(optarg, 0, 65535)) == -1)
+ fail(2, "illegal u_threshold specified");
+ break;
+ case 'y':
+ set_option(options, OPT_LTHRESHOLD);
+ if ((ur->l_threshold =
+ string_to_number(optarg, 0, 65535)) == -1)
+ fail(2, "illegal l_threshold specified");
+ break;
case 'c':
set_option(options, OPT_CONNECTION);
break;
@@ -522,6 +545,10 @@
set_option(options, OPT_SORT);
*format |= FMT_SORT;
break;
+ case '0':
+ set_option(options, OPT_THRESHOLDS);
+ *format |= FMT_THRESHOLDS;
+ break;
default:
fail(2, "invalid option");
}
@@ -604,6 +631,9 @@
{"stats", 0, 0, '7'},
{"rate", 0, 0, '8'},
{"sort", 0, 0, '9'},
+ {"u-threshold", 1, 0, 'x'},
+ {"l-threshold", 1, 0, 'y'},
+ {"thresholds", 0, 0, '0'},
{"help", 0, 0, 'h'},
{0, 0, 0, 0}
};
@@ -765,6 +795,18 @@
string_to_number(optarg,0,65535)) == -1)
fail(2, "illegal weight specified");
break;
+ case 'x':
+ set_option(options, OPT_UTHRESHOLD);
+ if ((ur->u_threshold =
+ string_to_number(optarg, 0, 65535)) == -1)
+ fail(2, "illegal u_threshold specified");
+ break;
+ case 'y':
+ set_option(options, OPT_LTHRESHOLD);
+ if ((ur->l_threshold =
+ string_to_number(optarg, 0, 65535)) == -1)
+ fail(2, "illegal l_threshold specified");
+ break;
case 'c':
set_option(options, OPT_CONNECTION);
break;
@@ -794,6 +836,10 @@
set_option(options, OPT_SORT);
*format |= FMT_SORT;
break;
+ case '0':
+ set_option(options, OPT_THRESHOLDS);
+ *format |= FMT_THRESHOLDS;
+ break;
default:
fail(2, "invalid option");
}
@@ -853,7 +899,8 @@
if ((options & OPT_CONNECTION
|| options & OPT_TIMEOUT
|| options & OPT_DAEMON)
- && (options & OPT_STATS || options & OPT_RATE))
+ && (options & OPT_STATS || options & OPT_RATE
+ || options & OPT_THRESHOLDS))
fail(2, "options conflicts in the list command");
if (options & OPT_CONNECTION)
@@ -1120,7 +1167,7 @@
" %s -C\n"
" %s -R\n"
" %s -S [-n]\n"
- " %s -a|e -t|u|f service-address -r server-address [-g|i|m]
[-w weight]\n"
+ " %s -a|e -t|u|f service-address -r server-address [Options]\n"
" %s -d -t|u|f service-address -r server-address\n"
" %s -L|l [options]\n"
" %s -Z [-t|u|f service-address]\n"
@@ -1166,12 +1213,15 @@
" --ipip -i ipip encapsulation
(tunneling)\n"
" --masquerading -m masquerading (NAT)\n"
" --weight -w weight capacity of real
server\n"
+ " --u-threshold -x uthreshold upper threshold of
connections\n"
+ " --l-threshold -y lthreshold lower threshold of
connections\n"
" --mcast-interface interface multicast interface for
connection sync\n"
" --connection -c output of current IPVS
connections\n"
" --timeout output of timeout (tcp
tcpfin udp)\n"
" --daemon output of daemon
information\n"
" --stats output of statistics
information\n"
" --rate output of rate
information\n"
+ " --thresholds output of thresholds
information\n"
" --sort sorting output of
service/server entries\n"
" --numeric -n numeric output of
addresses and ports\n",
DEF_SCHED);
@@ -1411,6 +1461,12 @@
" -> RemoteAddress:Port\n",
"Prot LocalAddress:Port",
"CPS", "InPPS", "OutPPS", "InBPS", "OutBPS");
+ else if (format & FMT_THRESHOLDS)
+ printf("%-33s %-10s %-10s %-10s %-10s\n"
+ " -> RemoteAddress:Port\n",
+ "Prot LocalAddress:Port",
+ "Uthreshold", "Lthreshold", "ActiveConn", "InActConn");
+
else if (!(format & FMT_RULE))
printf("Prot LocalAddress:Port Scheduler Flags\n"
" -> RemoteAddress:Port Forward Weight
ActiveConn InActConn\n");
@@ -1527,6 +1583,11 @@
print_largenum(e->stats.inbps);
print_largenum(e->stats.outbps);
printf("\n");
+ } else if (format & FMT_THRESHOLDS) {
+ printf(" -> %-28s %-10u %-10u %-10u %-10u\n", dname,
+ e->u_threshold, e->l_threshold,
+ e->activeconns, e->inactconns);
+
} else
printf(" -> %-28s %-7s %-6d %-10u %-10u\n",
dname, fwd_name(e->flags),
|