LVS
lvs-users
Google
 
Web LinuxVirtualServer.org

[CFT/RFC/PATCH] backport of the RS threshold limitation patch for 2.4.x

To: "LinuxVirtualServer.org users mailing list." <lvs-users@xxxxxxxxxxxxxxxxxxxxxx>
Subject: [CFT/RFC/PATCH] backport of the RS threshold limitation patch for 2.4.x kernels
Cc: Wensong Zhang <wensong@xxxxxxxxxxxx>
From: Roberto Nibali <ratz@xxxxxxxxxxxx>
Date: Wed, 18 Jun 2003 03:15:16 +0200
Hi,

I'm back again with rather good news :)

I have to disappoint you as the per-RS threshold patch is only available for 2.2.x and 2.5.x kernels. I'll have a look at how trivial it is to backport it to 2.4.x. Maybe not today because I have to finish other programming tasks for a project.

I magically lost the sources of the project I had to do, so I was forced to work on a port to 2.4.x :). Attached is the patch against the ipvs-1.0.9 tar-ball. It compiles and the output looks ok but I haven't tested it.

I call for brave testers here or for some good eyes (Horms? Julian? Lars?)

Wensong, I think I will have some questions for you about the current 2.5.x implementation, and maybe even some patches. I am not sure about the 1/3 rule :). Otherwise, if people test it and like it, I suggest we include it. I will definitely work on it next week when I do another kernel round for our company internal kernels.

To compile you might need to patch your kernel with the usual symbol patches from the tar-ball. Then you patch with my threshold patch and then you make the bloody thing.

A sample test output (didn't even crash the kernel) of my session:

root@laphish /data/lvs/ipvs-1.0.9-ratz # ./ipvsadm -L -n --thresholds
IP Virtual Server version 1.0.16 (size=32768)
Prot LocalAddress:Port            Uthreshold Lthreshold ActiveConn InActConn
  -> RemoteAddress:Port
root@laphish /data/lvs/ipvs-1.0.9-ratz # ./ipvsadm -A -t 10.10.10.10:80
root@laphish /data/lvs/ipvs-1.0.9-ratz # ./ipvsadm -a -t 10.10.10.10:80 -r 10.10.10.1:80 -x 59 -y 234
root@laphish /data/lvs/ipvs-1.0.9-ratz # ./ipvsadm -L -n --thresholds
IP Virtual Server version 1.0.16 (size=32768)
Prot LocalAddress:Port            Uthreshold Lthreshold ActiveConn InActConn
  -> RemoteAddress:Port
TCP  10.10.10.10:80 wlc
  -> 10.10.10.1:80                59         234        0          0
root@laphish /data/lvs/ipvs-1.0.9-ratz # uname -a
Linux laphish.drugphish.ch 2.4.20-gentoo-r5 #2 Tue Jun 10 16:14:44 CEST 2003 i686 Pentium III (Coppermine) GenuineIntel GNU/Linux
root@laphish /data/lvs/ipvs-1.0.9-ratz #

Happiness rules until the stuff breaks the kernel!

Have fun,
Roberto Nibali, ratz
--
echo '[q]sa[ln0=aln256%Pln256/snlbx]sb3135071790101768542287578439snlbxq'|dc
diff -ur ipvs-1.0.9/ipvs/ip_vs.h ipvs-1.0.9-ratz/ipvs/ip_vs.h
--- ipvs-1.0.9/ipvs/ip_vs.h     2003-05-20 18:06:31.000000000 +0200
+++ ipvs-1.0.9-ratz/ipvs/ip_vs.h        2003-06-18 03:12:20.000000000 +0200
@@ -24,6 +24,7 @@
  *      Destination Server Flags
  */
 #define IP_VS_DEST_F_AVAILABLE        0x0001    /* Available tag */
+#define IP_VS_DEST_F_OVERLOAD         0x0002    /* server is overloaded */
 
 /*
  *      IPVS sync daemon states
@@ -113,6 +114,8 @@
        u_int16_t       dport;
        unsigned        conn_flags;     /* destination flags */
        int             weight;         /* destination weight */
+       u_int32_t       u_threshold;    /* upper threshold */
+       u_int32_t       l_threshold;    /* lower threshold */
 };
 
 
@@ -175,6 +178,9 @@
        int             weight;         /* destination weight */
        u_int32_t       activeconns;    /* active connections */
        u_int32_t       inactconns;     /* inactive connections */
+       u_int32_t       u_threshold;    /* upper threshold */
+       u_int32_t       l_threshold;    /* lower threshold */
+
 
        /* statistics */
        struct ip_vs_stats_user stats;
@@ -482,11 +488,15 @@
        unsigned                flags;    /* dest status flags */
        atomic_t                weight;   /* server weight */
        atomic_t                conn_flags;     /* flags to copy to conn */
-       atomic_t                activeconns;    /* active connections */
-       atomic_t                inactconns;     /* inactive connections */
        atomic_t                refcnt;         /* reference counter */
        struct ip_vs_stats      stats;          /* statistics */
 
+       /* connection counters and thresholds */
+       atomic_t                activeconns;    /* active connections */
+       atomic_t                inactconns;     /* inactive connections */
+       __u32                   u_threshold;    /* upper threshold */
+       __u32                   l_threshold;    /* lower threshold */
+
        /* for destination cache */
        spinlock_t              dst_lock;       /* lock dst_cache */
        struct dst_entry        *dst_cache;     /* destination cache entry */
diff -ur ipvs-1.0.9/ipvs/ip_vs_conn.c ipvs-1.0.9-ratz/ipvs/ip_vs_conn.c
--- ipvs-1.0.9/ipvs/ip_vs_conn.c        2003-04-11 16:02:35.000000000 +0200
+++ ipvs-1.0.9-ratz/ipvs/ip_vs_conn.c   2003-06-18 03:10:46.000000000 +0200
@@ -21,6 +21,7 @@
  * and others. Many code here is taken from IP MASQ code of kernel 2.2.
  *
  * Changes:
+ *     Roberto Nibali, ratz: backported per RS threshold limitation from 2.5.x
  *
  */
 
@@ -1048,6 +1049,11 @@
        }
 }
 
+static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
+{
+       return atomic_read(&dest->activeconns)
+               + atomic_read(&dest->inactconns);
+}
 
 /*
  *  Bind a connection entry with a virtual service destination
@@ -1078,6 +1084,9 @@
                  ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
                  cp->flags, atomic_read(&cp->refcnt),
                  atomic_read(&dest->refcnt));
+       if (dest->u_threshold != 0 &&
+           ip_vs_dest_totalconns(dest) >= dest->u_threshold)
+               dest->flags |= IP_VS_DEST_F_OVERLOAD;
 }
 
 
@@ -1114,6 +1123,21 @@
                        }
                }
 
+               if (dest->l_threshold != 0) {
+                       if (ip_vs_dest_totalconns(dest) < dest->l_threshold) {
+                               dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+                       }
+               } else if (dest->u_threshold != 0) {
+                       /* I'm not so sure if this is a good idea. --ratz */
+                       if (ip_vs_dest_totalconns(dest) * 4 < dest->u_threshold * 3) {
+                               dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+                       }
+               } else {
+                       if (dest->flags & IP_VS_DEST_F_OVERLOAD) {
+                               dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+                       }
+               }
+
                /*
                 * Simply decrease the refcnt of the dest, because the
                 * dest will be either in service's destination list
diff -ur ipvs-1.0.9/ipvs/ip_vs_ctl.c ipvs-1.0.9-ratz/ipvs/ip_vs_ctl.c
--- ipvs-1.0.9/ipvs/ip_vs_ctl.c 2002-11-14 11:05:23.000000000 +0100
+++ ipvs-1.0.9-ratz/ipvs/ip_vs_ctl.c    2003-06-18 03:11:03.000000000 +0200
@@ -17,6 +17,7 @@
  *              2 of the License, or (at your option) any later version.
  *
  * Changes:
+ *     Roberto Nibali, ratz: backported per RS threshold limitation from 2.5.x
  *
  */
 
@@ -684,6 +685,12 @@
 
        /* set the dest status flags */
        dest->flags |= IP_VS_DEST_F_AVAILABLE;
+
+       if (ur->u_threshold == 0 || ur->u_threshold > dest->u_threshold) {
+               dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
+       }
+       dest->u_threshold = ur->u_threshold;
+       dest->l_threshold = ur->l_threshold;
 }
 
 
@@ -1880,6 +1887,8 @@
                        entry.port = dest->port;
                        entry.flags = atomic_read(&dest->conn_flags);
                        entry.weight = atomic_read(&dest->weight);
+                       entry.u_threshold = dest->u_threshold;
+                       entry.l_threshold = dest->l_threshold;
                        entry.activeconns = atomic_read(&dest->activeconns);
                        entry.inactconns = atomic_read(&dest->inactconns);
                        __ip_vs_copy_stats(&entry.stats, &dest->stats);
diff -ur ipvs-1.0.9/ipvs/ipvsadm/ipvsadm.c ipvs-1.0.9-ratz/ipvs/ipvsadm/ipvsadm.c
--- ipvs-1.0.9/ipvs/ipvsadm/ipvsadm.c   2003-04-11 16:02:38.000000000 +0200
+++ ipvs-1.0.9-ratz/ipvs/ipvsadm/ipvsadm.c      2003-06-18 02:58:41.000000000 +0200
@@ -52,6 +52,7 @@
  *        Horms               :   added -v option
  *        Wensong Zhang       :   rewrite most code of parsing options and
  *                                processing options.
+ *        Roberto Nibali, ratz:   Added support for threshold limitation
  *
  *
  *      ippfvsadm - Port Fowarding & Virtual Server ADMinistration program
@@ -168,7 +169,10 @@
 #define OPT_STATS      0x01000
 #define OPT_RATE       0x02000
 #define OPT_SORT       0x04000
-#define NUMBER_OF_OPT  15
+#define OPT_UTHRESHOLD  0x08000
+#define OPT_LTHRESHOLD  0x10000
+#define OPT_THRESHOLDS  0x20000
+#define NUMBER_OF_OPT  18
 
 static const char* optnames[] = {
        "numeric",
@@ -186,6 +190,9 @@
        "stats",
        "rate",
        "sort",
+       "u-threshold",
+       "l-threshold",
+       "thresholds",
 };
 
 /*
@@ -198,22 +205,22 @@
  */
 static const char commands_v_options[NUMBER_OF_CMD][NUMBER_OF_OPT] =
 {
-       /*     -n   -c   svc  -s   -p   -M   -r   fwd  -w   -mc  -to  dmn  -st  -rt  srt */
-/*INSERT*/    {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x'},
-/*ADD*/       {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*EDIT*/      {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*DEL*/       {'x', 'x', '+', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*FLUSH*/     {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*LIST*/      {' ', '1', '1', 'x', 'x', 'x', 'x', 'x', 'x', 'x', '1', '1', ' ', ' ', ' '},
-/*ADD-SERVER*/{'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*DEL-SERVER*/{'x', 'x', '+', 'x', 'x', 'x', '+', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*EDIT-SRV*/  {'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*TIMEOUT*/   {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*START-D*/   {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x'},
-/*STOP-D*/    {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*RESTORE*/   {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*SAVE*/      {' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
-/*ZERO*/      {'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+       /*     -n   -c   svc  -s   -p   -M   -r   fwd  -w   -mc  -to  dmn  -st  -rt  srt  -x   -y   thr */
+/*INSERT*/    {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*ADD*/       {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*EDIT*/      {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*DEL*/       {'x', 'x', '+', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*FLUSH*/     {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*LIST*/      {' ', '1', '1', 'x', 'x', 'x', 'x', 'x', 'x', 'x', '1', '1', ' ', ' ', ' ', 'x', 'x', ' '},
+/*ADD-SERVER*/{'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', ' ', ' ', 'x'},
+/*DEL-SERVER*/{'x', 'x', '+', 'x', 'x', 'x', '+', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*EDIT-SRV*/  {'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', ' ', ' ', 'x'},
+/*TIMEOUT*/   {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*START-D*/   {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*STOP-D*/    {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*RESTORE*/   {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*SAVE*/      {' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
+/*ZERO*/      {'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'},
 };
 
 /* printing format flags */
@@ -223,6 +230,7 @@
 #define FMT_STATS      0x0004
 #define FMT_RATE       0x0008
 #define FMT_SORT       0x0010
+#define FMT_THRESHOLDS 0x0020
 
 #define SERVICE_NONE    0x0
 #define SERVICE_ADDR    0x1
@@ -355,6 +363,9 @@
                {"stats", '\0', POPT_ARG_NONE, NULL, '7'},
                {"rate", '\0', POPT_ARG_NONE, NULL, '8'},
                {"sort", '\0', POPT_ARG_NONE, NULL, '9'},
+               {"u-threshold", 'x', POPT_ARG_STRING, &optarg, 'x'},
+               {"l-threshold", 'y', POPT_ARG_STRING, &optarg, 'y'},
+               {"thresholds", '\0', POPT_ARG_NONE, NULL, '0'},
                {NULL, 0, 0, NULL, 0}
        };
 
@@ -493,6 +504,18 @@
                             string_to_number(optarg, 0, 65535)) == -1)
                                fail(2, "illegal weight specified");
                        break;
+               case 'x':
+                       set_option(options, OPT_UTHRESHOLD);
+                       if ((ur->u_threshold =
+                           string_to_number(optarg, 0, 65535)) == -1)
+                               fail(2, "illegal u_threshold specified");
+                       break;
+               case 'y':
+                       set_option(options, OPT_LTHRESHOLD);
+                       if ((ur->l_threshold =
+                           string_to_number(optarg, 0, 65535)) == -1)
+                               fail(2, "illegal l_threshold specified");
+                       break;
                case 'c':
                        set_option(options, OPT_CONNECTION);
                        break;
@@ -522,6 +545,10 @@
                        set_option(options, OPT_SORT);
                        *format |= FMT_SORT;
                        break;
+               case '0':
+                       set_option(options, OPT_THRESHOLDS);
+                       *format |= FMT_THRESHOLDS;
+                       break;
                default:
                        fail(2, "invalid option");
                }
@@ -604,6 +631,9 @@
                {"stats", 0, 0, '7'},
                {"rate", 0, 0, '8'},
                {"sort", 0, 0, '9'},
+               {"u-threshold", 1, 0, 'x'},
+               {"l-threshold", 1, 0, 'y'},
+               {"thresholds", 0, 0, '0'},
                {"help", 0, 0, 'h'},
                {0, 0, 0, 0}
        };
@@ -765,6 +795,18 @@
                             string_to_number(optarg,0,65535)) == -1)
                                fail(2, "illegal weight specified");
                        break;
+               case 'x':
+                       set_option(options, OPT_UTHRESHOLD);
+                       if ((ur->u_threshold =
+                           string_to_number(optarg, 0, 65535)) == -1)
+                               fail(2, "illegal u_threshold specified");
+                       break;
+               case 'y':
+                       set_option(options, OPT_LTHRESHOLD);
+                       if ((ur->l_threshold =
+                           string_to_number(optarg, 0, 65535)) == -1)
+                               fail(2, "illegal l_threshold specified");
+                       break;
                case 'c':
                        set_option(options, OPT_CONNECTION);
                        break;
@@ -794,6 +836,10 @@
                        set_option(options, OPT_SORT);
                        *format |= FMT_SORT;
                        break;
+               case '0':
+                       set_option(options, OPT_THRESHOLDS);
+                       *format |= FMT_THRESHOLDS;
+                       break;
                default:
                        fail(2, "invalid option");
                }
@@ -853,7 +899,8 @@
                if ((options & OPT_CONNECTION
                     || options & OPT_TIMEOUT
                     || options & OPT_DAEMON)
-                   && (options & OPT_STATS || options & OPT_RATE))
+                   && (options & OPT_STATS || options & OPT_RATE
+                    || options & OPT_THRESHOLDS))
                        fail(2, "options conflicts in the list command");
 
                if (options & OPT_CONNECTION)
@@ -1120,7 +1167,7 @@
                "  %s -C\n"
                "  %s -R\n"
                "  %s -S [-n]\n"
-               "  %s -a|e -t|u|f service-address -r server-address [-g|i|m] [-w weight]\n"
+               "  %s -a|e -t|u|f service-address -r server-address [Options]\n"
                "  %s -d -t|u|f service-address -r server-address\n"
                "  %s -L|l [options]\n"
                "  %s -Z [-t|u|f service-address]\n"
@@ -1166,12 +1213,15 @@
                "  --ipip         -i                   ipip encapsulation (tunneling)\n"
                "  --masquerading -m                   masquerading (NAT)\n"
                "  --weight       -w weight            capacity of real server\n"
+               "  --u-threshold  -x uthreshold        upper threshold of connections\n"
+               "  --l-threshold  -y lthreshold        lower threshold of connections\n"
                "  --mcast-interface interface         multicast interface for connection sync\n"
                "  --connection   -c                   output of current IPVS connections\n"
                "  --timeout                           output of timeout (tcp tcpfin udp)\n"
                "  --daemon                            output of daemon information\n"
                "  --stats                             output of statistics information\n"
                "  --rate                              output of rate information\n"
+               "  --thresholds                        output of thresholds information\n"
                "  --sort                              sorting output of service/server entries\n"
                "  --numeric      -n                   numeric output of addresses and ports\n",
                DEF_SCHED);
@@ -1411,6 +1461,12 @@
                       "  -> RemoteAddress:Port\n",
                       "Prot LocalAddress:Port",
                       "CPS", "InPPS", "OutPPS", "InBPS", "OutBPS");
+       else if (format & FMT_THRESHOLDS)
+               printf("%-33s %-10s %-10s %-10s %-10s\n"
+                       "  -> RemoteAddress:Port\n",
+                       "Prot LocalAddress:Port",
+                       "Uthreshold", "Lthreshold", "ActiveConn", "InActConn");
+
        else if (!(format & FMT_RULE))
                printf("Prot LocalAddress:Port Scheduler Flags\n"
                       "  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn\n");
@@ -1527,6 +1583,11 @@
                        print_largenum(e->stats.inbps);
                        print_largenum(e->stats.outbps);
                        printf("\n");
+               } else if (format & FMT_THRESHOLDS) {
+                       printf("  -> %-28s %-10u %-10u %-10u %-10u\n", dname,
+                               e->u_threshold, e->l_threshold,
+                               e->activeconns, e->inactconns);
+
                } else
                        printf("  -> %-28s %-7s %-6d %-10u %-10u\n",
                               dname, fwd_name(e->flags),
<Prev in Thread] Current Thread [Next in Thread>