diff -u -b -I$Id:.*Exp -r1.1.1.4 -r1.1.1.2.2.3 Index: ipvs/ip_vs.h =================================================================== RCS file: /home/valinux/simon/cvs/ipvs-1.0/ipvs/ip_vs.h,v retrieving revision 1.1.1.6 retrieving revision 1.1.1.2.2.3 diff -u -b -I$Id:.*Exp -r1.1.1.6 -r1.1.1.2.2.3 --- ipvs/ip_vs.h 15 Nov 2002 02:25:39 -0000 1.1.1.6 +++ ipvs/ip_vs.h 15 Nov 2002 04:17:24 -0000 1.1.1.2.2.3 @@ -218,6 +218,131 @@ }; +/* + * IPVS connection entry hash table + */ + +#define VS_STATE_INPUT 0 +#define VS_STATE_OUTPUT 4 +#define VS_STATE_INPUT_ONLY 8 + +/* + * Delta sequence info structure + * Each ip_vs_conn has 2 (output AND input seq. changes). + * Only used in the VS/NAT. + */ +struct ip_vs_seq { + __u32 init_seq; /* Add delta from this seq */ + __u32 delta; /* Delta in sequence numbers */ + __u32 previous_delta; /* Delta in sequence numbers + before last resized pkt */ +}; + + +/* + * IPVS sync connection entry + */ +struct ip_vs_sync_conn { + __u8 reserved; + + /* Protocol, addresses and port numbers */ + __u8 protocol; /* Which protocol (TCP/UDP) */ + __u16 cport; + __u16 vport; + __u16 dport; + __u32 caddr; /* client address */ + __u32 vaddr; /* virtual address */ + __u32 daddr; /* destination address */ + + /* Flags and state transition */ + __u16 flags; /* status flags */ + __u16 state; /* state info */ + + /* The sequence options start here */ +}; + +struct ip_vs_sync_conn_options { + struct ip_vs_seq in_seq; /* incoming seq. struct */ + struct ip_vs_seq out_seq; /* outgoing seq. struct */ +}; + +#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ) +#define IP_VS_SYNC_SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) +#define IP_VS_SYNC_FULL_CONN_SIZE \ +(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) + + +/* + The master multicasts messages to the backup load balancers in the + following format. 
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Count Conns | Reserved | Size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | IPVS Sync Connection (1) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | . | + | . | + | . | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | IPVS Sync Connection (n) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +*/ + +struct ip_vs_sync_mesg { + __u8 nr_conns; + __u8 reserved; + __u16 size; + + /* ip_vs_sync_conn entries start here */ +}; + + +/* Trailing sizeof(struct ip_vs_sync_conn_options) is to allow + * the full connection count to be used by allowing a bit of extra + * space, just in case the last connection is FULL_CONN_SIZE + * instead of SIMPLE_CONN_SIZE */ + +/* At the very least the message needs to hold one connection */ +#define IP_VS_SYNC_MESG_MAX_SIZE_MIN \ + (IP_VS_SYNC_SIMPLE_CONN_SIZE + sizeof(struct ip_vs_sync_mesg) \ + + sizeof(struct ip_vs_sync_conn_options)) + +/* At most, 255 connections can be carried. This is because + * the nr_conns element in the ip_vs_sync_mesg structure + * is an unsigned 8bit integer, and thus has a valid range + * of 0 - 255. Beyond that a wraparound will occur. */ +#define IP_VS_SYNC_MESG_MAX_SIZE_MAX \ + (255 * IP_VS_SYNC_SIMPLE_CONN_SIZE + sizeof(struct ip_vs_sync_mesg) \ + + sizeof(struct ip_vs_sync_conn_options)) + +/* By default, send 50 connections per message. This fits nicely + * into a 1500 MTU packet */ +#define IP_VS_SYNC_MESG_MAX_SIZE_DEFAULT \ + (50 * IP_VS_SYNC_SIMPLE_CONN_SIZE + sizeof(struct ip_vs_sync_mesg) \ + + sizeof(struct ip_vs_sync_conn_options)) + +#define IP_VS_SYNC_MESG_MAX_SIZE \ + ((sysctl_ip_vs_sync_msg_max_size > IP_VS_SYNC_MESG_MAX_SIZE_MAX) ? \ + IP_VS_SYNC_MESG_MAX_SIZE_MAX: \ + ((sysctl_ip_vs_sync_msg_max_size < IP_VS_SYNC_MESG_MAX_SIZE_MIN) ? 
\ + IP_VS_SYNC_MESG_MAX_SIZE_MIN : sysctl_ip_vs_sync_msg_max_size)) + + +#define IP_VS_SYNC_FREQUENCY_DEFAULT 50 +#define IP_VS_SYNC_FREQUENCY \ + ((sysctl_ip_vs_sync_frequency < 1) ? 1 : sysctl_ip_vs_sync_frequency) + +#define IP_VS_SYNC_THRESHOLD_DEFAULT 3 +#define IP_VS_SYNC_THRESHOLD \ + ((sysctl_ip_vs_sync_threshold < 0) ? 0 : \ + (sysctl_ip_vs_sync_threshold >= IP_VS_SYNC_FREQUENCY) ? \ + IP_VS_SYNC_FREQUENCY - 1 : sysctl_ip_vs_sync_threshold) + #ifdef __KERNEL__ #include @@ -316,7 +441,9 @@ NET_IPV4_VS_CACHE_BYPASS=22, NET_IPV4_VS_EXPIRE_NODEST_CONN=23, NET_IPV4_VS_SYNC_THRESHOLD=24, - NET_IPV4_VS_NAT_ICMP_SEND=25, + NET_IPV4_VS_SYNC_FREQUENCY=25, + NET_IPV4_VS_SYNC_MSG_MAX_SIZE=26, + NET_IPV4_VS_NAT_ICMP_SEND=27, NET_IPV4_VS_LAST }; @@ -362,19 +489,6 @@ /* - * Delta sequence info structure - * Each ip_vs_conn has 2 (output AND input seq. changes). - * Only used in the VS/NAT. - */ -struct ip_vs_seq { - __u32 init_seq; /* Add delta from this seq */ - __u32 delta; /* Delta in sequence numbers */ - __u32 previous_delta; /* Delta in sequence numbers - before last resized pkt */ -}; - - -/* * IPVS statistics object */ struct ip_vs_stats @@ -701,6 +815,8 @@ extern int sysctl_ip_vs_cache_bypass; extern int sysctl_ip_vs_expire_nodest_conn; extern int sysctl_ip_vs_sync_threshold; +extern int sysctl_ip_vs_sync_frequency; +extern int sysctl_ip_vs_sync_msg_max_size; extern int sysctl_ip_vs_nat_icmp_send; extern atomic_t ip_vs_dropentry; extern struct ip_vs_stats ip_vs_stats; Index: ipvs/ip_vs_ctl.c =================================================================== RCS file: /home/valinux/simon/cvs/ipvs-1.0/ipvs/ip_vs_ctl.c,v retrieving revision 1.1.1.7 retrieving revision 1.1.1.2.2.3 diff -u -b -I$Id:.*Exp -r1.1.1.7 -r1.1.1.2.2.3 --- ipvs/ip_vs_ctl.c 15 Nov 2002 02:25:39 -0000 1.1.1.7 +++ ipvs/ip_vs_ctl.c 15 Nov 2002 04:17:24 -0000 1.1.1.2.2.3 @@ -78,7 +78,9 @@ static int sysctl_ip_vs_am_droprate = 10; int sysctl_ip_vs_cache_bypass = 0; int 
sysctl_ip_vs_expire_nodest_conn = 0; -int sysctl_ip_vs_sync_threshold = 3; +int sysctl_ip_vs_sync_threshold = IP_VS_SYNC_THRESHOLD_DEFAULT; +int sysctl_ip_vs_sync_frequency = IP_VS_SYNC_FREQUENCY_DEFAULT; +int sysctl_ip_vs_sync_msg_max_size = IP_VS_SYNC_MESG_MAX_SIZE_DEFAULT; int sysctl_ip_vs_nat_icmp_send = 0; #ifdef CONFIG_IP_VS_DEBUG @@ -1410,6 +1412,12 @@ &proc_dointvec}, {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold", &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_VS_SYNC_FREQUENCY, "sync_frequency", + &sysctl_ip_vs_sync_frequency, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_VS_SYNC_MSG_MAX_SIZE, "sync_msg_max_size", + &sysctl_ip_vs_sync_msg_max_size, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send", &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL, Index: ipvs/ip_vs_sync.c =================================================================== RCS file: /home/valinux/simon/cvs/ipvs-1.0/ipvs/ip_vs_sync.c,v retrieving revision 1.1.1.5 retrieving revision 1.1.1.2.2.3 diff -u -b -I$Id:.*Exp -r1.1.1.5 -r1.1.1.2.2.3 --- ipvs/ip_vs_sync.c 6 Sep 2002 01:16:26 -0000 1.1.1.5 +++ ipvs/ip_vs_sync.c 15 Nov 2002 04:17:24 -0000 1.1.1.2.2.3 @@ -39,69 +39,6 @@ #define IP_VS_SYNC_PORT 8848 /* multicast port */ -/* - * IPVS sync connection entry - */ -struct ip_vs_sync_conn { - __u8 reserved; - - /* Protocol, addresses and port numbers */ - __u8 protocol; /* Which protocol (TCP/UDP) */ - __u16 cport; - __u16 vport; - __u16 dport; - __u32 caddr; /* client address */ - __u32 vaddr; /* virtual address */ - __u32 daddr; /* destination address */ - - /* Flags and state transition */ - __u16 flags; /* status flags */ - __u16 state; /* state info */ - - /* The sequence options start here */ -}; - -struct ip_vs_sync_conn_options { - struct ip_vs_seq in_seq; /* incoming seq. struct */ - struct ip_vs_seq out_seq; /* outgoing seq. 
struct */ -}; - -#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ) -#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) -#define FULL_CONN_SIZE \ -(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) - - -/* - The master mulitcasts messages to the backup load balancers in the - following format. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Count Conns | Reserved | Size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | IPVS Sync Connection (1) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | . | - | . | - | . | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | IPVS Sync Connection (n) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -*/ -#define SYNC_MESG_MAX_SIZE (24*50+4) -struct ip_vs_sync_mesg { - __u8 nr_conns; - __u8 reserved; - __u16 size; - - /* ip_vs_sync_conn entries start here */ -}; - - struct ip_vs_sync_buff { struct list_head list; unsigned long firstuse; @@ -153,14 +90,14 @@ if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - if (!(sb->mesg=kmalloc(SYNC_MESG_MAX_SIZE, GFP_ATOMIC))) { + if (!(sb->mesg=kmalloc(IP_VS_SYNC_MESG_MAX_SIZE, GFP_ATOMIC))) { kfree(sb); return NULL; } sb->mesg->nr_conns = 0; sb->mesg->size = 4; sb->head = (unsigned char *)sb->mesg + 4; - sb->end = (unsigned char *)sb->mesg + SYNC_MESG_MAX_SIZE; + sb->end = (unsigned char *)sb->mesg + IP_VS_SYNC_MESG_MAX_SIZE; sb->firstuse = jiffies; return sb; } @@ -211,8 +148,8 @@ } } - len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : - SIMPLE_CONN_SIZE; + len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? 
IP_VS_SYNC_FULL_CONN_SIZE : + IP_VS_SYNC_SIMPLE_CONN_SIZE; m = curr_sb->mesg; s = (struct ip_vs_sync_conn *)curr_sb->head; @@ -237,7 +174,7 @@ curr_sb->head += len; /* check if there is a space for next one */ - if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { + if (curr_sb->head+IP_VS_SYNC_FULL_CONN_SIZE > curr_sb->end) { sb_queue_tail(curr_sb); curr_sb = NULL; } @@ -294,9 +231,9 @@ if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) { opt = (struct ip_vs_sync_conn_options *)&s[1]; memcpy(&cp->in_seq, opt, sizeof(*opt)); - p += FULL_CONN_SIZE; + p += IP_VS_SYNC_FULL_CONN_SIZE; } else - p += SIMPLE_CONN_SIZE; + p += IP_VS_SYNC_SIMPLE_CONN_SIZE; atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold); cp->timeout = IP_VS_SYNC_CONN_TIMEOUT; @@ -625,7 +562,7 @@ char *buf; int len; - if (!(buf=kmalloc(SYNC_MESG_MAX_SIZE, GFP_ATOMIC))) { + if (!(buf=kmalloc(IP_VS_SYNC_MESG_MAX_SIZE, GFP_ATOMIC))) { IP_VS_ERR("sync_backup_loop: kmalloc error\n"); return; } @@ -639,7 +576,7 @@ /* do you have data now? */ while (!skb_queue_empty(&(sock->sk->receive_queue))) { if ((len=ip_vs_receive(sock, buf, - SYNC_MESG_MAX_SIZE))<=0) { + IP_VS_SYNC_MESG_MAX_SIZE))<=0) { IP_VS_ERR("receiving message error\n"); break; } Index: ipvsadm.8 =================================================================== RCS file: /home/valinux/simon/cvs/ipvs-1.0/ipvs/ipvsadm/ipvsadm.8,v retrieving revision 1.1.1.6 diff -u -b -I$Id:.*Exp -r1.1.1.6 ipvsadm.8 --- ipvsadm.8 15 Nov 2002 02:22:55 -0000 1.1.1.6 +++ ipvsadm.8 15 Nov 2002 05:37:23 -0000 @@ -5,6 +5,7 @@ .\" .\" Authors: Mike Wangsmo .\" Wensong Zhang +.\" Horms .\" .\" Changes: .\" Horms : Updated to reflect recent change of ipvsadm @@ -14,6 +15,8 @@ .\" Horms : Tidy up some of the description and the .\" grammar in the -f and sysctl sections .\" Wensong Zhang : --set option description taken from ipchains(8) +.\" Horms : Document synchronisation daemon's proc +.\" entries. 
.\" .\" This program is free software; you can redistribute it and/or modify .\" it under the terms of the GNU General Public License as published by @@ -154,19 +157,43 @@ the current timeout value of the corresponding entry is preserved. .TP .B --start-daemon \fIstate\fP -Start the connection synchronization daemon. The \fIstate\fP is to +Start the connection synchronisation daemon. The \fIstate\fP is to indicate that the daemon is started as \fImaster\fP or \fIbackup\fP. The -connection synchronization daemon is implemented inside the Linux -kernel. The master daemon running at the primary load balancer +connection synchronisation daemon is implemented inside the Linux +kernel. The master daemon running on the primary load balancer multicasts changes of connections periodically, and the backup daemon -running at the backup load balancers receives multicast message and -creates corresponding connections. Then, in case the primary load -balancer fails, a backup load balancer will takeover, and it has state -of almost all connections, so that almost all established connections +running on the backup load balancers receives multicast messages and +creates corresponding connections. Then, if the primary load +balancer fails and a backup load balancer takes over, it has the state +of almost all connections. Thus, almost all established connections can continue to access the service. +.sp +There are 3 proc entries that affect the behaviour of the +synchronisation daemon. In the case of each of +these proc entries, values outside of the valid ranges given +will be rounded up or down as necessary. +.sp +/proc/sys/net/ipv4/vs/sync_msg_max_size sets the maximum size of messages +sent by the synchronisation daemon in bytes. The default is 1228 and the +useful range is 52 through to 6148. +.sp +/proc/sys/net/ipv4/vs/sync_frequency sets the synchronisation frequency \- +how often a connection is +synchronised in terms of the number of packets received. 
The default is 50 +and the useful range is 1 through to 2147483647. +.sp +/proc/sys/net/ipv4/vs/sync_threshold sets the synchronisation threshold \- +the minimum number of packets a connection needs to receive before it will +be synchronised. The default is 3 and the useful range is from 0 up to one less than the +synchronisation frequency. Once this threshold is passed the connection +will be synchronised each time the number of packets, modulo the +synchronisation frequency, equals the threshold. For example, using the +default frequency of 50 and the default threshold of 3, +synchronisation will occur once the 3rd packet is received, and every +50th packet from then onwards. .TP .B --stop-daemon -Stop the connection synchronization daemon. +Stop the connection synchronisation daemon. .TP \fB-h, --help\fR Display a description of the command syntax. @@ -462,6 +489,10 @@ .br .I /proc/sys/net/ipv4/vs/secure_tcp .br +.I /proc/sys/net/ipv4/vs/sync_msg_max_size +.br +.I /proc/sys/net/ipv4/vs/sync_threshold +.br .I /proc/sys/net/ipv4/vs/timeout_close .br .I /proc/sys/net/ipv4/vs/timeout_closewait @@ -493,5 +524,5 @@ Peter Kese man page - Mike Wangsmo Wensong Zhang - Horms + Horms .fi