LVS
lvs-devel
Google
 
Web LinuxVirtualServer.org

Re: [PATCH] IPVS: Allow boot time change of hash size.

To: "Joseph Mack NA3T" <jmack@xxxxxxxx>
Subject: Re: [PATCH] IPVS: Allow boot time change of hash size.
Cc: netdev@xxxxxxxxxxxxxxx, lvs-devel@xxxxxxxxxxxxxxx
From: "Catalin(ux) M. BOIE" <catab@xxxxxxxxxxxxx>
Date: Wed, 26 Nov 2008 23:58:09 -0700 (MST)
Hello!

> On Wed, 26 Nov 2008, Catalin(ux) M. BOIE wrote:
>
>> I was very frustrated about the fact that I have to recompile the kernel
>> to change the hash size. So, I created this patch.
>
> thanks for sending us the code.
>
> Why do you need to change the hash size? We really don't
> recommend anyone do this under normal circumstances

As the help text says: to reduce hash collisions when there are a lot of
concurrent connections.
Or am I missing something?

> Thanks Joe
>
>> If IPVS is built-in you can append ip_vs.conn_tab_bits=?? to kernel
>> command line, or, if you built IPVS as modules, you can add
>> options ip_vs conn_tab_bits=??.
> >> To keep everything backward compatible, you can still select the size at
>> compile time, and that will be used as default.
>>
>> Signed-off-by: Catalin(ux) M. BOIE <catab@xxxxxxxxxxxxx>
>> ---
>> include/net/ip_vs.h             |   16 ++++----------
>> net/netfilter/ipvs/Kconfig      |    4 +++
>> net/netfilter/ipvs/ip_vs_conn.c |   41
>> ++++++++++++++++++++++++++++----------
>> net/netfilter/ipvs/ip_vs_ctl.c  |    8 +++---
>> 4 files changed, 43 insertions(+), 26 deletions(-)
>>
>> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
>> index fe9fcf7..5a788a4 100644
>> --- a/include/net/ip_vs.h
>> +++ b/include/net/ip_vs.h
>> @@ -26,6 +26,11 @@
>> #include <linux/ipv6.h>                      /* for struct ipv6hdr */
>> #include <net/ipv6.h>                        /* for ipv6_addr_copy */
>>
>> +
>> +/* Connections' size value needed by ip_vs_ctl.c */
>> +extern int ip_vs_conn_tab_size;
>> +
>> +
>> struct ip_vs_iphdr {
>>      int len;
>>      __u8 protocol;
>> @@ -599,17 +604,6 @@ extern void ip_vs_init_hash_table(struct list_head
>> *table, int rows);
>>  *     (from ip_vs_conn.c)
>>  */
>>
>> -/*
>> - *     IPVS connection entry hash table
>> - */
>> -#ifndef CONFIG_IP_VS_TAB_BITS
>> -#define CONFIG_IP_VS_TAB_BITS   12
>> -#endif
>> -
>> -#define IP_VS_CONN_TAB_BITS CONFIG_IP_VS_TAB_BITS
>> -#define IP_VS_CONN_TAB_SIZE     (1 << IP_VS_CONN_TAB_BITS)
>> -#define IP_VS_CONN_TAB_MASK     (IP_VS_CONN_TAB_SIZE - 1)
>> -
>> enum {
>>      IP_VS_DIR_INPUT = 0,
>>      IP_VS_DIR_OUTPUT,
>> diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
>> index 79a6980..c71e543 100644
>> --- a/net/netfilter/ipvs/Kconfig
>> +++ b/net/netfilter/ipvs/Kconfig
>> @@ -68,6 +68,10 @@ config    IP_VS_TAB_BITS
>>        each hash entry uses 8 bytes, so you can estimate how much memory is
>>        needed for your box.
>>
> >> +      You can override this number by setting the conn_tab_bits module
> >> +      parameter or, if IPVS was compiled built-in, by appending
> >> +      ip_vs.conn_tab_bits=? to the kernel command line.
>> +
>> comment "IPVS transport protocol load balancing support"
>>
>> config       IP_VS_PROTO_TCP
>> diff --git a/net/netfilter/ipvs/ip_vs_conn.c
>> b/net/netfilter/ipvs/ip_vs_conn.c
>> index 9a24332..b1462f1 100644
>> --- a/net/netfilter/ipvs/ip_vs_conn.c
>> +++ b/net/netfilter/ipvs/ip_vs_conn.c
>> @@ -37,6 +37,21 @@
>> #include <net/ip_vs.h>
>>
>>
>> +#ifndef CONFIG_IP_VS_TAB_BITS
>> +#define CONFIG_IP_VS_TAB_BITS       12
>> +#endif
>> +
>> +/*
>> + * Connection hash size. Default is what was selected at compile time.
>> +*/
>> +int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
>> +module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
>> +MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
>> +
>> +/* size and mask values */
>> +int ip_vs_conn_tab_size;
>> +int ip_vs_conn_tab_mask;
>> +
>> /*
>>  *  Connection hash table: for input and output packets lookups of IPVS
>>  */
>> @@ -122,11 +137,11 @@ static unsigned int ip_vs_conn_hashkey(int af,
>> unsigned proto,
>>      if (af == AF_INET6)
>>              return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
>>                                  (__force u32)port, proto, ip_vs_conn_rnd)
>> -                    & IP_VS_CONN_TAB_MASK;
>> +                    & ip_vs_conn_tab_mask;
>> #endif
>>      return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
>>                          ip_vs_conn_rnd)
>> -            & IP_VS_CONN_TAB_MASK;
>> +            & ip_vs_conn_tab_mask;
>> }
>>
>>
>> @@ -752,7 +767,7 @@ static void *ip_vs_conn_array(struct seq_file *seq,
>> loff_t pos)
>>      int idx;
>>      struct ip_vs_conn *cp;
>>
>> -    for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
>> +    for(idx = 0; idx < ip_vs_conn_tab_size; idx++) {
>>              ct_read_lock_bh(idx);
>>              list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
>>                      if (pos-- == 0) {
>> @@ -789,7 +804,7 @@ static void *ip_vs_conn_seq_next(struct seq_file
>> *seq, void *v, loff_t *pos)
>>      idx = l - ip_vs_conn_tab;
>>      ct_read_unlock_bh(idx);
>>
>> -    while (++idx < IP_VS_CONN_TAB_SIZE) {
>> +    while (++idx < ip_vs_conn_tab_size) {
>>              ct_read_lock_bh(idx);
>>              list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
>>                      seq->private = &ip_vs_conn_tab[idx];
>> @@ -972,8 +987,8 @@ void ip_vs_random_dropentry(void)
>>      /*
>>       * Randomly scan 1/32 of the whole table every second
>>       */
>> -    for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) {
>> -            unsigned hash = net_random() & IP_VS_CONN_TAB_MASK;
>> +    for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
>> +            unsigned hash = net_random() & ip_vs_conn_tab_mask;
>>
>>              /*
>>               *  Lock is actually needed in this loop.
>> @@ -1025,7 +1040,7 @@ static void ip_vs_conn_flush(void)
>>      struct ip_vs_conn *cp;
>>
>>   flush_again:
>> -    for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) {
>> +    for (idx=0; idx<ip_vs_conn_tab_size; idx++) {
>>              /*
>>               *  Lock is actually needed in this loop.
>>               */
>> @@ -1056,10 +1071,14 @@ int __init ip_vs_conn_init(void)
>> {
>>      int idx;
>>
>> +    /* Compute size and mask */
>> +    ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
>> +    ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
>> +
>>      /*
>>       * Allocate the connection hash table and initialize its list heads
>>       */
>> -    ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct
>> list_head));
>> +    ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size * sizeof(struct
>> list_head));
>>      if (!ip_vs_conn_tab)
>>              return -ENOMEM;
>>
>> @@ -1074,12 +1093,12 @@ int __init ip_vs_conn_init(void)
>>
>>      IP_VS_INFO("Connection hash table configured "
>>                 "(size=%d, memory=%ldKbytes)\n",
>> -               IP_VS_CONN_TAB_SIZE,
>> -               (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024);
>> +               ip_vs_conn_tab_size,
>> +               (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
>>      IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
>>                sizeof(struct ip_vs_conn));
>>
>> -    for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
>> +    for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
>>              INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
>>      }
>>
>> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c
>> b/net/netfilter/ipvs/ip_vs_ctl.c
>> index 0302cf3..6dcadc2 100644
>> --- a/net/netfilter/ipvs/ip_vs_ctl.c
>> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
>> @@ -1854,7 +1854,7 @@ static int ip_vs_info_seq_show(struct seq_file
>> *seq, void *v)
>>      if (v == SEQ_START_TOKEN) {
>>              seq_printf(seq,
>>                      "IP Virtual Server version %d.%d.%d (size=%d)\n",
>> -                    NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
>> +                    NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
>>              seq_puts(seq,
>>                       "Prot LocalAddress:Port Scheduler Flags\n");
>>              seq_puts(seq,
>> @@ -2385,7 +2385,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void
>> __user *user, int *len)
>>              char buf[64];
>>
>>              sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
>> -                    NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
>> +                    NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
>>              if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
>>                      ret = -EFAULT;
>>                      goto out;
>> @@ -2398,7 +2398,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void
>> __user *user, int *len)
>>      {
>>              struct ip_vs_getinfo info;
>>              info.version = IP_VS_VERSION_CODE;
>> -            info.size = IP_VS_CONN_TAB_SIZE;
>> +            info.size = ip_vs_conn_tab_size;
>>              info.num_services = ip_vs_num_services;
>>              if (copy_to_user(user, &info, sizeof(info)) != 0)
>>                      ret = -EFAULT;
>> @@ -3238,7 +3238,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb,
>> struct genl_info *info)
>>      case IPVS_CMD_GET_INFO:
>>              NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
>>              NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
>> -                        IP_VS_CONN_TAB_SIZE);
>> +                        ip_vs_conn_tab_size);
>>              break;
>>      }
>>
>>
>
> --
> Joseph Mack NA3T EME(B,D), FM05lw North Carolina
> jmack (at) wm7d (dot) net - azimuthal equidistant map
> generator at http://www.wm7d.net/azproj.shtml
> Homepage http://www.austintek.com/ It's GNU/Linux!
>


-- 
Catalin(ux) M. BOIE
http://kernel.embedromix.ro/

--
To unsubscribe from this list: send the line "unsubscribe lvs-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

<Prev in Thread] Current Thread [Next in Thread>