LVS
lvs-devel
Google
 
Web LinuxVirtualServer.org

[RFC PATCH 9/9] ipvs network name space aware

To: lvs-devel@xxxxxxxxxxxxxxx, netdev@xxxxxxxxxxxxxxx, netfilter-devel@xxxxxxxxxxxxxxx
Subject: [RFC PATCH 9/9] ipvs network name space aware
Cc: horms@xxxxxxxxxxxx, ja@xxxxxx, wensong@xxxxxxxxxxxx, daniel.lezcano@xxxxxxx
From: Hans Schillstrom <hans.schillstrom@xxxxxxxxxxxx>
Date: Fri, 8 Oct 2010 13:17:13 +0200
This patch contains the network namespace changes for ip_vs_sync.c and ip_vs_xmit.c.

There is one sync daemon per netns, and a number (a kind of incarnation
counter) is appended to its thread name, e.g. "ipvs_master:%d".

Part of the netns migration in ip_vs_xmit.c was done in the IPv6 tunnel patch,
so make sure that "[patch v4] ipvs: IPv6 tunnel mode" is applied first.

Signed-off-by: Hans Schillstrom <hans.schillstrom@xxxxxxxxxxxx>

diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 7ba0693..98575da 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -74,6 +74,7 @@ struct ip_vs_sync_conn_options {
 struct ip_vs_sync_thread_data {
        struct socket *sock;
        char *buf;
+       struct net *net;
 };

 #define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
@@ -113,9 +114,6 @@ struct ip_vs_sync_mesg {
        /* ip_vs_sync_conn entries start here */
 };

-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;

 struct ip_vs_sync_buff {
        struct list_head        list;
@@ -127,70 +125,41 @@ struct ip_vs_sync_buff {
        unsigned char           *end;
 };

-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff   *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
-/* multicast addr */
-static struct sockaddr_in mcast_addr = {
-       .sin_family             = AF_INET,
-       .sin_port               = cpu_to_be16(IP_VS_SYNC_PORT),
-       .sin_addr.s_addr        = cpu_to_be32(IP_VS_SYNC_GROUP),
-};
-
-
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(struct net *net)
 {
        struct ip_vs_sync_buff *sb;
+       struct netns_ipvs *ipvs = net->ipvs;

-       spin_lock_bh(&ip_vs_sync_lock);
-       if (list_empty(&ip_vs_sync_queue)) {
+       spin_lock_bh(&ipvs->sync_lock);
+       if (list_empty(&ipvs->sync_queue)) {
                sb = NULL;
        } else {
-               sb = list_entry(ip_vs_sync_queue.next,
+               sb = list_entry(ipvs->sync_queue.next,
                                struct ip_vs_sync_buff,
                                list);
                list_del(&sb->list);
        }
-       spin_unlock_bh(&ip_vs_sync_lock);
+       spin_unlock_bh(&ipvs->sync_lock);

        return sb;
 }

-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(struct net *net)
 {
        struct ip_vs_sync_buff *sb;

        if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
                return NULL;

-       if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+       if (!(sb->mesg=kmalloc(net->ipvs->sync_send_mesg_maxlen, GFP_ATOMIC))) {
                kfree(sb);
                return NULL;
        }
        sb->mesg->nr_conns = 0;
-       sb->mesg->syncid = ip_vs_master_syncid;
+       sb->mesg->syncid = net->ipvs->master_syncid;
        sb->mesg->size = 4;
        sb->head = (unsigned char *)sb->mesg + 4;
-       sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+       sb->end = (unsigned char *)sb->mesg + net->ipvs->sync_send_mesg_maxlen;
        sb->firstuse = jiffies;
        return sb;
 }
@@ -201,14 +170,16 @@ static inline void ip_vs_sync_buff_release(struct 
ip_vs_sync_buff *sb)
        kfree(sb);
 }

-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+static inline void sb_queue_tail(struct net *net, struct ip_vs_sync_buff *sb)
 {
-       spin_lock(&ip_vs_sync_lock);
-       if (ip_vs_sync_state & IP_VS_STATE_MASTER)
-               list_add_tail(&sb->list, &ip_vs_sync_queue);
+       struct netns_ipvs *ipvs = net->ipvs;
+
+       spin_lock(&ipvs->sync_lock);
+       if (ipvs->sync_state & IP_VS_STATE_MASTER)
+               list_add_tail(&sb->list, &ipvs->sync_queue);
        else
                ip_vs_sync_buff_release(sb);
-       spin_unlock(&ip_vs_sync_lock);
+       spin_unlock(&ipvs->sync_lock);
 }

 /*
@@ -216,18 +187,19 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff 
*sb)
  *     than the specified time or the specified time is zero.
  */
 static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
+get_curr_sync_buff(struct net *net, unsigned long time)
 {
        struct ip_vs_sync_buff *sb;
+       struct netns_ipvs *ipvs = net->ipvs;

-       spin_lock_bh(&curr_sb_lock);
-       if (curr_sb && (time == 0 ||
-                       time_before(jiffies - curr_sb->firstuse, time))) {
-               sb = curr_sb;
-               curr_sb = NULL;
+       spin_lock_bh(&ipvs->sync_buff_lock);
+       if (ipvs->sync_buff && (time == 0 ||
+                       time_before(jiffies - ipvs->sync_buff->firstuse, 
time))) {
+               sb = ipvs->sync_buff;
+               ipvs->sync_buff = NULL;
        } else
                sb = NULL;
-       spin_unlock_bh(&curr_sb_lock);
+       spin_unlock_bh(&ipvs->sync_buff_lock);
        return sb;
 }

@@ -236,16 +208,17 @@ get_curr_sync_buff(unsigned long time)
  *      Add an ip_vs_conn information into the current sync_buff.
  *      Called by ip_vs_in.
  */
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
 {
        struct ip_vs_sync_mesg *m;
        struct ip_vs_sync_conn *s;
        int len;
+       struct netns_ipvs *ipvs = net->ipvs;

-       spin_lock(&curr_sb_lock);
-       if (!curr_sb) {
-               if (!(curr_sb=ip_vs_sync_buff_create())) {
-                       spin_unlock(&curr_sb_lock);
+       spin_lock(&ipvs->sync_buff_lock);
+       if (!ipvs->sync_buff) {
+               if (!(ipvs->sync_buff=ip_vs_sync_buff_create(net))) {
+                       spin_unlock(&ipvs->sync_buff_lock);
                        pr_err("ip_vs_sync_buff_create failed.\n");
                        return;
                }
@@ -253,8 +226,8 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)

        len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
                SIMPLE_CONN_SIZE;
-       m = curr_sb->mesg;
-       s = (struct ip_vs_sync_conn *)curr_sb->head;
+       m = ipvs->sync_buff->mesg;
+       s = (struct ip_vs_sync_conn *)ipvs->sync_buff->head;

        /* copy members */
        s->protocol = cp->protocol;
@@ -274,18 +247,18 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)

        m->nr_conns++;
        m->size += len;
-       curr_sb->head += len;
+       ipvs->sync_buff->head += len;

        /* check if there is a space for next one */
-       if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
-               sb_queue_tail(curr_sb);
-               curr_sb = NULL;
+       if (ipvs->sync_buff->head+FULL_CONN_SIZE > ipvs->sync_buff->end) {
+               sb_queue_tail(net, ipvs->sync_buff);
+               ipvs->sync_buff = NULL;
        }
-       spin_unlock(&curr_sb_lock);
+       spin_unlock(&ipvs->sync_buff_lock);

        /* synchronize its controller if it has */
        if (cp->control)
-               ip_vs_sync_conn(cp->control);
+               ip_vs_sync_conn(net, cp->control);
 }


@@ -293,13 +266,15 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
  *      Process received multicast message and create the corresponding
  *      ip_vs_conn entries.
  */
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
+static void
+ip_vs_process_message(struct net *net, const char *buffer, const size_t buflen)
 {
        struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
        struct ip_vs_sync_conn *s;
        struct ip_vs_sync_conn_options *opt;
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
        struct ip_vs_dest *dest;
        char *p;
        int i;
@@ -318,7 +293,7 @@ static void ip_vs_process_message(const char *buffer, const 
size_t buflen)
        }

        /* SyncID sanity check */
-       if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
+       if (net->ipvs->backup_syncid != 0 && m->syncid != 
net->ipvs->backup_syncid) {
                IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
                          m->syncid);
                return;
@@ -371,13 +346,13 @@ static void ip_vs_process_message(const char *buffer, 
const size_t buflen)
                }

                if (!(flags & IP_VS_CONN_F_TEMPLATE))
-                       cp = ip_vs_conn_in_get(AF_INET, s->protocol,
+                       cp = ip_vs_conn_in_get(net, AF_INET, s->protocol,
                                               (union nf_inet_addr *)&s->caddr,
                                               s->cport,
                                               (union nf_inet_addr *)&s->vaddr,
                                               s->vport);
                else
-                       cp = ip_vs_ct_in_get(AF_INET, s->protocol,
+                       cp = ip_vs_ct_in_get(net, AF_INET, s->protocol,
                                             (union nf_inet_addr *)&s->caddr,
                                             s->cport,
                                             (union nf_inet_addr *)&s->vaddr,
@@ -388,7 +363,7 @@ static void ip_vs_process_message(const char *buffer, const 
size_t buflen)
                         * If it is not found the connection will remain unbound
                         * but still handled.
                         */
-                       dest = ip_vs_find_dest(AF_INET,
+                       dest = ip_vs_find_dest(net, AF_INET,
                                               (union nf_inet_addr *)&s->daddr,
                                               s->dport,
                                               (union nf_inet_addr *)&s->vaddr,
@@ -406,7 +381,7 @@ static void ip_vs_process_message(const char *buffer, const 
size_t buflen)
                                else
                                        flags &= ~IP_VS_CONN_F_INACTIVE;
                        }
-                       cp = ip_vs_conn_new(AF_INET, s->protocol,
+                       cp = ip_vs_conn_new(net, AF_INET, s->protocol,
                                            (union nf_inet_addr *)&s->caddr,
                                            s->cport,
                                            (union nf_inet_addr *)&s->vaddr,
@@ -421,7 +396,7 @@ static void ip_vs_process_message(const char *buffer, const 
size_t buflen)
                                return;
                        }
                } else if (!cp->dest) {
-                       dest = ip_vs_try_bind_dest(cp);
+                       dest = ip_vs_try_bind_dest(net, cp);
                        if (dest)
                                atomic_dec(&dest->refcnt);
                } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
@@ -452,7 +427,7 @@ static void ip_vs_process_message(const char *buffer, const 
size_t buflen)

                if (opt)
                        memcpy(&cp->in_seq, opt, sizeof(*opt));
-               atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+               atomic_set(&cp->in_pkts, net->ipvs->sysctl_sync_threshold[0]);
                cp->state = state;
                cp->old_state = cp->state;
                /*
@@ -461,8 +436,9 @@ static void ip_vs_process_message(const char *buffer, const 
size_t buflen)
                 * virtual service. If needed, we can do it for
                 * non-fwmark persistent services.
                 */
-               if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
-                       cp->timeout = pp->timeout_table[state];
+               pd = ip_vs_proto_data_get(net,cp->protocol);
+               if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table 
)
+                       cp->timeout = pd->timeout_table[state];
                else
                        cp->timeout = (3*60*HZ);
                ip_vs_conn_put(cp);
@@ -503,8 +479,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
 {
        struct net_device *dev;
        struct inet_sock *inet = inet_sk(sk);
+       struct net *net = sock_net(sk);

-       if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+       BUG_ON(!net);
+       if ((dev = __dev_get_by_name(net, ifname)) == NULL)
                return -ENODEV;

        if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -523,30 +501,31 @@ static int set_mcast_if(struct sock *sk, char *ifname)
  *     Set the maximum length of sync message according to the
  *     specified interface's MTU.
  */
-static int set_sync_mesg_maxlen(int sync_state)
+static int set_sync_mesg_maxlen(struct net *net, int sync_state)
 {
        struct net_device *dev;
        int num;
+       struct netns_ipvs *ipvs = net->ipvs;

        if (sync_state == IP_VS_STATE_MASTER) {
-               if ((dev = __dev_get_by_name(&init_net, 
ip_vs_master_mcast_ifn)) == NULL)
+               if ((dev = __dev_get_by_name(net, ipvs->master_mcast_ifn)) == 
NULL)
                        return -ENODEV;

                num = (dev->mtu - sizeof(struct iphdr) -
                       sizeof(struct udphdr) -
                       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
-               sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+               ipvs->sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
                        SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
                IP_VS_DBG(7, "setting the maximum length of sync sending "
-                         "message %d.\n", sync_send_mesg_maxlen);
+                         "message %d.\n", ipvs->sync_send_mesg_maxlen);
        } else if (sync_state == IP_VS_STATE_BACKUP) {
-               if ((dev = __dev_get_by_name(&init_net, 
ip_vs_backup_mcast_ifn)) == NULL)
+               if ((dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn)) == 
NULL)
                        return -ENODEV;

-               sync_recv_mesg_maxlen = dev->mtu -
+               ipvs->sync_recv_mesg_maxlen = dev->mtu -
                        sizeof(struct iphdr) - sizeof(struct udphdr);
                IP_VS_DBG(7, "setting the maximum length of sync receiving "
-                         "message %d.\n", sync_recv_mesg_maxlen);
+                         "message %d.\n", ipvs->sync_recv_mesg_maxlen);
        }

        return 0;
@@ -564,11 +543,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, 
char *ifname)
        struct ip_mreqn mreq;
        struct net_device *dev;
        int ret;
+       struct net *net = sock_net(sk);

+       BUG_ON(!net);
        memset(&mreq, 0, sizeof(mreq));
        memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));

-       if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+       if ((dev = __dev_get_by_name(net, ifname)) == NULL)
                return -ENODEV;
        if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
                return -EINVAL;
@@ -588,8 +569,10 @@ static int bind_mcastif_addr(struct socket *sock, char 
*ifname)
        struct net_device *dev;
        __be32 addr;
        struct sockaddr_in sin;
+       struct net *net = sock_net(sock->sk);

-       if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+       BUG_ON(!net);
+       if ((dev = __dev_get_by_name(net, ifname)) == NULL)
                return -ENODEV;

        addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -611,19 +594,19 @@ static int bind_mcastif_addr(struct socket *sock, char 
*ifname)
 /*
  *      Set up sending multicast socket over UDP
  */
-static struct socket * make_send_sock(void)
+static struct socket * make_send_sock(struct net *net)
 {
        struct socket *sock;
        int result;

-       /* First create a socket */
-       result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+       /* First create a socket in current netns  */
+       result = sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
        if (result < 0) {
                pr_err("Error during creation of socket; terminating\n");
                return ERR_PTR(result);
        }

-       result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+       result = set_mcast_if(sock->sk, net->ipvs->master_mcast_ifn);
        if (result < 0) {
                pr_err("Error setting outbound mcast interface\n");
                goto error;
@@ -632,13 +615,14 @@ static struct socket * make_send_sock(void)
        set_mcast_loop(sock->sk, 0);
        set_mcast_ttl(sock->sk, 1);

-       result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+       result = bind_mcastif_addr(sock, net->ipvs->master_mcast_ifn);
        if (result < 0) {
                pr_err("Error binding address of the mcast interface\n");
                goto error;
        }

-       result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
+       result = sock->ops->connect(sock,
+                       (struct sockaddr *) &net->ipvs->sync_mcast_addr,
                        sizeof(struct sockaddr), 0);
        if (result < 0) {
                pr_err("Error connecting to the multicast addr\n");
@@ -656,13 +640,13 @@ static struct socket * make_send_sock(void)
 /*
  *      Set up receiving multicast socket over UDP
  */
-static struct socket * make_receive_sock(void)
+static struct socket * make_receive_sock(struct net *net)
 {
        struct socket *sock;
        int result;

-       /* First create a socket */
-       result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+       /* First create a socket in current netns */
+       result = sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
        if (result < 0) {
                pr_err("Error during creation of socket; terminating\n");
                return ERR_PTR(result);
@@ -671,7 +655,8 @@ static struct socket * make_receive_sock(void)
        /* it is equivalent to the REUSEADDR option in user-space */
        sock->sk->sk_reuse = 1;

-       result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
+       result = sock->ops->bind(sock,
+                       (struct sockaddr *) &net->ipvs->sync_mcast_addr,
                        sizeof(struct sockaddr));
        if (result < 0) {
                pr_err("Error binding to the multicast addr\n");
@@ -680,8 +665,8 @@ static struct socket * make_receive_sock(void)

        /* join the multicast group */
        result = join_mcast_group(sock->sk,
-                       (struct in_addr *) &mcast_addr.sin_addr,
-                       ip_vs_backup_mcast_ifn);
+                       (struct in_addr *) &net->ipvs->sync_mcast_addr.sin_addr,
+                       net->ipvs->backup_mcast_ifn);
        if (result < 0) {
                pr_err("Error joining to the multicast group\n");
                goto error;
@@ -756,16 +741,17 @@ static int sync_thread_master(void *data)

        pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
                "syncid = %d\n",
-               ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+               tinfo->net->ipvs->master_mcast_ifn,
+               tinfo->net->ipvs->master_syncid);

        while (!kthread_should_stop()) {
-               while ((sb = sb_dequeue())) {
+               while ((sb = sb_dequeue(tinfo->net))) {
                        ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
                        ip_vs_sync_buff_release(sb);
                }

                /* check if entries stay in curr_sb for 2 seconds */
-               sb = get_curr_sync_buff(2 * HZ);
+               sb = get_curr_sync_buff(tinfo->net, 2 * HZ);
                if (sb) {
                        ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
                        ip_vs_sync_buff_release(sb);
@@ -775,12 +761,12 @@ static int sync_thread_master(void *data)
        }

        /* clean up the sync_buff queue */
-       while ((sb=sb_dequeue())) {
+       while ((sb=sb_dequeue(tinfo->net))) {
                ip_vs_sync_buff_release(sb);
        }

        /* clean up the current sync_buff */
-       if ((sb = get_curr_sync_buff(0))) {
+       if ((sb = get_curr_sync_buff(tinfo->net, 0))) {
                ip_vs_sync_buff_release(sb);
        }

@@ -796,10 +782,11 @@ static int sync_thread_backup(void *data)
 {
        struct ip_vs_sync_thread_data *tinfo = data;
        int len;
-
+
        pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
                "syncid = %d\n",
-               ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+               tinfo->net->ipvs->backup_mcast_ifn,
+               tinfo->net->ipvs->backup_syncid);

        while (!kthread_should_stop()) {
                wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -809,16 +796,15 @@ static int sync_thread_backup(void *data)
                /* do we have data now? */
                while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
                        len = ip_vs_receive(tinfo->sock, tinfo->buf,
-                                       sync_recv_mesg_maxlen);
+                                       
tinfo->net->ipvs->sync_recv_mesg_maxlen);
                        if (len <= 0) {
                                pr_err("receiving message error\n");
                                break;
                        }
-
-                       /* disable bottom half, because it accesses the data
+                       /* disable bottom half per netns, because it accesses 
the data
                           shared by softirq while getting/creating conns */
                        local_bh_disable();
-                       ip_vs_process_message(tinfo->buf, len);
+                       ip_vs_process_message(tinfo->net, tinfo->buf, len);
                        local_bh_enable();
                }
        }
@@ -832,41 +818,43 @@ static int sync_thread_backup(void *data)
 }


-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 {
        struct ip_vs_sync_thread_data *tinfo;
        struct task_struct **realtask, *task;
        struct socket *sock;
+       struct netns_ipvs *ipvs = net->ipvs;
        char *name, *buf = NULL;
        int (*threadfn)(void *data);
        int result = -ENOMEM;

-       IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
+       IP_VS_DBG(7, "%s(): pid %d inc:%d\n", __func__, task_pid_nr(current),
+                                            ipvs->inc);
        IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
                  sizeof(struct ip_vs_sync_conn));

        if (state == IP_VS_STATE_MASTER) {
-               if (sync_master_thread)
+               if (ipvs->sync_master_thread)
                        return -EEXIST;

-               strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
-                       sizeof(ip_vs_master_mcast_ifn));
-               ip_vs_master_syncid = syncid;
-               realtask = &sync_master_thread;
-               name = "ipvs_syncmaster";
+               strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
+                       sizeof(ipvs->master_mcast_ifn));
+               ipvs->master_syncid = syncid;
+               realtask = &ipvs->sync_master_thread;
+               name = "ipvs_master:%d";
                threadfn = sync_thread_master;
-               sock = make_send_sock();
+               sock = make_send_sock(net);
        } else if (state == IP_VS_STATE_BACKUP) {
-               if (sync_backup_thread)
+               if (ipvs->sync_backup_thread)
                        return -EEXIST;

-               strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
-                       sizeof(ip_vs_backup_mcast_ifn));
-               ip_vs_backup_syncid = syncid;
-               realtask = &sync_backup_thread;
-               name = "ipvs_syncbackup";
+               strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
+                       sizeof(ipvs->backup_mcast_ifn));
+               ipvs->backup_syncid = syncid;
+               realtask = &ipvs->sync_backup_thread;
+               name = "ipvs_backup:%d";
                threadfn = sync_thread_backup;
-               sock = make_receive_sock();
+               sock = make_receive_sock(net);
        } else {
                return -EINVAL;
        }
@@ -876,9 +864,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 
syncid)
                goto out;
        }

-       set_sync_mesg_maxlen(state);
+       set_sync_mesg_maxlen(net, state);
        if (state == IP_VS_STATE_BACKUP) {
-               buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+               buf = kmalloc(ipvs->sync_recv_mesg_maxlen, GFP_KERNEL);
                if (!buf)
                        goto outsocket;
        }
@@ -889,16 +877,17 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 
syncid)

        tinfo->sock = sock;
        tinfo->buf = buf;
+       tinfo->net = net;

-       task = kthread_run(threadfn, tinfo, name);
+       task = kthread_run(threadfn, tinfo, name, ipvs->inc);
        if (IS_ERR(task)) {
                result = PTR_ERR(task);
                goto outtinfo;
        }
-
+       IP_VS_DBG(1, "kthread %s started (%d)\n", name, task->pid);
        /* mark as active */
        *realtask = task;
-       ip_vs_sync_state |= state;
+       ipvs->sync_state |= state;

        /* increase the module use count */
        ip_vs_use_count_inc();
@@ -916,16 +905,19 @@ out:
 }


-int stop_sync_thread(int state)
+int stop_sync_thread(struct net *net, int state)
 {
+       struct netns_ipvs *ipvs = net->ipvs;
+
        IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));

        if (state == IP_VS_STATE_MASTER) {
-               if (!sync_master_thread)
+               if (!ipvs->sync_master_thread)
                        return -ESRCH;

-               pr_info("stopping master sync thread %d ...\n",
-                       task_pid_nr(sync_master_thread));
+               pr_info("stopping master sync thread %d  inc:%d...\n",
+                       task_pid_nr(ipvs->sync_master_thread),
+                       ipvs->inc);

                /*
                 * The lock synchronizes with sb_queue_tail(), so that we don't
@@ -933,21 +925,22 @@ int stop_sync_thread(int state)
                 * progress of stopping the master sync daemon.
                 */

-               spin_lock_bh(&ip_vs_sync_lock);
-               ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
-               spin_unlock_bh(&ip_vs_sync_lock);
-               kthread_stop(sync_master_thread);
-               sync_master_thread = NULL;
+               spin_lock_bh(&ipvs->sync_lock);
+               ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+               spin_unlock_bh(&ipvs->sync_lock);
+               kthread_stop(ipvs->sync_master_thread);
+               ipvs->sync_master_thread = NULL;
        } else if (state == IP_VS_STATE_BACKUP) {
-               if (!sync_backup_thread)
+               if (!ipvs->sync_backup_thread)
                        return -ESRCH;

-               pr_info("stopping backup sync thread %d ...\n",
-                       task_pid_nr(sync_backup_thread));
+               pr_info("stopping backup sync thread %d inc:%d...\n",
+                       task_pid_nr(ipvs->sync_backup_thread),
+                       ipvs->inc);

-               ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
-               kthread_stop(sync_backup_thread);
-               sync_backup_thread = NULL;
+               ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+               kthread_stop(ipvs->sync_backup_thread);
+               ipvs->sync_backup_thread = NULL;
        } else {
                return -EINVAL;
        }
@@ -957,3 +950,41 @@ int stop_sync_thread(int state)

        return 0;
 }
+
+/*
+ * Initialize data struct for each netns
+ */
+static int __net_init __ip_vs_sync_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net->ipvs;
+       INIT_LIST_HEAD(&ipvs->sync_queue);
+       spin_lock_init(&ipvs->sync_lock);
+       spin_lock_init(&ipvs->sync_buff_lock);
+
+       ipvs->sync_mcast_addr.sin_family = AF_INET;
+       ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
+       ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
+       return 0;
+}
+
+static void __ip_vs_sync_cleanup(struct net *net)
+{
+       stop_sync_thread(net, net->ipvs->sync_state &
+                             (IP_VS_STATE_MASTER | IP_VS_STATE_BACKUP));
+       return;
+}
+static struct pernet_operations ipvs_sync_ops = {
+       .init = __ip_vs_sync_init,
+       .exit = __ip_vs_sync_cleanup,
+};
+
+
+int __init ip_vs_sync_init(void)
+{
+       return register_pernet_subsys(&ipvs_sync_ops);
+}
+
+void __exit ip_vs_sync_cleanup(void)
+{
+       unregister_pernet_subsys(&ipvs_sync_ops);
+}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index a2e8497..d68178f 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -410,13 +410,15 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
        /* check if it is a connection of no-client-port */
        if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
                __be16 _pt, *p;
+               struct net *net;
                p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
                if (p == NULL)
                        goto tx_error;
-               ip_vs_conn_fill_cport(cp, *p);
+               net = dev_net(skb->dev);
+               ip_vs_conn_fill_cport(net, cp, *p);
                IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
        }
-
+       IP_VS_DBG(10, "%s() dst:%x\n", __func__, iph->daddr);
        if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos))))
                goto tx_error_icmp;

@@ -486,14 +488,16 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn 
*cp,
        /* check if it is a connection of no-client-port */
        if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
                __be16 _pt, *p;
+               struct net *net;
                p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
                                       sizeof(_pt), &_pt);
                if (p == NULL)
                        goto tx_error;
-               ip_vs_conn_fill_cport(cp, *p);
+               net = dev_net(skb->dev);
+               BUG_ON(!net);
+               ip_vs_conn_fill_cport(net, cp, *p);
                IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
        }
-
        rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0);
        if (!rt)
                goto tx_error_icmp;

-- 
Regards
Hans Schillstrom <hans.schillstrom@xxxxxxxxxxxx>
--
To unsubscribe from this list: send the line "unsubscribe lvs-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

<Prev in Thread] Current Thread [Next in Thread>
  • [RFC PATCH 9/9] ipvs network name space aware, Hans Schillstrom <=