Index: ip_vs.h =================================================================== RCS file: /home/wensong/cvsroot/ipvs/ip_vs.h,v retrieving revision 1.16 diff -u -r1.16 ip_vs.h --- ip_vs.h 2001/03/22 12:57:46 1.16 +++ ip_vs.h 2001/04/03 14:57:09 @@ -6,7 +6,7 @@ #ifndef _IP_VS_H #define _IP_VS_H -#define IP_VS_VERSION_CODE 0x000207 +#define IP_VS_VERSION_CODE 0x000208 #define NVERSION(version) \ (version >> 16) & 0xFF, \ (version >> 8) & 0xFF, \ @@ -420,7 +420,6 @@ extern struct ip_vs_timeout_table vs_timeout_table; extern struct ip_vs_timeout_table vs_timeout_table_dos; -extern atomic_t ip_vs_conn_no_cport_cnt; extern int ip_vs_conn_hash(struct ip_vs_conn *cp); extern int ip_vs_conn_unhash(struct ip_vs_conn *cp); @@ -428,6 +427,7 @@ (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); extern struct ip_vs_conn *ip_vs_conn_out_get (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); +extern void __ip_vs_conn_put(struct ip_vs_conn *cp); extern void ip_vs_conn_put(struct ip_vs_conn *cp); extern struct ip_vs_conn * ip_vs_conn_new(int proto, __u32 maddr, __u16 mport, __u32 saddr, @@ -445,11 +445,6 @@ extern int ip_vs_conn_init(void); extern void ip_vs_conn_cleanup(void); -static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) -{ - atomic_dec(&cp->refcnt); -} - static inline void ip_vs_control_del(struct ip_vs_conn *cp) { struct ip_vs_conn *ctl_cp = cp->control; @@ -496,7 +491,6 @@ cp->control = ctl_cp; atomic_inc(&ctl_cp->n_control); } - /* Index: ip_vs_conn.c =================================================================== RCS file: /home/wensong/cvsroot/ipvs/ip_vs_conn.c,v retrieving revision 1.13 diff -u -r1.13 ip_vs_conn.c --- ip_vs_conn.c 2001/03/22 12:57:46 1.13 +++ ip_vs_conn.c 2001/04/03 14:57:09 @@ -50,7 +50,7 @@ /* * Connection hash table: for input and output packets lookups of IPVS */ -struct list_head *ip_vs_table; +static struct list_head *ip_vs_conn_tab; /* SLAB cache for IPVS connections */ static kmem_cache_t 
*ip_vs_conn_cachep; @@ -58,23 +58,25 @@ /* * No client port connection couter */ -atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); +static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); /* - * Set ip_vs_conn expiration (deletion) and adds timer, - * if timeout==0 cancel expiration. - * Warning: it does not check/delete previous timer! + * Set ip_vs_conn expiration. */ static inline void -__ip_vs_set_expire(struct ip_vs_conn *cp, unsigned long tout) +__ip_vs_set_expire(struct ip_vs_conn *cp, unsigned long expires) { - if (tout) { - cp->timer.expires = jiffies+tout; - add_sltimer(&cp->timer); - } else { - del_sltimer(&cp->timer); - } + mod_sltimer(&cp->timer, expires); +} + +/* + * Unset (cancel) ip_vs_conn expiration. + */ +static inline void +__ip_vs_unset_expire(struct ip_vs_conn *cp) +{ + del_sltimer(&cp->timer); } @@ -139,7 +141,7 @@ /* - * Hashes ip_vs_conn in ip_vs_table by proto,addr,port. + * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. * returns bool success. */ int ip_vs_conn_hash(struct ip_vs_conn *cp) @@ -160,7 +162,7 @@ ct_write_lock(hash); - list_add(&cp->c_list, &ip_vs_table[hash]); + list_add(&cp->c_list, &ip_vs_conn_tab[hash]); cp->flags |= IP_VS_CONN_F_HASHED; atomic_inc(&cp->refcnt); @@ -171,7 +173,7 @@ /* - * UNhashes ip_vs_conn from ip_vs_table. + * UNhashes ip_vs_conn from ip_vs_conn_tab. * should be called with locked tables. * returns bool success. */ @@ -202,7 +204,7 @@ /* - * Gets ip_vs_conn associated with supplied parameters in the ip_vs_table. + * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. * Called for pkts coming from OUTside-to-INside. 
* s_addr, s_port: pkt source address (foreign host) * d_addr, d_port: pkt dest address (load balancer) @@ -216,10 +218,10 @@ struct list_head *l,*e; hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + l = &ip_vs_conn_tab[hash]; ct_read_lock(hash); - - l = &ip_vs_table[hash]; + for (e=l->next; e!=l; e=e->next) { cp = list_entry(e, struct ip_vs_conn, c_list); if (s_addr==cp->caddr && s_port==cp->cport && @@ -227,7 +229,7 @@ protocol==cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); - __ip_vs_set_expire(cp, 0); + __ip_vs_unset_expire(cp); ct_read_unlock(hash); return cp; } @@ -241,24 +243,24 @@ struct ip_vs_conn *ip_vs_conn_in_get (int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) { - struct ip_vs_conn *ret; + struct ip_vs_conn *cp; - ret = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port); - if (!ret && atomic_read(&ip_vs_conn_no_cport_cnt)) - ret = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); + cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port); + if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) + cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ip_vs_proto_name(protocol), NIPQUAD(s_addr), ntohs(s_port), NIPQUAD(d_addr), ntohs(d_port), - ret?"hit":"not hit"); + cp?"hit":"not hit"); - return ret; + return cp; } /* - * Gets ip_vs_conn associated with supplied parameters in the ip_vs_table. + * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. * Called for pkts coming from inside-to-OUTside. 
* s_addr, s_port: pkt source address (inside host) * d_addr, d_port: pkt dest address (foreign host) @@ -271,14 +273,14 @@ struct ip_vs_conn *cp, *ret=NULL; struct list_head *l,*e; - /* + /* * Check for "full" addressed entries */ hash = ip_vs_conn_hashkey(protocol, d_addr, d_port); + l = &ip_vs_conn_tab[hash]; ct_read_lock(hash); - - l = &ip_vs_table[hash]; + for (e=l->next; e!=l; e=e->next) { cp = list_entry(e, struct ip_vs_conn, c_list); if (d_addr == cp->caddr && d_port == cp->cport && @@ -286,14 +288,14 @@ protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); - __ip_vs_set_expire(cp, 0); + __ip_vs_unset_expire(cp); ret = cp; break; } } ct_read_unlock(hash); - + IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", ip_vs_proto_name(protocol), NIPQUAD(s_addr), ntohs(s_port), @@ -304,24 +306,27 @@ } +/* + * Put back the conn and start its timer with the original expires + */ +void __ip_vs_conn_put(struct ip_vs_conn *cp) +{ + /* set it expire with its original expires */ + __ip_vs_set_expire(cp, cp->timer.expires); + + atomic_dec(&cp->refcnt); +} + + +/* + * Put back the conn and restart its timer with its timeout + */ void ip_vs_conn_put(struct ip_vs_conn *cp) { - /* - * Decrement refcnt - */ - __ip_vs_conn_put(cp); + /* reset it expire in its timeout */ + __ip_vs_set_expire(cp, jiffies+cp->timeout); - /* - * if refcnt==1 (only referenced by the conn table - */ - if (atomic_read(&cp->refcnt) == 1) { - __ip_vs_set_expire(cp, cp->timeout); - } else { - IP_VS_DBG(0, "did not set timer with refcnt=%d, " - "called from %p\n", - atomic_read(&cp->refcnt), - __builtin_return_address(0)); - } + atomic_dec(&cp->refcnt); } @@ -371,6 +376,7 @@ }, /* timeout */ }; + /* * Timeout table to use for the VS entries * If NULL we use the default table (vs_timeout_table). 
@@ -379,7 +385,6 @@ static struct ip_vs_timeout_table *ip_vs_timeout_table = &vs_timeout_table; - static const char * state_name_table[IP_VS_S_LAST+1] = { [IP_VS_S_NONE] = "NONE", [IP_VS_S_ESTABLISHED] = "ESTABLISHED", @@ -1099,7 +1104,7 @@ /* * Unbind a connection entry with its VS destination - * Called by the connection_expire function. + * Called by the ip_vs_conn_expire function. */ void ip_vs_unbind_dest(struct ip_vs_conn *cp) { @@ -1119,9 +1124,10 @@ /* * Decrease the inactconns or activeconns counter - * if it is not a connection template (cp->cport!=0). + * if it is not a connection template ((cp->cport!=0) + * || (cp->flags & IP_VS_CONN_F_NO_CPORT)). */ - if (cp->cport) { + if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) { if (cp->flags & IP_VS_CONN_F_INACTIVE) { atomic_dec(&dest->inactconns); } else { @@ -1219,21 +1225,23 @@ if (atomic_read(&cp->n_control)) goto expire_later; - /* + /* * does anybody controls me? */ if (cp->control) ip_vs_control_del(cp); - if (ip_vs_conn_unhash(cp)) { - ip_vs_unbind_dest(cp); - ip_vs_unbind_app(cp); - } - /* + * unhash it if it is hashed in the conn table + */ + ip_vs_conn_unhash(cp); + + /* * refcnt==1 implies I'm the only one referrer */ if (atomic_read(&cp->refcnt) == 1) { + ip_vs_unbind_dest(cp); + ip_vs_unbind_app(cp); ip_vs_timeout_detach(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); @@ -1255,7 +1263,7 @@ /* - * Create a new connection entry for IPVS and hash it into the ip_vs_table. + * Create a new connection entry for IPVS and hash it into the ip_vs_conn_tab. 
*/ struct ip_vs_conn * ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport, @@ -1287,7 +1295,7 @@ cp->flags = flags; cp->app_data = NULL; cp->control = NULL; - + atomic_set(&cp->n_control, 0); atomic_set(&cp->in_pkts, 0); @@ -1298,7 +1306,7 @@ */ atomic_set(&cp->refcnt, 1); - /* Hash it in the ip_vs_table */ + /* Hash it in the ip_vs_conn_tab */ ip_vs_conn_hash(cp); if (flags & IP_VS_CONN_F_NO_CPORT) @@ -1336,7 +1344,7 @@ */ ct_read_lock_bh(idx); - l = &ip_vs_table[idx]; + l = &ip_vs_conn_tab[idx]; for (e=l->next; e!=l; e=e->next) { cp = list_entry(e, struct ip_vs_conn, c_list); pos += 128; @@ -1418,18 +1426,18 @@ * Lock is actually needed in this loop. */ ct_write_lock(hash); - - l = &ip_vs_table[hash]; + + l = &ip_vs_conn_tab[hash]; for (e=l->next; e!=l; e=e->next) { cp = list_entry(e, struct ip_vs_conn, c_list); - if (cp->cport == 0) + if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) /* connection template */ continue; switch(cp->state) { case IP_VS_S_SYN_RECV: case IP_VS_S_SYNACK: break; - + case IP_VS_S_ESTABLISHED: case IP_VS_S_UDP: if (todrop_entry(cp)) @@ -1447,29 +1455,29 @@ IP_VS_DBG(4, "Drop connection\n"); ct = cp->control; fn = (cp->timer).function; - if (!del_sltimer(&cp->timer)) + if (del_sltimer(&cp->timer)) fn((unsigned long)cp); if (ct && !atomic_read(&ct->n_control)) { IP_VS_DBG(4, "Drop connection template\n"); - del_sltimer(&ct->timer); - fn((unsigned long)ct); + if(del_sltimer(&ct->timer)) + fn((unsigned long)ct); } ct_write_lock(hash); } ct_write_unlock(hash); } } - + int ip_vs_conn_init(void) { int idx; - + /* * Allocate the connection hash table and initialize its list heads */ - ip_vs_table = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head)); - if (!ip_vs_table) { + ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head)); + if (!ip_vs_conn_tab) { return -ENOMEM; } @@ -1478,10 +1486,10 @@ sizeof(struct ip_vs_conn), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!ip_vs_conn_cachep) { - 
vfree(ip_vs_table); + vfree(ip_vs_conn_tab); return -ENOMEM; } - + IP_VS_INFO("Connection hash table configured " "(size=%d, memory=%ldKbytes)\n", IP_VS_CONN_TAB_SIZE, @@ -1490,7 +1498,7 @@ sizeof(struct ip_vs_conn)); for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_table[idx]); + INIT_LIST_HEAD(&ip_vs_conn_tab[idx]); } for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { @@ -1507,5 +1515,5 @@ /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); proc_net_remove("ip_vs_conn"); - vfree(ip_vs_table); + vfree(ip_vs_conn_tab); } Index: ip_vs_core.c =================================================================== RCS file: /home/wensong/cvsroot/ipvs/ip_vs_core.c,v retrieving revision 1.16 diff -u -r1.16 ip_vs_core.c --- ip_vs_core.c 2001/03/22 12:57:46 1.16 +++ ip_vs_core.c 2001/04/03 14:57:09 @@ -24,10 +24,6 @@ * */ -#ifdef MODULE -#define EXPORT_SYMTAB -#endif - #include #include #include @@ -314,15 +310,8 @@ ip_vs_bind_dest(ct, dest); ct->timeout = svc->timeout; } else { - /* - * Template found and its destination is available. - */ + /* set destination with the found template */ dest = ct->dest; - - /* - * Delete its timer so that it can be put back. - */ - del_sltimer(&ct->timer); } dport = dest->port; } else { @@ -379,12 +368,8 @@ ip_vs_bind_dest(ct, dest); ct->timeout = svc->timeout; } else { + /* set destination with the found template */ dest = ct->dest; - - /* - * Delete its timer so that it can be put back. 
- */ - del_sltimer(&ct->timer); } dport = portp[1]; } @@ -531,15 +516,15 @@ /* bind the bypass_xmit */ ip_vs_bind_bypass_xmit(cp); + /* statistics */ + ip_vs_in_stats(cp, skb); + /* set state */ ip_vs_set_state(cp, VS_STATE_INPUT, iph, portp); /* transmit the first SYN packet */ ret = cp->packet_xmit(skb, cp); - /* statistics */ - ip_vs_in_stats(cp, skb); - ip_vs_conn_put(cp); return ret; } @@ -679,13 +664,14 @@ /* the TCP/UDP dest port - cannot redo check */ pptr[1] = cp->vport; - ip_vs_out_stats(cp, skb); - __ip_vs_conn_put(cp); - /* And finally the ICMP checksum */ icmph->checksum = 0; icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); + /* do the statistics and put it back */ + ip_vs_out_stats(cp, skb); + ip_vs_conn_put(cp); + IP_VS_DBG(11, "Forwarding correct outgoing ICMP to " "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n", NIPQUAD(ciph->saddr), ntohs(pptr[0]), @@ -784,11 +770,10 @@ ip_vs_proto_name(iph->protocol), csum); skb->csum = csum_partial(h.raw , doff, csum); csum_ok++; - break; - case CHECKSUM_HW: if (csum_tcpudp_magic(iph->saddr, iph->daddr, size, iph->protocol, skb->csum)) { + ip_vs_conn_put(cp); IP_VS_DBG(0, "Outgoing failed %s checksum " "from %d.%d.%d.%d (size=%d)!\n", ip_vs_proto_name(iph->protocol), @@ -997,7 +982,7 @@ /* The ICMP packet for VS/NAT must be written to correct addresses before being forwarded to the right server */ if ((skb=vs_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) { - __ip_vs_conn_put(cp); + ip_vs_conn_put(cp); return NF_DROP; } @@ -1015,7 +1000,6 @@ /* the TCP/UDP source port - cannot redo check */ pptr[0] = cp->dport; - __ip_vs_conn_put(cp); /* And finally the ICMP checksum */ icmph->checksum = 0; @@ -1063,12 +1047,14 @@ skb->nf_debug = 1 << NF_IP_LOCAL_OUT; #endif /* CONFIG_NETFILTER_DEBUG */ ip_send(skb); + ip_vs_conn_put(cp); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: dev_kfree_skb(skb); + ip_vs_conn_put(cp); return NF_STOLEN; } @@ -1142,10 +1128,9 @@ */ if (cp && cp->dest && 
!(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* - * If the dest is not avaiable, don't restart the timer - * of the packet, but silently drop it. + * If the dest is not available, start the timer of the conn + * with its original expires, and silently drop the packet. */ - add_sltimer(&cp->timer); __ip_vs_conn_put(cp); return NF_DROP; }