Hi Runhua,
I finally understand what your "goto out_next" means now. :) The code
might be more readable if we change it in the following way.
static void tcpsp_conn_flush(void)
{
        int idx;
        struct list_head *list;
        struct tcpsp_conn *cp;

  flush_again:
        for (idx=0; idx<TCPSP_CONN_TAB_SIZE; idx++) {
                /*
                 *  Lock is actually needed in this loop.
                 */
                write_lock_bh(&tcpsp_conn_lock);

                list = &tcpsp_conn_tab1[idx];
                while (!list_empty(list)) {
                        cp = list_entry(list->next, struct tcpsp_conn, f_list);
                        TCPSP_DBG(4, "delete the spliced connection\n");
                        if (del_timer(&cp->timer)) {
                                write_unlock(&tcpsp_conn_lock);
                                tcpsp_conn_expire_now(cp);
                                write_lock(&tcpsp_conn_lock);
                        }
                }
                write_unlock_bh(&tcpsp_conn_lock);
        }

        /* the counter may not be zero, because some conn entries may
           still be run by the slow timer handler, or are unhashed but
           still referenced */
        if (atomic_read(&tcpsp_conn_count) != 0) {
                schedule();
                goto flush_again;
        }
}
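
The key difference from the list_for_each_safe() version is that nothing
is cached across the unlock: list->next is re-read under the lock on every
pass, so the loop never follows a pointer that the timer handler on another
CPU may have freed in the meantime. A minimal side-by-side sketch of the
two loop bodies (adapted from the code in this thread and reusing the
declarations above; illustrative only, not verbatim tcpsp source):

        /* Racy: nxt is saved before the lock is dropped, so the timer
         * handler running on another CPU can free the entry nxt points
         * to while we are inside tcpsp_conn_expire_now(); the next
         * iteration then touches freed memory. */
        list_for_each_safe (e, nxt, &tcpsp_conn_tab1[idx]) {
                cp = list_entry(e, struct tcpsp_conn, f_list);
                if (del_timer(&cp->timer)) {
                        write_unlock(&tcpsp_conn_lock);
                        tcpsp_conn_expire_now(cp); /* nxt may be freed here */
                        write_lock(&tcpsp_conn_lock);
                }
        }

        /* Safe: the list head is re-examined under the lock on every
         * pass, so a concurrently freed entry is never dereferenced. */
        while (!list_empty(&tcpsp_conn_tab1[idx])) {
                cp = list_entry(tcpsp_conn_tab1[idx].next,
                                struct tcpsp_conn, f_list);
                if (del_timer(&cp->timer)) {
                        write_unlock(&tcpsp_conn_lock);
                        tcpsp_conn_expire_now(cp);
                        write_lock(&tcpsp_conn_lock);
                }
        }
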
Thanks,
Wensong
On Thu, 13 Nov 2003, yangrunhua wrote:
> Hi, Wensong
>
> But I think there is another problem:
>
> write_unlock(&tcpsp_conn_lock);
> // here we are outside the protection of tcpsp_conn_lock,
> // so TIMER_BH on the other CPU may run and free the nxt pointer
> tcpsp_conn_expire_now(cp);
> write_lock(&tcpsp_conn_lock);
>
> When list_for_each_safe continues with the next iteration, the nxt
> pointer may already have been freed by TIMER_BH on the other CPU, so
> continuing the loop is not safe.
>
> Something like the following may be better:
>
> if (del_timer(&cp->timer)) {
>         write_unlock_bh(&tcpsp_conn_lock);
>         tcpsp_conn_expire_now(cp);
>         goto out_next;
> }
>
> out_next continues at the next idx of the outer loop, not at the
> list_for_each_safe inner loop, so it is safe.
>
> Regards,
> Runhua Yang
>
> -----Original Message-----
> From: lvs-users-bounces@xxxxxxxxxxxxxxxxxxxxxx
> [mailto:lvs-users-bounces@xxxxxxxxxxxxxxxxxxxxxx] On Behalf Of Wensong Zhang
> Sent: Thursday, November 13, 2003 12:14 AM
> To: LinuxVirtualServer.org users mailing list.
> Subject: Re: [PATCH tcpsp]To avoid SMP race when rmmod
>
>
>
> Hi Runhua,
>
> Thanks for the finding and patch. Yes, the original code has a race problem.
>
> It may be better to change the code as follows:
>
> static void tcpsp_conn_flush(void)
> {
>         int idx;
>         struct tcpsp_conn *cp;
>         struct list_head *e, *nxt;
>
>   flush_again:
>         for (idx=0; idx<TCPSP_CONN_TAB_SIZE; idx++) {
>                 /*
>                  *  Lock is actually needed in this loop.
>                  */
>                 write_lock_bh(&tcpsp_conn_lock);
>
>                 list_for_each_safe (e, nxt, &tcpsp_conn_tab1[idx]) {
>                         cp = list_entry(e, struct tcpsp_conn, f_list);
>
>                         TCPSP_DBG(4, "delete spliced connection\n");
>                         if (del_timer(&cp->timer)) {
>                                 write_unlock(&tcpsp_conn_lock);
>                                 tcpsp_conn_expire_now(cp);
>                                 write_lock(&tcpsp_conn_lock);
>                         }
>                 }
>                 write_unlock_bh(&tcpsp_conn_lock);
>         }
>
>         /* the counter may be not NULL, because maybe some conn entries
>            are run by slow timer handler or unhashed but still referred */
>         if (atomic_read(&tcpsp_conn_count) != 0) {
>                 schedule();
>                 goto flush_again;
>         }
> }
>
>
> Regards,
>
> Wensong
>
>
> On Tue, 11 Nov 2003, yangrunhua wrote:
>
> > The original is not SMP safe; it should be like this:
> >
> > static void tcpsp_conn_flush(void)
> > {
> >         int idx;
> >         struct tcpsp_conn *cp;
> >         struct list_head *e, *nxt;
> >
> >   flush_again:
> >         for (idx=0; idx<TCPSP_CONN_TAB_SIZE; idx++) {
> >                 /*
> >                  *  Lock is actually needed in this loop.
> >                  */
> >                 write_lock_bh(&tcpsp_conn_lock); // should disable BH on this CPU
> >
> >                 list_for_each_safe (e, nxt, &tcpsp_conn_tab1[idx]) {
> >                         cp = list_entry(e, struct tcpsp_conn, f_list);
> >                         TCPSP_DBG(4, "del splicing connection\n");
> >                         if (del_timer(&cp->timer)) {
> >                                 // call del_timer under the protection of the
> >                                 // write lock of tcpsp_conn_lock, or cp may
> >                                 // already be freed
> >                                 write_unlock_bh(&tcpsp_conn_lock);
> >                                 tcpsp_conn_expire_now(cp);
> >                                 goto out_next;
> >                                 // should not continue, b/c nxt may already be
> >                                 // freed by TIMER_BH on the other CPU. And
> >                                 // continuing at the next idx may be good, b/c
> >                                 // it will not stick on this same cp if its
> >                                 // ref count is still over 1 under high load,
> >                                 // so let's continue at the next idx.
> >                         }
> >                 }
> >                 write_unlock_bh(&tcpsp_conn_lock);
> >   out_next:
> >                 continue;
> >         }
> >
> >         /* the counter may be not NULL, because maybe some conn entries
> >            are run by slow timer handler or unhashed but still referred */
> >         if (atomic_read(&tcpsp_conn_count) != 0) {
> >                 schedule();
> >                 goto flush_again;
> >         }
> > }
> >
> >
> > Regards,
> > Runhua Yang
> >
>
> _______________________________________________
> LinuxVirtualServer.org mailing list - lvs-users@xxxxxxxxxxxxxxxxxxxxxx
> Send requests to lvs-users-request@xxxxxxxxxxxxxxxxxxxxxx
> or go to http://www.in-addr.de/mailman/listinfo/lvs-users
>