LVS
lvs-users
Google
 
Web LinuxVirtualServer.org

[lvs-users] Dead servers not being removed from pool, ldirectord

To: <lvs-users@xxxxxxxxxxxxxxxxxxxxxx>
Subject: [lvs-users] Dead servers not being removed from pool, ldirectord
From: "Michael S. Moody" <michael@xxxxxx>
Date: Thu, 22 May 2008 07:05:34 -0600
I'm having a (fairly serious) problem where dead servers are suddenly not
being removed from the pool. Up until recently, this worked as expected.
Now, when I stop apache on a server, one of two things happens: either the
server is never removed from the pool (as happened this morning: apache was
dead, yet requests kept coming in, and because the dead server had equal
weight but no active sessions, ALL requests were going to it), or it is
removed long after the checktimeout has passed (sometimes minutes or even
hours later).

 

This was working up until very recently. It has become a serious problem
causing downtime.

 

Relevant details:

 

OS is Gentoo:

 

[ebuild   R   ] sys-cluster/heartbeat-2.0.8  USE="ldirectord snmp -doc -management" 0 kB

 

lvs1 ~ # ldirectord -v

ldirectord  version 1.186

1999-2006 Jacob Rief, Horms and others

<http://www.vergenet.net/linux/ldirectord/>

 

ldirectord comes with ABSOLUTELY NO WARRANTY.

This is free software, and you are welcome to redistribute it

under certain conditions. See the GNU General Public Licence for details.

 

lvs1 ~ # /usr/lib/heartbeat/heartbeat -V

2.0.8

 

lvs1 resource.d # perl -v

 

This is perl, v5.8.8 built for x86_64-linux

 

Copyright 1987-2006, Larry Wall

 

Perl may be copied only under the terms of either the Artistic License or the

GNU General Public License, which may be found in the Perl 5 source kit.

 

Complete documentation for Perl, including FAQ lists, should be found on

this system using "man perl" or "perldoc perl".  If you have access to the

Internet, point your browser at http://www.perl.org/, the Perl Home Page.

 

lvs1 heartbeat # uname -a

Linux lvs1 2.6.24-gentoo-r8 #1 SMP Wed May 21 02:15:45 PDT 2008 x86_64 Intel(R) Pentium(R) D CPU 3.00GHz GenuineIntel GNU/Linux

 

 

 

lvs1 ha.d # cat ldirectord.cf

#

# Sample ldirectord configuration file to configure various virtual services.

#

# Ldirectord will connect to each real server once per second and request

# /index.html. If the data returned by the server does not contain the

# string "Test Message" then the test fails and the real server will be

# taken out of the available pool. The real server will be added back into

# the pool once the test succeeds. If all real servers are removed from the

# pool then localhost:80 is added to the pool as a fallback measure.

 

# Global Directives

checktimeout=30

checkinterval=3

fallback=192.168.1.40:80 gate 1

autoreload=yes

#logfile="/var/log/ldirectord.log"

#logfile="local0"

quiescent=no

 

# A sample virtual with a fallback that will override the global setting

virtual=10.0.0.16:22

        real=192.168.1.16:22 gate 1

        fallback=192.168.1.50:22 gate 1

        protocol=tcp

        checktype=ping

virtual=10.0.0.16:2049

        real=192.168.1.16:2049 gate 1

        fallback=192.168.1.50:2049 gate 1

        protocol=tcp

        checktype=connect

virtual=10.0.0.195:21

        real=192.168.1.195:21 gate 1

        fallback=192.168.1.196:21 gate 1

        protocol=tcp

        service=ftp

        login="testftp"

        passwd="testftplvs1"

        persistent=30000

virtual=10.0.0.195:80

        real=192.168.1.195:80 gate 1

        real=192.168.1.196:80 gate 1

        real=192.168.1.197:80 gate 1

        service=http

        request="/"

        receive="This page is not used"

        virtualhost=webcast.us.com

        scheduler=wlc

        persistent=30

        #netmask=255.255.255.255

        protocol=tcp

virtual=10.0.0.20:80

        real=192.168.1.54:80 gate 100

#       real=192.168.1.20:80 gate 100

        real=192.168.1.74:80 gate 100

        real=192.168.1.72:80 gate 100

        real=192.168.1.70:80 gate 100

        #fallback=192.168.1.40:80 gate 1

        service=http

        #request="/"

        #receive="This page is not used"

        #virtualhost=blog.us.com

        scheduler=wlc

        persistent=30

        #netmask=255.255.255.255

        protocol=tcp

virtual=10.0.0.120:80

        real=192.168.1.120:80 gate 100

        #fallback=192.168.1.120:80 gate 1

        service=http

        request="/test.html"

        receive="UP"

        virtualhost=ad.us.com

        scheduler=wlc

        persistent=30

        #netmask=255.255.255.255

        protocol=tcp

virtual=10.0.0.20:443

        real=192.168.1.54:443 gate 1

#        real=192.168.1.20:443 gate 1

        real=192.168.1.74:443 gate 1

        real=192.168.1.72:443 gate 1

        real=192.168.1.70:443 gate 1

        service=https

#       request="/"

#       receive="This page is not used"

#       virtualhost=webcast.us.com

        scheduler=wlc

        persistent=300

        #netmask=255.255.255.255

        protocol=tcp

virtual=10.0.0.24:443

        real=192.168.1.24:443 gate 1

        real=192.168.1.75:443 gate 1

#       real=192.168.1.23:443 gate 1

        real=192.168.1.71:443 gate 1

        real=192.168.1.73:443 gate 1

        service=https

#       request="/"

#       receive="This page is not used"

#       virtualhost=test.us.com

        scheduler=wlc

        persistent=60

        #netmask=255.255.255.255

        protocol=tcp

virtual=10.0.0.16:80

        real=192.168.1.50:80 gate 10

        real=192.168.1.51:80 gate 10

#       real=192.168.1.16:80 gate 10

        real=192.168.1.76:80 gate 10

#       real=192.168.1.78:80 gate 10

        service=http

        request="/"

        receive="TEST STRING"

        virtualhost=www.us.com

        scheduler=wlc

        persistent=600

        #netmask=255.255.255.255

        protocol=tcp

        emailalert="support@xxxxxx"

        emailalertfreq=3600

virtual=10.0.0.16:443

        real=192.168.1.50:443 gate 10

        real=192.168.1.51:443 gate 10

        real=192.168.1.16:443 gate 10

        real=192.168.1.76:443 gate 10

#       real=192.168.1.78:443 gate 10

        service=https

        request="/"

        receive="TEST STRING"

        virtualhost=www.us.com

        scheduler=wlc

        persistent=900

        #netmask=255.255.255.255

        protocol=tcp

virtual=10.0.0.12:80

        real=192.168.1.52:80 gate 12

        real=192.168.1.53:80 gate 12

        real=192.168.1.12:80 gate 12

        real=192.168.1.55:80 gate 12

#       checktimeout=17

#       checkinterval=10

        fallback=192.168.1.40:80 gate 1

        service=http

        request="/faq.php"

        receive="Us.com Forums vBulletin 3 Style"

        virtualhost=forum.us.com

        scheduler=wlc

        persistent=30

        #netmask=255.255.255.255

        protocol=tcp

 

lvs1 ha.d # cat haresources

lvs1 10.0.0.13/24/eth2:1 ldirectord

lvs1 10.0.0.16/24/eth2:2 ldirectord

lvs1 10.0.0.12/24/eth2:3 ldirectord

lvs1 10.0.0.195/24/eth2:4 ldirectord

lvs1 10.0.0.20/24/eth2:5 ldirectord

lvs1 10.0.0.24/24/eth2:6 ldirectord

lvs1 10.0.0.120/24/eth2:7 ldirectord

 

 

lvs1 ha.d # ipvsadm -Ln

IP Virtual Server version 1.2.1 (size=1048576)

Prot LocalAddress:Port Scheduler Flags

  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn

TCP  10.0.0.16:22 wrr

  -> 192.168.1.16:22              Route   1      0          0

TCP  10.0.0.16:2049 wrr

  -> 192.168.1.16:2049            Route   1      0          0

TCP  10.0.0.120:80 wlc persistent 30

  -> 192.168.1.120:80             Route   100    792        3554

TCP  10.0.0.20:80 wlc persistent 30

  -> 192.168.1.70:80              Route   100    325        633

  -> 192.168.1.72:80              Route   100    329        984

  -> 192.168.1.74:80              Route   0      1          567
--------------> currently weighted to 0 for other reasons

  -> 192.168.1.54:80              Route   100    329        834

TCP  10.0.0.16:80 wlc persistent 600

  -> 192.168.1.76:80              Route   10     697        1446

  -> 192.168.1.51:80              Route   10     697        2255

  -> 192.168.1.50:80              Route   10     707        1520

TCP  10.0.0.12:80 wlc persistent 30

  -> 192.168.1.55:80              Route   12     311        648

  -> 192.168.1.12:80              Route   12     300        1087

  -> 192.168.1.53:80              Route   12     320        782

  -> 192.168.1.52:80              Route   12     303        729

TCP  10.0.0.195:80 wlc persistent 30

  -> 192.168.1.197:80             Route   1      68         1

  -> 192.168.1.196:80             Route   1      68         17

  -> 192.168.1.195:80             Route   1      67         58

TCP  10.0.0.24:443 wlc persistent 60

  -> 192.168.1.73:443             Route   1      0          4

  -> 192.168.1.71:443             Route   1      0          7

  -> 192.168.1.75:443             Route   1      0          1

  -> 192.168.1.24:443             Route   1      0          10

TCP  10.0.0.20:443 wlc persistent 300

  -> 192.168.1.70:443             Route   1      0          0

  -> 192.168.1.72:443             Route   1      0          0

  -> 192.168.1.74:443             Route   1      0          0

  -> 192.168.1.54:443             Route   1      0          0

TCP  10.0.0.16:443 wlc persistent 900

  -> 192.168.1.76:443             Route   10     2          17

  -> 192.168.1.51:443             Route   10     4          84

  -> 192.168.1.50:443             Route   10     1          84

TCP  10.0.0.195:21 wrr persistent 30000

  -> 192.168.1.196:21             Route   1      0          1

 

OUTPUT AFTER STOPPING APACHE FOR SEVERAL MINUTES ON 192.168.1.52:80. Note
that the InActConn column for that server has more than doubled (729 -> 2100),
and the server is still listed in the pool.

 

lvs1 resource.d # ipvsadm -Ln

IP Virtual Server version 1.2.1 (size=1048576)

Prot LocalAddress:Port Scheduler Flags

  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn

TCP  10.0.0.16:22 wrr

  -> 192.168.1.16:22              Route   1      0          0

TCP  10.0.0.16:2049 wrr

  -> 192.168.1.16:2049            Route   1      0          0

TCP  10.0.0.120:80 wlc persistent 30

  -> 192.168.1.120:80             Route   100    812        3483

TCP  10.0.0.20:80 wlc persistent 30

  -> 192.168.1.70:80              Route   100    312        767

  -> 192.168.1.72:80              Route   100    299        1238

  -> 192.168.1.74:80              Route   0      1          525

  -> 192.168.1.54:80              Route   100    311        396

TCP  10.0.0.16:80 wlc persistent 600

  -> 192.168.1.76:80              Route   10     776        1387

  -> 192.168.1.51:80              Route   10     704        2342

  -> 192.168.1.50:80              Route   10     723        1303

TCP  10.0.0.12:80 wlc persistent 30

  -> 192.168.1.55:80              Route   12     291        681

  -> 192.168.1.12:80              Route   12     291        918

  -> 192.168.1.53:80              Route   12     299        907

  -> 192.168.1.52:80              Route   12     199        2100

TCP  10.0.0.195:80 wlc persistent 30

  -> 192.168.1.197:80             Route   1      69         6

  -> 192.168.1.196:80             Route   1      69         18

  -> 192.168.1.195:80             Route   1      69         15

TCP  10.0.0.24:443 wlc persistent 60

  -> 192.168.1.73:443             Route   1      0          8

  -> 192.168.1.71:443             Route   1      0          3

  -> 192.168.1.75:443             Route   1      0          1

  -> 192.168.1.24:443             Route   1      0          8

TCP  10.0.0.20:443 wlc persistent 300

  -> 192.168.1.70:443             Route   1      0          0

  -> 192.168.1.72:443             Route   1      0          0

  -> 192.168.1.74:443             Route   1      0          0

  -> 192.168.1.54:443             Route   1      0          0

TCP  10.0.0.16:443 wlc persistent 900

  -> 192.168.1.76:443             Route   10     4          153

  -> 192.168.1.51:443             Route   10     0          13

  -> 192.168.1.50:443             Route   10     0          39

TCP  10.0.0.195:21 wrr persistent 30000

  -> 192.168.1.196:21             Route   1      0          1

 


<Prev in Thread] Current Thread [Next in Thread>