| 
 
 
I'm having a (fairly serious) problem where dead servers are suddenly not
being removed from the pool. Until recently, this worked as expected. Now,
when I stop apache on a server, one of two things happens. Either the server
is not removed from the pool at all (as happened this morning: apache was
dead, yet requests kept coming in, and because the server had equal weight
but no active connections, ALL requests were going to the dead server), or
it is removed long after the checktimeout has passed (sometimes minutes or
even hours later).
 
This was working up until very recently. It has become a serious problem
causing downtime.
 
Relevant details:
 
OS is Gentoo:
 
[ebuild   R   ] sys-cluster/heartbeat-2.0.8  USE="ldirectord snmp -doc -management" 0 kB
 
lvs1 ~ # ldirectord -v
ldirectord  version 1.186
1999-2006 Jacob Rief, Horms and others
<http://www.vergenet.net/linux/ldirectord/>
 
ldirectord comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it
under certain conditions. See the GNU General Public Licence for details.
 
lvs1 ~ # /usr/lib/heartbeat/heartbeat -V
2.0.8
 
lvs1 resource.d # perl -v
 
This is perl, v5.8.8 built for x86_64-linux
 
Copyright 1987-2006, Larry Wall
 
Perl may be copied only under the terms of either the Artistic License or the
GNU General Public License, which may be found in the Perl 5 source kit.
 
Complete documentation for Perl, including FAQ lists, should be found on
this system using "man perl" or "perldoc perl".  If you have access to the
Internet, point your browser at http://www.perl.org/, the Perl Home Page.
 
lvs1 heartbeat # uname -a
Linux lvs1 2.6.24-gentoo-r8 #1 SMP Wed May 21 02:15:45 PDT 2008 x86_64
Intel(R) Pentium(R) D CPU 3.00GHz GenuineIntel GNU/Linux
 
 
 
lvs1 ha.d # cat ldirectord.cf
#
# Sample ldirectord configuration file to configure various virtual services.
#
# Ldirectord will connect to each real server once per second and request
# /index.html. If the data returned by the server does not contain the
# string "Test Message" then the test fails and the real server will be
# taken out of the available pool. The real server will be added back into
# the pool once the test succeeds. If all real servers are removed from the
# pool then localhost:80 is added to the pool as a fallback measure.
 
# Global Directives
checktimeout=30
checkinterval=3
fallback=192.168.1.40:80 gate 1
autoreload=yes
#logfile="/var/log/ldirectord.log"
#logfile="local0"
quiescent=no
 
# A sample virtual with a fallback that will override the global setting
virtual=10.0.0.16:22
        real=192.168.1.16:22 gate 1
        fallback=192.168.1.50:22 gate 1
        protocol=tcp
        checktype=ping
virtual=10.0.0.16:2049
        real=192.168.1.16:2049 gate 1
        fallback=192.168.1.50:2049 gate 1
        protocol=tcp
        checktype=connect
virtual=10.0.0.195:21
        real=192.168.1.195:21 gate 1
        fallback=192.168.1.196:21 gate 1
        protocol=tcp
        service=ftp
        login="testftp"
        passwd="testftplvs1"
        persistent=30000
virtual=10.0.0.195:80
        real=192.168.1.195:80 gate 1
        real=192.168.1.196:80 gate 1
        real=192.168.1.197:80 gate 1
        service=http
        request="/"
        receive="This page is not used"
        virtualhost=webcast.us.com
        scheduler=wlc
        persistent=30
        #netmask=255.255.255.255
        protocol=tcp
virtual=10.0.0.20:80
        real=192.168.1.54:80 gate 100
#       real=192.168.1.20:80 gate 100
        real=192.168.1.74:80 gate 100
        real=192.168.1.72:80 gate 100
        real=192.168.1.70:80 gate 100
        #fallback=192.168.1.40:80 gate 1
        service=http
        #request="/"
        #receive="This page is not used"
        #virtualhost=blog.us.com
        scheduler=wlc
        persistent=30
        #netmask=255.255.255.255
        protocol=tcp
virtual=10.0.0.120:80
        real=192.168.1.120:80 gate 100
        #fallback=192.168.1.120:80 gate 1
        service=http
        request="/test.html"
        receive="UP"
        virtualhost=ad.us.com
        scheduler=wlc
        persistent=30
        #netmask=255.255.255.255
        protocol=tcp
virtual=10.0.0.20:443
        real=192.168.1.54:443 gate 1
#        real=192.168.1.20:443 gate 1
        real=192.168.1.74:443 gate 1
        real=192.168.1.72:443 gate 1
        real=192.168.1.70:443 gate 1
        service=https
#       request="/"
#       receive="This page is not used"
#       virtualhost=webcast.us.com
        scheduler=wlc
        persistent=300
        #netmask=255.255.255.255
        protocol=tcp
virtual=10.0.0.24:443
        real=192.168.1.24:443 gate 1
        real=192.168.1.75:443 gate 1
#       real=192.168.1.23:443 gate 1
        real=192.168.1.71:443 gate 1
        real=192.168.1.73:443 gate 1
        service=https
#       request="/"
#       receive="This page is not used"
#       virtualhost=test.us.com
        scheduler=wlc
        persistent=60
        #netmask=255.255.255.255
        protocol=tcp
virtual=10.0.0.16:80
        real=192.168.1.50:80 gate 10
        real=192.168.1.51:80 gate 10
#       real=192.168.1.16:80 gate 10
        real=192.168.1.76:80 gate 10
#       real=192.168.1.78:80 gate 10
        service=http
        request="/"
        receive="TEST STRING"
        virtualhost=www.us.com
        scheduler=wlc
        persistent=600
        #netmask=255.255.255.255
        protocol=tcp
        emailalert="support@xxxxxx"
        emailalertfreq=3600
virtual=10.0.0.16:443
        real=192.168.1.50:443 gate 10
        real=192.168.1.51:443 gate 10
        real=192.168.1.16:443 gate 10
        real=192.168.1.76:443 gate 10
#       real=192.168.1.78:443 gate 10
        service=https
        request="/"
        receive="TEST STRING"
        virtualhost=www.us.com
        scheduler=wlc
        persistent=900
        #netmask=255.255.255.255
        protocol=tcp
virtual=10.0.0.12:80
        real=192.168.1.52:80 gate 12
        real=192.168.1.53:80 gate 12
        real=192.168.1.12:80 gate 12
        real=192.168.1.55:80 gate 12
#       checktimeout=17
#       checkinterval=10
        fallback=192.168.1.40:80 gate 1
        service=http
        request="/faq.php"
        receive="Us.com Forums vBulletin 3 Style"
        virtualhost=forum.us.com
        scheduler=wlc
        persistent=30
        #netmask=255.255.255.255
        protocol=tcp
 
lvs1 ha.d # cat haresources
lvs1 10.0.0.13/24/eth2:1 ldirectord
lvs1 10.0.0.16/24/eth2:2 ldirectord
lvs1 10.0.0.12/24/eth2:3 ldirectord
lvs1 10.0.0.195/24/eth2:4 ldirectord
lvs1 10.0.0.20/24/eth2:5 ldirectord
lvs1 10.0.0.24/24/eth2:6 ldirectord
lvs1 10.0.0.120/24/eth2:7 ldirectord
 
 
lvs1 ha.d # ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=1048576)
Prot LocalAddress:Port Scheduler Flags
  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
TCP  10.0.0.16:22 wrr
  -> 192.168.1.16:22              Route   1      0          0
TCP  10.0.0.16:2049 wrr
  -> 192.168.1.16:2049            Route   1      0          0
TCP  10.0.0.120:80 wlc persistent 30
  -> 192.168.1.120:80             Route   100    792        3554
TCP  10.0.0.20:80 wlc persistent 30
  -> 192.168.1.70:80              Route   100    325        633
  -> 192.168.1.72:80              Route   100    329        984
  -> 192.168.1.74:80              Route   0      1          567
     ^-- (note: 192.168.1.74 above is currently weighted to 0 for other reasons)
  -> 192.168.1.54:80              Route   100    329        834
TCP  10.0.0.16:80 wlc persistent 600
  -> 192.168.1.76:80              Route   10     697        1446
  -> 192.168.1.51:80              Route   10     697        2255
  -> 192.168.1.50:80              Route   10     707        1520
TCP  10.0.0.12:80 wlc persistent 30
  -> 192.168.1.55:80              Route   12     311        648
  -> 192.168.1.12:80              Route   12     300        1087
  -> 192.168.1.53:80              Route   12     320        782
  -> 192.168.1.52:80              Route   12     303        729
TCP  10.0.0.195:80 wlc persistent 30
  -> 192.168.1.197:80             Route   1      68         1
  -> 192.168.1.196:80             Route   1      68         17
  -> 192.168.1.195:80             Route   1      67         58
TCP  10.0.0.24:443 wlc persistent 60
  -> 192.168.1.73:443             Route   1      0          4
  -> 192.168.1.71:443             Route   1      0          7
  -> 192.168.1.75:443             Route   1      0          1
  -> 192.168.1.24:443             Route   1      0          10
TCP  10.0.0.20:443 wlc persistent 300
  -> 192.168.1.70:443             Route   1      0          0
  -> 192.168.1.72:443             Route   1      0          0
  -> 192.168.1.74:443             Route   1      0          0
  -> 192.168.1.54:443             Route   1      0          0
TCP  10.0.0.16:443 wlc persistent 900
  -> 192.168.1.76:443             Route   10     2          17
  -> 192.168.1.51:443             Route   10     4          84
  -> 192.168.1.50:443             Route   10     1          84
TCP  10.0.0.195:21 wrr persistent 30000
  -> 192.168.1.196:21             Route   1      0          1
 
OUTPUT AFTER STOPPING APACHE FOR SEVERAL MINUTES ON 192.168.1.52:80. Note
that the InActConn column more than doubled for that server (729 -> 2100),
yet it is still in the pool.
 
lvs1 resource.d # ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=1048576)
Prot LocalAddress:Port Scheduler Flags
  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
TCP  10.0.0.16:22 wrr
  -> 192.168.1.16:22              Route   1      0          0
TCP  10.0.0.16:2049 wrr
  -> 192.168.1.16:2049            Route   1      0          0
TCP  10.0.0.120:80 wlc persistent 30
  -> 192.168.1.120:80             Route   100    812        3483
TCP  10.0.0.20:80 wlc persistent 30
  -> 192.168.1.70:80              Route   100    312        767
  -> 192.168.1.72:80              Route   100    299        1238
  -> 192.168.1.74:80              Route   0      1          525
  -> 192.168.1.54:80              Route   100    311        396
TCP  10.0.0.16:80 wlc persistent 600
  -> 192.168.1.76:80              Route   10     776        1387
  -> 192.168.1.51:80              Route   10     704        2342
  -> 192.168.1.50:80              Route   10     723        1303
TCP  10.0.0.12:80 wlc persistent 30
  -> 192.168.1.55:80              Route   12     291        681
  -> 192.168.1.12:80              Route   12     291        918
  -> 192.168.1.53:80              Route   12     299        907
  -> 192.168.1.52:80              Route   12     199        2100
TCP  10.0.0.195:80 wlc persistent 30
  -> 192.168.1.197:80             Route   1      69         6
  -> 192.168.1.196:80             Route   1      69         18
  -> 192.168.1.195:80             Route   1      69         15
TCP  10.0.0.24:443 wlc persistent 60
  -> 192.168.1.73:443             Route   1      0          8
  -> 192.168.1.71:443             Route   1      0          3
  -> 192.168.1.75:443             Route   1      0          1
  -> 192.168.1.24:443             Route   1      0          8
TCP  10.0.0.20:443 wlc persistent 300
  -> 192.168.1.70:443             Route   1      0          0
  -> 192.168.1.72:443             Route   1      0          0
  -> 192.168.1.74:443             Route   1      0          0
  -> 192.168.1.54:443             Route   1      0          0
TCP  10.0.0.16:443 wlc persistent 900
  -> 192.168.1.76:443             Route   10     4          153
  -> 192.168.1.51:443             Route   10     0          13
  -> 192.168.1.50:443             Route   10     0          39
TCP  10.0.0.195:21 wrr persistent 30000
  -> 192.168.1.196:21             Route   1      0          1
 
 |