Weird and big issue with server
Re: Weird and big issue with server
Do you have any automounted file systems on the remote hosts? Could you post the /etc/fstab and the output of the mount command for us? Thanks.
Re: Weird and big issue with server
This is just an example from one of the boxes having the issues.
FSTAB
/proc/mounts
DF (hung after tmpfs)
Not sure about the questions you asked.
FSTAB
Code: Select all
/dev/VolGroup00/rootVol00 / ext3 defaults 1 1
/dev/VolGroup00/tmpVol00 /tmp ext3 defaults 1 2
/dev/VolGroup00/usrVol00 /usr ext3 defaults 1 2
/dev/VolGroup00/varVol00 /var ext3 defaults 1 2
LABEL=/boot /boot ext3 defaults 1 2
tmpfs /dev/shm tmpfs defaults 0 0
devpts /dev/pts devpts gid=5,mode=620 0 0
sysfs /sys sysfs defaults 0 0
proc /proc proc defaults 0 0
/dev/VolGroup00/swpVol06 swap swap defaults 0 0
/dev/VolGroup00/swpVol03 swap swap defaults 0 0
/dev/VolGroup00/swpVol01 swap swap defaults 0 0
/dev/VolGroup00/swpVol08 swap swap defaults 0 0
/dev/VolGroup00/swpVol02 swap swap defaults 0 0
/dev/VolGroup00/swpVol07 swap swap defaults 0 0
/dev/VolGroup00/swpVol05 swap swap defaults 0 0
/dev/VolGroup00/swpVol04 swap swap defaults 0 0
/dev/VolGroup00/swpVol00 swap swap defaults 0 0
/dev/VolGroup00/swpVol09 swap swap defaults 0 0
###########################################################################################
/dev/mapper/us1001p1 /us1001 ext3 _netdev,defaults,acl 0 0
/dev/mapper/ur1001p1 /ur1001 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/ut1002p1 /ut1002 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/un1001p1 /un1001 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/un1002p1 /un1002 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/uc1001p1 /uc1001 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/uf1001p1 /uf1001 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/ud1007p1 /ud1007 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/ud1008p1 /ud1008 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/ud1010p1 /ud1010 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/ud1011p1 /ud1011 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/ud1012p1 /ud1012 ocfs2 _netdev,datavolume,nointr 0 0
/dev/mapper/ut1003p1 /ut1003 ocfs2 _netdev,datavolume,nointr 0 0
################################################################################################################
10.5.1.225:/interface /interface nfs _netdev,nolock,rw,hard,retry=3 0 0
#10.5.1.225:/archlog/PRODEBS /archlog/PRODEBS nfs _netdev,nolock,rw,hard,retry=3 0 0
10.5.2.6:/vol/vol_extn_NFS/qt_extn_chi_pap02_ua1002 /ua1002 nfs _netdev,rw,hard,retry=3 0 0
10.5.2.6:/vol/vol_extn_NFS/qt_extn_chi_pap02_ua1001 /ua1001 nfs _netdev,rw,hard,retry=3 0 0
10.5.2.6:/vol/vol_extn_NFS_temp/qt_backupnew /backupnew nfs _netdev,rw,hard,retry=3 0 0
#10.5.2.6:/vol/vol_extn_NFS_temp/qt_backupnew /backupnew nfs _netdev,rw,nolock,soft,bg,retry=3 0 0
########################## FOR OATM Project - Feb 17 2014 ##############################
/dev/mapper/backupp1 /backup ext3 _netdev,defaults,acl 0 0
Code: Select all
rootfs / rootfs rw 0 0
/dev/root / ext3 rw,data=ordered 0 0
/dev /dev tmpfs rw 0 0
/proc /proc proc rw 0 0
/sys /sys sysfs rw 0 0
/proc/bus/usb /proc/bus/usb usbfs rw 0 0
devpts /dev/pts devpts rw 0 0
/dev/VolGroup00/tmpVol00 /tmp ext3 rw,data=ordered 0 0
/dev/VolGroup00/usrVol00 /usr ext3 rw,data=ordered 0 0
/dev/VolGroup00/varVol00 /var ext3 rw,data=ordered 0 0
/dev/sdcg1 /boot ext3 rw,data=ordered 0 0
tmpfs /dev/shm tmpfs rw 0 0
none /proc/sys/fs/binfmt_misc binfmt_misc rw 0 0
sunrpc /var/lib/nfs/rpc_pipefs rpc_pipefs rw 0 0
configfs /sys/kernel/config configfs rw 0 0
ocfs2_dlmfs /dlm ocfs2_dlmfs rw 0 0
10.5.1.225:/interface /interface nfs rw,vers=3,rsize=524288,wsize=524288,hard,nolock,proto=tcp,timeo=600,retrans=2,sec=sys,addr=10.5.1.225 0 0
10.5.2.6:/vol/vol_extn_NFS/qt_extn_chi_pap02_ua1002 /ua1002 nfs rw,vers=3,rsize=65536,wsize=65536,hard,proto=tcp,timeo=600,retrans=2,sec=sys,addr=10.5.2.6 0 0
10.5.2.6:/vol/vol_extn_NFS/qt_extn_chi_pap02_ua1001 /ua1001 nfs rw,vers=3,rsize=65536,wsize=65536,hard,proto=tcp,timeo=600,retrans=2,sec=sys,addr=10.5.2.6 0 0
10.5.2.6:/vol/vol_extn_NFS_temp/qt_backupnew /backupnew nfs rw,vers=3,rsize=65536,wsize=65536,hard,proto=tcp,timeo=600,retrans=2,sec=sys,addr=10.5.2.6 0 0
/dev/mapper/us1001p1 /us1001 ext3 rw,data=ordered 0 0
/dev/mapper/ur1001p1 /ur1001 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/ut1002p1 /ut1002 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/un1001p1 /un1001 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/un1002p1 /un1002 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/uc1001p1 /uc1001 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/uf1001p1 /uf1001 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/ud1007p1 /ud1007 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/ud1008p1 /ud1008 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/ud1010p1 /ud1010 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/ud1011p1 /ud1011 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/ud1012p1 /ud1012 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
/dev/mapper/backupp1 /backup ext3 rw,data=ordered 0 0
/dev/mapper/ut1003p1 /ut1003 ocfs2 rw,_netdev,heartbeat=local,nointr,data=ordered,errors=remount-ro,datavolume 0 0
Code: Select all
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/mapper/VolGroup00-rootVol00
2031440 1556456 370128 81% /
/dev/mapper/VolGroup00-tmpVol00
10157368 9648492 0 100% /tmp
/dev/mapper/VolGroup00-usrVol00
15236080 11039008 3410640 77% /usr
/dev/mapper/VolGroup00-varVol00
15236080 14446204 3444 100% /var
/dev/sdcg1 100738 17343 78193 19% /boot
tmpfs 66009236 61844 65947392 1% /dev/shm
2 of XI5.6.14 Prod/DR/DEV - Nagios LogServer 2 Nodes
See my projects on the Exchange at BanditBBS - Also check out my Nagios stuff on my personal page at Bandit's Home and at github
See my projects on the Exchange at BanditBBS - Also check out my Nagios stuff on my personal page at Bandit's Home and at github
Re: Weird and big issue with server
Thanks for the info. To explain, what I was asking for was which options were used on the NFS mounts in fstab. The hard option retries indefinitely, and timeo states how long the NFS client waits for a response before it retries an NFS request. retrans is the number of times a request is retried.
Anyway, the fact that df -h hangs tells us that it's not just the check that has problems with those NFS mounts; the system itself does too. If you can, please try excluding NFS file systems from check_disk by adding -X nfs to the check. Otherwise, I would suggest checking with your systems admin folks to see why that NFS mount is not working.
Anyway, the fact that df -h hangs tells us that it's not just the check that has problems with those NFS mounts; the system itself does too. If you can, please try excluding NFS file systems from check_disk by adding -X nfs to the check. Otherwise, I would suggest checking with your systems admin folks to see why that NFS mount is not working.
Re: Weird and big issue with server
I haven't solved the issue yet, but I know the reason. Our NFS shares are all mounted with the hard option, and our standard is to also use intr so they can be interrupted, but apparently a good percentage of them don't have intr set.
Even if that is set though, sometimes the check_disk doesn't properly end the process when the timeout variable is reached. That might be a bug in the actual plugin though. Nothing else for this thread though, so please feel free to close it. I may end up reaching out to the plugin dev team in regards to the check_disk not properly closing on timeout.
Thanks
Even if that is set though, sometimes the check_disk doesn't properly end the process when the timeout variable is reached. That might be a bug in the actual plugin though. Nothing else for this thread though, so please feel free to close it. I may end up reaching out to the plugin dev team in regards to the check_disk not properly closing on timeout.
Thanks
2 of XI5.6.14 Prod/DR/DEV - Nagios LogServer 2 Nodes
See my projects on the Exchange at BanditBBS - Also check out my Nagios stuff on my personal page at Bandit's Home and at github
See my projects on the Exchange at BanditBBS - Also check out my Nagios stuff on my personal page at Bandit's Home and at github
Re: Weird and big issue with server
Thanks Banditt for getting back on this issue. Hope you can have the timeout issue resolved even with the NFS issues. We'll close this. Thanks.