Page 2 of 3

Re: How to monitor long running processes?

Posted: Thu Oct 04, 2018 1:21 am
by Siddharth Hegde
It is called zeus used for kafka. It is a single process

Re: How to monitor long running processes?

Posted: Thu Oct 04, 2018 7:08 am
by scottwilkerson
Siddharth Hegde wrote:It is called zeus used for kafka. It is a single process
Can you show which process it is when running

Code: Select all

ps -ef

Re: How to monitor long running processes?

Posted: Thu Oct 04, 2018 8:21 am
by Siddharth Hegde
scottwilkerson wrote:
Siddharth Hegde wrote:It is called zeus used for kafka. It is a single process
Can you show which process it is when running

Code: Select all

ps -ef

Code: Select all

/home/siddarth# ps -ef
UID         PID   PPID  C STIME TTY          TIME CMD
root          1      0  0 Jun24 ?        00:01:11 /sbin/init
root          2      0  0 Jun24 ?        00:00:01 [kthreadd]
root          4      2  0 Jun24 ?        00:00:00 [kworker/0:0H]
root          6      2  0 Jun24 ?        00:00:00 [mm_percpu_wq]
root          7      2  0 Jun24 ?        00:00:22 [ksoftirqd/0]
root          8      2  0 Jun24 ?        00:18:02 [rcu_sched]
root          9      2  0 Jun24 ?        00:00:00 [rcu_bh]
root         10      2  0 Jun24 ?        00:00:05 [migration/0]
root         11      2  0 Jun24 ?        00:00:21 [watchdog/0]
root         12      2  0 Jun24 ?        00:00:00 [cpuhp/0]
root         13      2  0 Jun24 ?        00:00:00 [cpuhp/1]
root         14      2  0 Jun24 ?        00:00:19 [watchdog/1]
root         15      2  0 Jun24 ?        00:00:05 [migration/1]
root         16      2  0 Jun24 ?        00:00:23 [ksoftirqd/1]
root         18      2  0 Jun24 ?        00:00:00 [kworker/1:0H]
root         19      2  0 Jun24 ?        00:00:00 [cpuhp/2]
root         20      2  0 Jun24 ?        00:00:17 [watchdog/2]
root         21      2  0 Jun24 ?        00:00:05 [migration/2]
root         22      2  0 Jun24 ?        00:01:45 [ksoftirqd/2]
root         24      2  0 Jun24 ?        00:00:00 [kworker/2:0H]
root         25      2  0 Jun24 ?        00:00:00 [cpuhp/3]
root         26      2  0 Jun24 ?        00:00:18 [watchdog/3]
root         27      2  0 Jun24 ?        00:00:05 [migration/3]
root         28      2  0 Jun24 ?        00:00:25 [ksoftirqd/3]
root         30      2  0 Jun24 ?        00:00:00 [kworker/3:0H]
root         31      2  0 Jun24 ?        00:00:00 [kdevtmpfs]
root         32      2  0 Jun24 ?        00:00:00 [netns]
root         33      2  0 Jun24 ?        00:00:00 [rcu_tasks_kthre]
root         34      2  0 Jun24 ?        00:00:00 [kauditd]
root         36      2  0 Jun24 ?        00:00:06 [khungtaskd]
root         37      2  0 Jun24 ?        00:00:07 [oom_reaper]
root         38      2  0 Jun24 ?        00:00:00 [writeback]
root         39      2  0 Jun24 ?        00:00:00 [kcompactd0]
root         40      2  0 Jun24 ?        00:00:00 [ksmd]
root         41      2  0 Jun24 ?        00:03:08 [khugepaged]
root         42      2  0 Jun24 ?        00:00:00 [crypto]
root         43      2  0 Jun24 ?        00:00:00 [kintegrityd]
root         44      2  0 Jun24 ?        00:00:00 [kblockd]
root         45      2  0 Jun24 ?        00:00:00 [ata_sff]
root         46      2  0 Jun24 ?        00:00:00 [md]
root         47      2  0 Jun24 ?        00:00:00 [edac-poller]
root         48      2  0 Jun24 ?        00:00:00 [ib-comp-wq]
root         49      2  0 Jun24 ?        00:00:00 [ib_mcast]
root         50      2  0 Jun24 ?        00:00:00 [ib_nl_sa_wq]
root         51      2  0 Jun24 ?        00:00:00 [hv_vmbus_con]
root         52      2  0 Jun24 ?        00:00:00 [devfreq_wq]
root         53      2  0 Jun24 ?        00:00:00 [watchdogd]
root         56      2  0 Jun24 ?        00:41:37 [kswapd0]
root         57      2  0 Jun24 ?        00:00:00 [ecryptfs-kthrea]
root        100      2  0 Jun24 ?        00:00:00 [kthrotld]
root        101      2  0 Jun24 ?        00:00:00 [nfit]
root        102      2  0 Jun24 ?        00:00:00 [scsi_eh_0]
root        103      2  0 Jun24 ?        00:00:00 [scsi_tmf_0]
root        104      2  0 Jun24 ?        00:00:00 [storvsc_error_w]
root        105      2  0 Jun24 ?        00:00:00 [scsi_eh_1]
root        106      2  0 Jun24 ?        00:00:00 [scsi_tmf_1]
root        107      2  0 Jun24 ?        00:00:00 [storvsc_error_w]
root        108      2  0 Jun24 ?        00:00:00 [scsi_eh_2]
root        109      2  0 Jun24 ?        00:00:00 [scsi_tmf_2]
root        110      2  0 Jun24 ?        00:00:00 [storvsc_error_w]
root        111      2  0 Jun24 ?        00:00:00 [scsi_eh_3]
root        112      2  0 Jun24 ?        00:00:00 [scsi_tmf_3]
root        113      2  0 Jun24 ?        00:00:00 [storvsc_error_w]
root        117      2  0 Jun24 ?        00:00:16 [kworker/0:1H]
root        119      2  0 Jun24 ?        00:00:00 [scsi_eh_4]
root        120      2  0 Jun24 ?        00:00:00 [scsi_tmf_4]
root        121      2  0 Jun24 ?        00:00:00 [scsi_eh_5]
root        122      2  0 Jun24 ?        00:00:00 [scsi_tmf_5]
root        123      2  0 Jun24 ?        00:00:00 [mlx4]
root        125      2  0 Jun24 ?        00:00:00 [rdma_cm]
root        126      2  0 Jun24 ?        00:00:00 [mlx4_ib]
root        127      2  0 Jun24 ?        00:00:00 [mlx4_ib_mcg]
root        131      2  0 Jun24 ?        00:00:00 [ipv6_addrconf]
root        142      2  0 Jun24 ?        00:00:00 [kstrp]
root        160      2  0 Jun24 ?        00:00:19 [kworker/1:1H]
root        333      2  0 Jun24 ?        00:00:00 [raid5wq]
root        383      2  0 Jun24 ?        00:00:54 [jbd2/sda1-8]
root        384      2  0 Jun24 ?        00:00:00 [ext4-rsv-conver]
root        397      2  0 Jun24 ?        00:00:21 [kworker/3:1H]
root        430      2  0 Jun24 ?        00:00:32 [kworker/2:1H]
root        449      1  0 Jun24 ?        00:05:22 /lib/systemd/systemd-journald
root        458      2  0 Jun24 ?        00:00:00 [iscsi_eh]
root        473      1  0 Jun24 ?        00:00:00 /sbin/lvmetad -f
root        504      1  0 Jun24 ?        00:00:12 /lib/systemd/systemd-udevd
root        745      2  0 Jun24 ?        00:02:10 [hv_balloon]
systemd+    841      1  0 Jun24 ?        00:00:06 /lib/systemd/systemd-timesyncd
root        881      2  0 Jun24 ?        00:00:30 [jbd2/sdc1-8]
root        882      2  0 Jun24 ?        00:00:00 [ext4-rsv-conver]
root       1057      1  0 Jun24 ?        00:00:00 /sbin/dhclient -1 -v -pf /run/dhclient.eth0.pid -lf /var/lib/dhcp/dhclient.eth0.leases -I -df /var/lib/dhcp/dhclient6.eth0.leases eth0
root       1247      2  0 Jun24 ?        00:00:00 [jbd2/sdb1-8]
root       1248      2  0 Jun24 ?        00:00:00 [ext4-rsv-conver]
message+   1272      1  0 Jun24 ?        00:00:02 /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation
root       1273      1  0 Jun24 ?        00:02:03 /sbin/iscsid
root       1277      1  0 Jun24 ?        00:09:58 /sbin/iscsid
root       1287      1  0 Jun24 ?        00:00:06 /lib/systemd/systemd-logind
root       1294      1  0 Jun24 ?        00:00:00 /usr/sbin/acpid
unscd      1310      1  0 Jun24 ?        00:17:17 /usr/sbin/nscd -d
root       1326      1  0 Jun24 ?        00:01:04 /usr/bin/lxcfs /var/lib/lxcfs/
daemon     1333      1  0 Jun24 ?        00:00:00 /usr/sbin/atd -f
root       1334      1  0 Jun24 ?        00:02:12 /usr/lib/accountsservice/accounts-daemon
root       1335      1  0 Jun24 ?        00:01:02 /usr/sbin/sshd -D
root       1336      1  0 Jun24 ?        00:03:09 /usr/lib/snapd/snapd
root       1338      1  0 Jun24 ?        00:01:03 /usr/sbin/cron -f
root       1340      1  0 Jun24 ?        00:00:00 /usr/bin/python3 -u /usr/sbin/waagent -daemon
root       1400      1  0 Jun24 ?        00:15:47 /usr/bin/python2 /var/lib/waagent/Microsoft.OSTCExtensions.LinuxDiagnostic-2.3.9029/diagnostic.py -daemon
root       1582      1  0 Jun24 ttyS0    00:00:00 /sbin/agetty --keep-baud 115200 38400 9600 ttyS0 vt220
root       1585      1  0 Jun24 tty1     00:00:00 /sbin/agetty --noclear tty1 linux
root       1696      1  0 Jun24 ?        00:04:59 /usr/sbin/irqbalance --pid=/var/run/irqbalance.pid
root       1735      1  0 Jun24 ?        00:17:49 /opt/omi/bin/omiserver --configfile=/etc/opt/omi/conf/omiserver.conf -d
root       1739      1  0 Jun24 ?        00:00:00 /sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan --syslog
syslog     1906      1  0 Jun24 ?        00:02:33 /usr/sbin/rsyslogd -n
root       2030      2  0 12:31 ?        00:00:00 [kworker/1:0]
root       2051   1400  0 Jun24 ?        01:52:59 /var/lib/waagent/Microsoft.OSTCExtensions.LinuxDiagnostic-2.3.9029/bin/mdsd -A -C -c /var/lib/waagent/Microsoft.OSTCExtensions.LinuxDiagnosti
root       2232   1735  0 Jun24 ?        00:32:44 /opt/omi/bin/omiagent 9 11 --destdir / --providerdir /opt/omi/lib --idletimeout 90 --loglevel WARNING
root       5036      2  0 12:55 ?        00:00:00 [kworker/u256:1]
root       5392      2  0 12:58 ?        00:00:00 [kworker/2:0]
root       5393      2  0 12:58 ?        00:00:00 [kworker/0:0]
user2   5641      1 96 13:00 ?        00:10:19 java -Xmx1g -Xms512m -Dlog4j.configuration=file:/extdrive/jobs/Recommender/resources/log4j.properties -jar /extdrive/jobs/Recommender
root       5883      2  0 13:01 ?        00:00:00 [kworker/u256:2]
root       6209      2  0 13:04 ?        00:00:00 [kworker/3:0]
root       6692  80878  0 13:08 ?        00:00:00 sleep 360
root       6768      2  0 13:09 ?        00:00:00 [kworker/3:1]
root       6778      2  0 13:09 ?        00:00:00 [kworker/u256:0]
user2   6921      1 43 13:10 ?        00:00:19 java -Xmx1g -Xms512m -Dlog4j.configuration=file:/extdrive/jobs/Cloud/resources/log4j.properties -jar /extdrive/jobs/Cloud/CloudGen
root       7091   1335  0 13:10 ?        00:00:00 sshd: siddarth [priv]
siddarth   7109      1  0 13:10 ?        00:00:00 /lib/systemd/systemd --user
siddarth   7110   7109  0 13:10 ?        00:00:00 (sd-pam)
siddarth   7208   7091  0 13:10 ?        00:00:00 sshd: siddarth@pts/0
siddarth   7210   7208  0 13:10 pts/0    00:00:00 -sh
root       7230   7210  0 13:10 pts/0    00:00:00 sudo su
root       7231   7230  0 13:10 pts/0    00:00:00 su
root       7232   7231  0 13:10 pts/0    00:00:00 bash
root       7246   7232  0 13:10 pts/0    00:00:00 ps -ef
root      14578      2  0 Sep11 ?        00:00:31 [inmwrkrd]
root      14579      2  0 Sep11 ?        00:00:00 [inmwrkrd]
root      14580      2  0 Sep11 ?        00:00:00 [inmsvcd]
user2  18761      1  0 Sep12 ?        01:03:27 java -Xmx1g -Xms1g -Dlog4j.configuration=file:/extdrive/jobs/manualRelated/manual_rel/resources/log4j.properties -jar /extdrive/jobs/manual
user1   39983      1  0 Sep18 ?        00:00:00 /lib/systemd/systemd --user
user1   39984  39983  0 Sep18 ?        00:00:00 (sd-pam)
root      40402      1  0 Jul17 ?        00:00:00 /usr/lib/policykit-1/polkitd --no-debug
user2  40818      1  0 Sep18 ?        00:46:29 java -Xmx1g -Xms512m -Dlog4j.configuration=file:/extdrive/jobs/FeedbackAnalyzer/resources/log4j.properties -javaagent:/extdrive/jobs
user2  40978      1  0 Sep18 ?        00:13:42 java -Xmx1g -Xms512m -Dlog4j.configuration=file:/extdrive/jobs/EventAnalyzer/resources/log4j.properties -javaagent:/extdrive/jobs/Sea
user2  41300      1  0 Sep18 ?        00:10:19 java -Xmx1g -Xms512m -Dlog4j.configuration=file:/extdrive/jobs/Auto/resources/log4j.properties -jar /extdrive/jobs/Auto/AutoT
user2  41439      1  0 Sep18 ?        01:33:04 java -Xmx1g -Xms512m -Dlog4j.configuration=file:/extdrive/jobs/DupWitKafkaConsumer/resources/log4j.properties -jar /extdrive/jobs/DupWitKaf
nagios    54761      1  0 Sep14 ?        00:00:01 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -f
user2  54911      1  1 Oct02 ?        00:33:14 java -Xmx1g -Xms512m -Dlog4j.configuration=file:/extdrive/jobs/userVectorBuilder/resources/log4j.properties -javaagent:/extdrive/jobs/userV
root      56684      2  0 Jul03 ?        00:00:00 [xfsalloc]
root      56690      2  0 Jul03 ?        00:00:00 [xfs_mru_cache]
root      65368      2  0 03:42 ?        00:00:00 [kworker/0:1]
root      80742      1  0 Sep11 ?        00:00:06 /usr/local/ASR/Vx/bin/appservice
root      80744      1  0 Sep11 ?        00:08:12 /usr/local/ASR/Vx/bin/svagents
root      80878      1  0 Sep11 ?        00:00:15 /bin/bash /usr/local/ASR/Vx/bin/uarespawndagent
root      84606  80744  0 06:14 ?        00:00:00 /usr/local/ASR/Vx/bin/s2 svagents
root      87552      2  0 06:36 ?        00:00:00 [kworker/1:1]
root      92120   1340  1 Sep11 ?        07:25:00 python3 -u bin/WALinuxAgent-2.2.31-py2.7.egg -run-exthandlers
root     109845  80744  0 09:29 ?        00:00:01 /usr/local/ASR/Vx/bin/evtcollforw /Environment A2A_Source
root     111634      2  0 09:42 ?        00:00:00 [kworker/2:1]
root     111643      2  0 09:42 ?        00:00:00 [kworker/3:3]
user2 125958      1  0 Sep13 ?        00:20:08 java -Xmx1g -Xms512m -Dlog4j.configuration=file:/extdrive/jobs/EventAnalyzer/resources/log4j.properties -javaagent:/extdrive/jobs/Sea
user2 126234      1  0 Sep12 ?        00:19:29 java -Xmx1g -Xms512m -Dlog4j.configuration=file:/extdrive/jobs/Duplicate/resources/log4j.properties -jar /extdrive/jobs/Duplicate/D

Re: How to monitor long running processes?

Posted: Thu Oct 04, 2018 8:47 am
by scottwilkerson
But which of these processes is the one you want to monitor?

Re: How to monitor long running processes?

Posted: Fri Oct 05, 2018 1:50 am
by Siddharth Hegde

Code: Select all

ps -ef
ps -ef
UID         PID   PPID  C STIME TTY          TIME CMD
root          1      0  0 Jun24 ?        00:01:11 /sbin/init
root          2      0  0 Jun24 ?        00:00:01 [kthreadd]
root          4      2  0 Jun24 ?        00:00:00 [kworker/0:0H]
root          6      2  0 Jun24 ?        00:00:00 [mm_percpu_wq]
root          7      2  0 Jun24 ?        00:00:22 [ksoftirqd/0]
root          8      2  0 Jun24 ?        00:18:09 [rcu_sched]
root          9      2  0 Jun24 ?        00:00:00 [rcu_bh]
root         10      2  0 Jun24 ?        00:00:05 [migration/0]
root         11      2  0 Jun24 ?        00:00:21 [watchdog/0]
root         12      2  0 Jun24 ?        00:00:00 [cpuhp/0]
root         13      2  0 Jun24 ?        00:00:00 [cpuhp/1]
root         14      2  0 Jun24 ?        00:00:19 [watchdog/1]
root         15      2  0 Jun24 ?        00:00:05 [migration/1]
root         16      2  0 Jun24 ?        00:00:23 [ksoftirqd/1]
root         18      2  0 Jun24 ?        00:00:00 [kworker/1:0H]
root         19      2  0 Jun24 ?        00:00:00 [cpuhp/2]
root         20      2  0 Jun24 ?        00:00:17 [watchdog/2]
root         21      2  0 Jun24 ?        00:00:05 [migration/2]
root         22      2  0 Jun24 ?        00:01:46 [ksoftirqd/2]
root         24      2  0 Jun24 ?        00:00:00 [kworker/2:0H]
root         25      2  0 Jun24 ?        00:00:00 [cpuhp/3]
root         26      2  0 Jun24 ?        00:00:18 [watchdog/3]
root         27      2  0 Jun24 ?        00:00:05 [migration/3]
root         28      2  0 Jun24 ?        00:00:25 [ksoftirqd/3]
root         30      2  0 Jun24 ?        00:00:00 [kworker/3:0H]
root         31      2  0 Jun24 ?        00:00:00 [kdevtmpfs]
root         32      2  0 Jun24 ?        00:00:00 [netns]
root         33      2  0 Jun24 ?        00:00:00 [rcu_tasks_kthre]
root         34      2  0 Jun24 ?        00:00:00 [kauditd]
root         36      2  0 Jun24 ?        00:00:06 [khungtaskd]
root         37      2  0 Jun24 ?        00:00:07 [oom_reaper]
root         38      2  0 Jun24 ?        00:00:00 [writeback]
root         39      2  0 Jun24 ?        00:00:00 [kcompactd0]
root         40      2  0 Jun24 ?        00:00:00 [ksmd]
root         41      2  0 Jun24 ?        00:03:09 [khugepaged]
root         42      2  0 Jun24 ?        00:00:00 [crypto]
root         43      2  0 Jun24 ?        00:00:00 [kintegrityd]
root         44      2  0 Jun24 ?        00:00:00 [kblockd]
root         45      2  0 Jun24 ?        00:00:00 [ata_sff]
root         46      2  0 Jun24 ?        00:00:00 [md]
root         47      2  0 Jun24 ?        00:00:00 [edac-poller]
root         48      2  0 Jun24 ?        00:00:00 [ib-comp-wq]
root         49      2  0 Jun24 ?        00:00:00 [ib_mcast]
root         50      2  0 Jun24 ?        00:00:00 [ib_nl_sa_wq]
root         51      2  0 Jun24 ?        00:00:00 [hv_vmbus_con]
root         52      2  0 Jun24 ?        00:00:00 [devfreq_wq]
root         53      2  0 Jun24 ?        00:00:00 [watchdogd]
root         56      2  0 Jun24 ?        00:41:37 [kswapd0]
root         57      2  0 Jun24 ?        00:00:00 [ecryptfs-kthrea]
root        100      2  0 Jun24 ?        00:00:00 [kthrotld]
root        101      2  0 Jun24 ?        00:00:00 [nfit]
root        102      2  0 Jun24 ?        00:00:00 [scsi_eh_0]
root        103      2  0 Jun24 ?        00:00:00 [scsi_tmf_0]
root        104      2  0 Jun24 ?        00:00:00 [storvsc_error_w]
root        105      2  0 Jun24 ?        00:00:00 [scsi_eh_1]
root        106      2  0 Jun24 ?        00:00:00 [scsi_tmf_1]
root        107      2  0 Jun24 ?        00:00:00 [storvsc_error_w]
root        108      2  0 Jun24 ?        00:00:00 [scsi_eh_2]
root        109      2  0 Jun24 ?        00:00:00 [scsi_tmf_2]
root        110      2  0 Jun24 ?        00:00:00 [storvsc_error_w]
root        111      2  0 Jun24 ?        00:00:00 [scsi_eh_3]
root        112      2  0 Jun24 ?        00:00:00 [scsi_tmf_3]
root        113      2  0 Jun24 ?        00:00:00 [storvsc_error_w]
root        117      2  0 Jun24 ?        00:00:16 [kworker/0:1H]
root        119      2  0 Jun24 ?        00:00:00 [scsi_eh_4]
root        120      2  0 Jun24 ?        00:00:00 [scsi_tmf_4]
root        121      2  0 Jun24 ?        00:00:00 [scsi_eh_5]
root        122      2  0 Jun24 ?        00:00:00 [scsi_tmf_5]
root        123      2  0 Jun24 ?        00:00:00 [mlx4]
root        125      2  0 Jun24 ?        00:00:00 [rdma_cm]
root        126      2  0 Jun24 ?        00:00:00 [mlx4_ib]
root        127      2  0 Jun24 ?        00:00:00 [mlx4_ib_mcg]
root        131      2  0 Jun24 ?        00:00:00 [ipv6_addrconf]
root        142      2  0 Jun24 ?        00:00:00 [kstrp]
root        160      2  0 Jun24 ?        00:00:19 [kworker/1:1H]
root        333      2  0 Jun24 ?        00:00:00 [raid5wq]
root        383      2  0 Jun24 ?        00:00:54 [jbd2/sda1-8]
root        384      2  0 Jun24 ?        00:00:00 [ext4-rsv-conver]
root        397      2  0 Jun24 ?        00:00:21 [kworker/3:1H]
root        430      2  0 Jun24 ?        00:00:32 [kworker/2:1H]
root        449      1  0 Jun24 ?        00:05:25 /lib/systemd/systemd-journald
root        458      2  0 Jun24 ?        00:00:00 [iscsi_eh]
root        473      1  0 Jun24 ?        00:00:00 /sbin/lvmetad -f
root        504      1  0 Jun24 ?        00:00:13 /lib/systemd/systemd-udevd
root        745      2  0 Jun24 ?        00:02:11 [hv_balloon]
systemd+    841      1  0 Jun24 ?        00:00:06 /lib/systemd/systemd-timesyncd
root        881      2  0 Jun24 ?        00:00:30 [jbd2/sdc1-8]
root        882      2  0 Jun24 ?        00:00:00 [ext4-rsv-conver]
root       1057      1  0 Jun24 ?        00:00:00 /sbin/dhclient -1 -v -pf /run/dhclient.eth0.pid -lf /var/lib/dhcp/dhclient.eth0.leases -I -df /var/lib/dhcp/dhclient6.eth0.leases eth0
root       1247      2  0 Jun24 ?        00:00:00 [jbd2/sdb1-8]
root       1248      2  0 Jun24 ?        00:00:00 [ext4-rsv-conver]
message+   1272      1  0 Jun24 ?        00:00:02 /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation
root       1273      1  0 Jun24 ?        00:02:03 /sbin/iscsid
root       1277      1  0 Jun24 ?        00:10:02 /sbin/iscsid
root       1287      1  0 Jun24 ?        00:00:06 /lib/systemd/systemd-logind
root       1294      1  0 Jun24 ?        00:00:00 /usr/sbin/acpid
unscd      1310      1  0 Jun24 ?        00:17:20 /usr/sbin/nscd -d
root       1326      1  0 Jun24 ?        00:01:05 /usr/bin/lxcfs /var/lib/lxcfs/
daemon     1333      1  0 Jun24 ?        00:00:00 /usr/sbin/atd -f
root       1334      1  0 Jun24 ?        00:02:13 /usr/lib/accountsservice/accounts-daemon
root       1335      1  0 Jun24 ?        00:01:02 /usr/sbin/sshd -D
root       1336      1  0 Jun24 ?        00:03:10 /usr/lib/snapd/snapd
root       1338      1  0 Jun24 ?        00:01:03 /usr/sbin/cron -f
root       1340      1  0 Jun24 ?        00:00:00 /usr/bin/python3 -u /usr/sbin/waagent -daemon
root       1400      1  0 Jun24 ?        00:15:53 /usr/bin/python2 /var/lib/waagent/Microsoft.OSTCExtensions.LinuxDiagnostic-2.3.9029/diagnostic.py -daemon
root       1582      1  0 Jun24 ttyS0    00:00:00 /sbin/agetty --keep-baud 115200 38400 9600 ttyS0 vt220
root       1585      1  0 Jun24 tty1     00:00:00 /sbin/agetty --noclear tty1 linux
root       1696      1  0 Jun24 ?        00:05:01 /usr/sbin/irqbalance --pid=/var/run/irqbalance.pid
root       1735      1  0 Jun24 ?        00:17:56 /opt/omi/bin/omiserver --configfile=/etc/opt/omi/conf/omiserver.conf -d
root       1739      1  0 Jun24 ?        00:00:00 /sbin/mdadm --monitor --pid-file /run/mdadm/monitor.pid --daemonise --scan --syslog
syslog     1906      1  0 Jun24 ?        00:02:34 /usr/sbin/rsyslogd -n
root       2051   1400  0 Jun24 ?        01:53:47 /var/lib/waagent/Microsoft.OSTCExtensions.LinuxDiagnostic-2.3.9029/bin/mdsd -A -C -c /var/lib/waagent/Microsoft.OSTCExtensions.LinuxDiagnosti
root       2232   1735  0 Jun24 ?        00:32:58 /opt/omi/bin/omiagent 9 11 --destdir / --providerdir /opt/omi/lib --idletimeout 90 --loglevel WARNING
root       2380      2  0 05:31 ?        00:00:00 [kworker/0:1]
root       2382      2  0 05:31 ?        00:00:00 [kworker/2:1]
root       2427      2  0 05:31 ?        00:00:00 [kworker/1:1]
root       3619      2  0 05:40 ?        00:00:00 [kworker/u256:2]
root       7327  80744  0 06:10 ?        00:00:00 /usr/local/ASR/Vx/bin/evtcollforw /Environment A2A_Source
root       7475      2  0 06:11 ?        00:00:00 [kworker/u256:1]
root      10503   1335  0 06:35 ?        00:00:00 sshd: siddarth [priv]
siddarth  10513      1  0 06:35 ?        00:00:00 /lib/systemd/systemd --user
siddarth  10514  10513  0 06:35 ?        00:00:00 (sd-pam)
siddarth  10611  10503  0 06:35 ?        00:00:00 sshd: siddarth@pts/0
siddarth  10613  10611  0 06:35 pts/0    00:00:00 -sh
root      10627  10613  0 06:35 pts/0    00:00:00 sudo su
root      10628  10627  0 06:35 pts/0    00:00:00 su
root      10629  10628  0 06:35 pts/0    00:00:00 bash
root      10674  10629  0 06:35 pts/0    00:00:00 sudo su user3
root      10675  10674  0 06:35 pts/0    00:00:00 su user3
user3  10676  10675  0 06:35 pts/0    00:00:00 bash
root      10723      2  0 Oct04 ?        00:00:00 [kworker/2:0]
root      10729      2  0 Oct04 ?        00:00:01 [kworker/0:2]
root      10914   1335  0 06:37 ?        00:00:00 sshd: user1 [priv]
user1    10918      1  0 06:37 ?        00:00:00 /lib/systemd/systemd --user
user1    10919  10918  0 06:37 ?        00:00:00 (sd-pam)
user1    10952  10914  0 06:37 ?        00:00:00 sshd: user1@pts/1
user1    10957  10952  0 06:37 pts/1    00:00:00 -sh
user1    10961  10957  0 06:37 pts/1    00:00:00 bash
root      10986  10961  0 06:37 pts/1    00:00:00 sudo su user3
root      10988  10986  0 06:37 pts/1    00:00:00 su user3
user3  10989  10988  0 06:37 pts/1    00:00:00 bash
root      11115      2  0 06:37 ?        00:00:00 [kworker/u256:0]
root      11189      2  0 06:38 ?        00:00:00 [kworker/u256:3]
root      11243  80878  0 06:38 ?        00:00:00 sleep 360
user3  11936  10989 56 06:43 pts/1    00:00:14 java -jar Zeus.jar
user3  11999  10676  0 06:43 pts/0    00:00:00 ps -ef
root      14578      2  0 Sep11 ?        00:00:31 [inmwrkrd]
root      14579      2  0 Sep11 ?        00:00:00 [inmwrkrd]
root      14580      2  0 Sep11 ?        00:00:00 [inmsvcd]
user3  18761      1  0 Sep12 ?        01:05:32 java -Xmx1g -Xms1g -Dlog4j.configuration=file:/datadrive/jobs/manualRelated/manual_rel/resources/log4j.properties -jar /datadrive/jobs/manual
user2   39983      1  0 Sep18 ?        00:00:00 /lib/systemd/systemd --user
user2   39984  39983  0 Sep18 ?        00:00:00 (sd-pam)
root      40402      1  0 Jul17 ?        00:00:00 /usr/lib/policykit-1/polkitd --no-debug
nagios    54761      1  0 Sep14 ?        00:00:01 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -f
root      56684      2  0 Jul03 ?        00:00:00 [xfsalloc]
root      56690      2  0 Jul03 ?        00:00:00 [xfs_mru_cache]
root      80742      1  0 Sep11 ?        00:00:06 /usr/local/ASR/Vx/bin/appservice
root      80744      1  0 Sep11 ?        00:08:23 /usr/local/ASR/Vx/bin/svagents
root      80878      1  0 Sep11 ?        00:00:15 /bin/bash /usr/local/ASR/Vx/bin/uarespawndagent
root      84606  80744  0 Oct04 ?        00:00:01 /usr/local/ASR/Vx/bin/s2 svagents
root      92120   1340  1 Sep11 ?        07:36:58 python3 -u bin/WALinuxAgent-2.2.31-py2.7.egg -run-exthandlers
root      93022      2  0 00:17 ?        00:00:00 [kworker/3:2]
root     112109      2  0 02:46 ?        00:00:00 [kworker/3:1]
root     112110      2  0 02:46 ?        00:00:00 [kworker/1:0]
I want to monitor this one:

Code: Select all

user3  11936  10989 56 06:43 pts/1    00:00:14 java -jar Zeus.jar

Re: How to monitor long running processes?

Posted: Fri Oct 05, 2018 7:15 am
by scottwilkerson

Code: Select all

/usr/local/nagios/libexec/check_procs -C java -a Zeus.jar -c 1:5
This would monitor the process and raise a CRITICAL alert if there are fewer than 1 or more than 5 of these processes running.

Re: How to monitor long running processes?

Posted: Fri Oct 05, 2018 8:04 am
by Siddharth Hegde
In client.cfg on the Nagios server:

Code: Select all

define service{
        use                             generic-service
        host_name                       server
        service_description             Processes
        check_command                   check_process
        } 
In nrpe.cfg on the client:

Code: Select all

command[check_process]=/usr/local/nagios/libexec/check_procs -C java -a Zeus.jar -c 1:5
Is this the correct configuration for the Nagios server and for nrpe-server on the client?

Re: How to monitor long running processes?

Posted: Fri Oct 05, 2018 9:14 am
by scottwilkerson
This would depend on how you defined check_process in Nagios.

A more common approach would be a service definition that calls the remote command through check_nrpe:

Code: Select all

define service{
        use                             generic-service
        host_name                       server
        service_description             Processes
        check_command                   check_nrpe!check_process
        } 
with the check_nrpe command set up like this:

Code: Select all

define command {
    command_name    check_nrpe
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -t 30 -c $ARG1$ $ARG2$
}

Re: How to monitor long running processes?

Posted: Mon Oct 08, 2018 1:31 am
by Siddharth Hegde
This is the check_nrpe command on the server:

Code: Select all

define command{
        command_name check_nrpe
        command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
I'm using it to monitor around 15 servers. So, should I change it? If I change it, won't it affect other servers?

Is there any other way to monitor it without changing check_nrpe?

Re: How to monitor long running processes?

Posted: Mon Oct 08, 2018 1:24 pm
by cdienger
I would change it to:

Code: Select all

define command {
    command_name    check_nrpe
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -t 30 -c $ARG1$ $ARG2$
}
Yes, it would also change how checks for the other servers are run, but probably not in any destructive way: $ARG2$ doesn't always need to contain a value (existing checks that pass only one argument keep working), and -t 30 simply sets the timeout to 30 seconds.

Another option would be to leave check_nrpe untouched, create a copy of it under a new name, and configure only the checks that need the extra options to use the copy:

Code: Select all

define command {
    command_name    check_nrpe2
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -t 30 -c $ARG1$ $ARG2$
}