jolson wrote: How much memory do you have on each node? Could we get some detail about the hardware involved here?
Also, please run the following commands from one of your nodes.
Code: Select all
cat /etc/sysconfig/elasticsearch
curl -XGET localhost:9200/_nodes/jvm?pretty
Each node has 8 vCPUs and 16 GB of RAM.
These are the processes and memory allocations for Elasticsearch and Logstash:
nagios 1312 1 21 May20 ? 04:36:43 /usr/bin/java -Xms8g -Xmx8g -Xss256k -Djava.awt.headless=true -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly -XX:+HeapDumpOnOutOfMemoryError -XX:+DisableExplicitGC -Des.cluster.name=d5d524d3-1f75-4846-aebd-043e13b0bdb9 -Des.node.name=00a07fa5-7288-4d96-880c-474285dad017 -Des.discovery.zen.ping.unicast.hosts=localhost,10.67.1.246,10.68.1.246,10.68.1.147,10.67.1.247 -Delasticsearch -Des.pidfile=/var/run/elasticsearch/elasticsearch.pid -Des.path.home=/usr/local/nagioslogserver/elasticsearch -cp :/usr/local/nagioslogserver/elasticsearch/lib/elasticsearch-1.3.2.jar:/usr/local/nagioslogserver/elasticsearch/lib/*:/usr/local/nagioslogserver/elasticsearch/lib/sigar/* -Des.default.path.home=/usr/local/nagioslogserver/elasticsearch -Des.default.path.logs=/var/log/elasticsearch -Des.default.path.data=/usr/local/nagioslogserver/elasticsearch/data -Des.default.path.work=/usr/local/nagioslogserver/tmp/elasticsearch -Des.default.path.conf=/usr/local/nagioslogserver/elasticsearch/config org.elasticsearch.bootstrap.Elasticsearch
root 1379 1 0 May20 ? 00:00:00 runuser -s /bin/sh -c exec /usr/local/nagioslogserver/logstash/bin/logstash agent -f /usr/local/nagioslogserver/logstash/etc/conf.d -l /var/log/logstash/logstash.log -w 4 nagios
nagios 1381 1379 99 May20 ? 1-00:52:45 /usr/bin/java -Djava.io.tmpdir=/usr/local/nagioslogserver/tmp -Xmx2g -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -Djava.awt.headless=true -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly -jar /usr/local/nagioslogserver/logstash/vendor/jar/jruby-complete-1.7.11.jar -I/usr/local/nagioslogserver/logstash/lib /usr/local/nagioslogserver/logstash/lib/logstash/runner.rb agent -f /usr/local/nagioslogserver/logstash/etc/conf.d -l /var/log/logstash/logstash.log -w 4
cat /etc/sysconfig/elasticsearch
# Settings for the Elasticsearch service.
# Several values below are built from $APP_DIR / $ES_HOME, so they rely on
# shell variable expansion when this file is read.

# Directory where the Elasticsearch binary distribution resides
# (ES_HOME is derived from the application root APP_DIR).
APP_DIR="/usr/local/nagioslogserver"
ES_HOME="$APP_DIR/elasticsearch"
# Heap size (defaults to 256m min, 1g max when not set)
ES_HEAP_SIZE=8g
# Heap new generation size (commented out: no value is set here)
#ES_HEAP_NEWSIZE=
# Maximum direct memory (commented out: no value is set here)
#ES_DIRECT_SIZE=
# Additional Java options (commented out: no value is set here)
#ES_JAVA_OPTS=
# Maximum number of open files
MAX_OPEN_FILES=65535
# Maximum amount of locked memory (commented out: no value is set here)
#MAX_LOCKED_MEMORY=
# Maximum number of VMAs (Virtual Memory Areas) a process can own
MAX_MAP_COUNT=262144
# Elasticsearch log directory
LOG_DIR=/var/log/elasticsearch
# Elasticsearch data directory (under ES_HOME)
DATA_DIR="$ES_HOME/data"
# Elasticsearch work directory (under APP_DIR)
WORK_DIR="$APP_DIR/tmp/elasticsearch"
# Elasticsearch configuration directory (under ES_HOME)
CONF_DIR="$ES_HOME/config"
# Elasticsearch configuration file (elasticsearch.yml)
CONF_FILE="$ES_HOME/config/elasticsearch.yml"
# User and group to run as; change these to a dedicated Elasticsearch user if possible.
# Also make sure this user can write to the log directories if you change them.
# This setting only works for the init script; it has to be configured separately for systemd startup.
ES_USER=nagios
ES_GROUP=nagios
# Configure restart on package upgrade ("true" restarts; any other value will not restart)
#RESTART_ON_UPGRADE=true
#######################################
# Refresh ES_JAVA_OPTS from the output of get_es_config.php when the service
# is being started, restarted or reloaded. Any other action is a no-op.
# Globals:
#   APP_DIR                (read)    root used to locate scripts/get_es_config.php
#   GET_ES_CONFIG_MESSAGE  (written) stdout of the PHP helper
#   GET_ES_CONFIG_RETURN   (written) exit status of the PHP helper
#   ES_JAVA_OPTS           (written) set to the helper's output on success
# Arguments:
#   $1 - action passed to the sourcing script (may be empty)
# Outputs:
#   On helper failure, prints the helper's message verbatim to stdout.
# Returns:
#   0 when nothing had to be done or ES_JAVA_OPTS was set; exits the calling
#   shell with status 1 when the helper fails.
#######################################
__es_apply_generated_java_opts() {
  # 'case' replaces the non-portable '[ a == b -o ... ]' chain.
  case "$1" in
    start|restart|reload|force-reload) ;;
    *) return 0 ;;
  esac

  # Capture the status with '||' so a sourcing script running under 'set -e'
  # still reaches the error report below instead of aborting silently.
  GET_ES_CONFIG_RETURN=0
  GET_ES_CONFIG_MESSAGE="$(php "$APP_DIR/scripts/get_es_config.php")" \
    || GET_ES_CONFIG_RETURN=$?

  if [ "$GET_ES_CONFIG_RETURN" != "0" ]; then
    # Quoted so the message is not word-split or glob-expanded.
    printf '%s\n' "$GET_ES_CONFIG_MESSAGE"
    exit 1
  fi

  ES_JAVA_OPTS="$GET_ES_CONFIG_MESSAGE"
}

# "${1:-}" keeps this safe when the sourcing script has no arguments or
# runs with 'set -u'.
__es_apply_generated_java_opts "${1:-}"
curl -XGET localhost:9200/_nodes/jvm?pretty
{
"cluster_name" : "d5d524d3-1f75-4846-aebd-043e13b0bdb9",
"nodes" : {
"cuNvwfnFQ1uEiZs4bgdkbg" : {
"name" : "78a3bc74-18f9-46c6-a763-267d4860c047",
"transport_address" : "inet[/xx.xx.1.246:9300]",
"host" : "pden2nls1",
"ip" : "xx.xx.1.246",
"version" : "1.3.2",
"build" : "dee175d",
"http_address" : "inet[localhost/127.0.0.1:9200]",
"attributes" : {
"max_local_storage_nodes" : "1"
},
"jvm" : {
"pid" : 1294,
"version" : "1.7.0_71",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "24.65-b04",
"vm_vendor" : "Oracle Corporation",
"start_time_in_millis" : 1432158624231,
"mem" : {
"heap_init_in_bytes" : 8589934592,
"heap_max_in_bytes" : 8520204288,
"non_heap_init_in_bytes" : 24313856,
"non_heap_max_in_bytes" : 224395264,
"direct_max_in_bytes" : 8520204288
},
"gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
"memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
}
},
"VxB_0AoTRD-JB1N88tQbyw" : {
"name" : "617c6cbc-2cb5-4d2b-aae3-445ce04d3040",
"transport_address" : "inet[/xx.xx.1.247:9300]",
"host" : "pbur2nls2",
"ip" : "xx.xx.1.247",
"version" : "1.3.2",
"build" : "dee175d",
"http_address" : "inet[localhost/127.0.0.1:9200]",
"attributes" : {
"max_local_storage_nodes" : "1"
},
"jvm" : {
"pid" : 1302,
"version" : "1.7.0_71",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "24.65-b04",
"vm_vendor" : "Oracle Corporation",
"start_time_in_millis" : 1432136510138,
"mem" : {
"heap_init_in_bytes" : 8589934592,
"heap_max_in_bytes" : 8520204288,
"non_heap_init_in_bytes" : 24313856,
"non_heap_max_in_bytes" : 224395264,
"direct_max_in_bytes" : 8520204288
},
"gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
"memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
}
},
"OiKmhgfmQsaQqV5EtVeIvg" : {
"name" : "00a07fa5-7288-4d96-880c-474285dad017",
"transport_address" : "inet[/xx.xx.1.246:9300]",
"host" : "pbur2nls1",
"ip" : "xx.xx.1.246",
"version" : "1.3.2",
"build" : "dee175d",
"http_address" : "inet[localhost/127.0.0.1:9200]",
"attributes" : {
"max_local_storage_nodes" : "1"
},
"jvm" : {
"pid" : 1312,
"version" : "1.7.0_71",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "24.65-b04",
"vm_vendor" : "Oracle Corporation",
"start_time_in_millis" : 1432158392726,
"mem" : {
"heap_init_in_bytes" : 8589934592,
"heap_max_in_bytes" : 8520204288,
"non_heap_init_in_bytes" : 24313856,
"non_heap_max_in_bytes" : 224395264,
"direct_max_in_bytes" : 8520204288
},
"gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
"memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
}
},
"NpWXPspTTqOrfBdy3FMAtA" : {
"name" : "18bf7360-254a-43fd-9ecf-9540cfc864a1",
"transport_address" : "inet[/xx.xx.1.147:9300]",
"host" : "pden2nls2",
"ip" : "xx.xx.1.147",
"version" : "1.3.2",
"build" : "dee175d",
"http_address" : "inet[localhost/127.0.0.1:9200]",
"attributes" : {
"max_local_storage_nodes" : "1"
},
"jvm" : {
"pid" : 1302,
"version" : "1.7.0_71",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "24.65-b04",
"vm_vendor" : "Oracle Corporation",
"start_time_in_millis" : 1432158927712,
"mem" : {
"heap_init_in_bytes" : 8589934592,
"heap_max_in_bytes" : 8520204288,
"non_heap_init_in_bytes" : 24313856,
"non_heap_max_in_bytes" : 224395264,
"direct_max_in_bytes" : 8520204288
},
"gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
"memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
}
}
}
}