After several weeks of stability, my NLS cluster, consisting of 4 servers, today began to report a "New used memory from field would be larger than configured breaker" message.
Code: Select all
[2017-06-19 14:30:47,567][WARN ][indices.breaker ] [5c998cfb-0460-4e56-8697-83b65c086a13] [FIELDDATA] New used memory 19882309258 [18.5gb] from field [host.raw] would be larger than configured breaker: 19877383372 [18.5gb], breaking
[2017-06-19 14:30:47,570][WARN ][indices.breaker ] [5c998cfb-0460-4e56-8697-83b65c086a13] [FIELDDATA] New used memory 19883369828 [18.5gb] from field [host.raw] would be larger than configured breaker: 19877383372 [18.5gb], breaking
[2017-06-19 14:30:47,570][WARN ][indices.breaker ] [5c998cfb-0460-4e56-8697-83b65c086a13] [FIELDDATA] New used memory 19882113135 [18.5gb] from field [host.raw] would be larger than configured breaker: 19877383372 [18.5gb], breaking
[2017-06-19 14:30:49,253][WARN ][indices.breaker ] [5c998cfb-0460-4e56-8697-83b65c086a13] [FIELDDATA] New used memory 19883458282 [18.5gb] from field [@timestamp] would be larger than configured breaker: 19877383372 [18.5gb], breaking
[2017-06-19 14:30:49,264][WARN ][indices.breaker ] [5c998cfb-0460-4e56-8697-83b65c086a13] [FIELDDATA] New used memory 19883813218 [18.5gb] from field [@timestamp] would be larger than configured breaker: 19877383372 [18.5gb], breaking
[2017-06-19 14:30:49,364][WARN ][indices.breaker ] [5c998cfb-0460-4e56-8697-83b65c086a13] [FIELDDATA] New used memory 19884254761 [18.5gb] from field [@timestamp] would be larger than configured breaker: 19877383372 [18.5gb], breaking
[2017-06-19 14:30:49,642][WARN ][indices.breaker ] [5c998cfb-0460-4e56-8697-83b65c086a13] [FIELDDATA] New used memory 19887274414 [18.5gb] from field [@timestamp] would be larger than configured breaker: 19877383372 [18.5gb], breaking
[root@datalog-ugt-log2 ~]# free -m
total used free shared buff/cache available
Mem: 72314 35141 3748 3342 33424 33118
Swap: 4095 508 3587
[root@datalog-ugt-log2 ~]# cat /etc/sysconfig/elasticsearch | grep ^ES_HEAP_SIZE
ES_HEAP_SIZE=31g
[root@datalog-ugt-log2 ~]# curl -XGET 'localhost:9200/_cluster/health?pretty'
{
"cluster_name" : "a5726a09-769e-4f2b-be91-d786c8165c6f",
"status" : "green",
"timed_out" : false,
"number_of_nodes" : 4,
"number_of_data_nodes" : 4,
"active_primary_shards" : 166,
"active_shards" : 498,
"relocating_shards" : 1,
"initializing_shards" : 0,
"unassigned_shards" : 0,
"number_of_pending_tasks" : 0,
"number_of_in_flight_fetch" : 0
}
Code: Select all
[root@datalog-ugt-log2 ~]# curl -XGET 'localhost:9200/_nodes/jvm?pretty'
{
"cluster_name" : "a5726a09-769e-4f2b-be91-d786c8165c6f",
"nodes" : {
"9pp_1x92S6-hEhFP1n0__A" : {
"name" : "5c998cfb-0460-4e56-8697-83b65c086a13",
"transport_address" : "inet[/10.0.0.12:9300]",
"host" : "datalog-ugt-log2",
"ip" : "10.0.0.12",
"version" : "1.6.0",
"build" : "cdd3ac4",
"http_address" : "inet[localhost/127.0.0.1:9200]",
"attributes" : {
"max_local_storage_nodes" : "1"
},
"jvm" : {
"pid" : 13848,
"version" : "1.7.0_141",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "24.141-b02",
"vm_vendor" : "Oracle Corporation",
"start_time_in_millis" : 1496425042458,
"mem" : {
"heap_init_in_bytes" : 33285996544,
"heap_max_in_bytes" : 33128972288,
"non_heap_init_in_bytes" : 24313856,
"non_heap_max_in_bytes" : 224395264,
"direct_max_in_bytes" : 33128972288
},
"gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
"memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
}
},
"lhD0xAVESLWv_lecY6frhA" : {
"name" : "765cc658-3e5f-4923-804e-5eb57735f761",
"transport_address" : "inet[/10.0.0.21:9300]",
"host" : "datalog-utb-log1",
"ip" : "10.0.0.21",
"version" : "1.6.0",
"build" : "cdd3ac4",
"http_address" : "inet[localhost/127.0.0.1:9200]",
"attributes" : {
"max_local_storage_nodes" : "1"
},
"jvm" : {
"pid" : 19096,
"version" : "1.7.0_141",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "24.141-b02",
"vm_vendor" : "Oracle Corporation",
"start_time_in_millis" : 1496293179623,
"mem" : {
"heap_init_in_bytes" : 33285996544,
"heap_max_in_bytes" : 33128972288,
"non_heap_init_in_bytes" : 24313856,
"non_heap_max_in_bytes" : 224395264,
"direct_max_in_bytes" : 33128972288
},
"gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
"memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
}
},
"7swAa8gnSQy-_K5fhLA2BA" : {
"name" : "8471b9e1-1a82-4c3d-98bc-03f2ce871369",
"transport_address" : "inet[/10.0.0.11:9300]",
"host" : "datalog-ugt-log1",
"ip" : "10.0.0.11",
"version" : "1.6.0",
"build" : "cdd3ac4",
"http_address" : "inet[localhost/127.0.0.1:9200]",
"attributes" : {
"max_local_storage_nodes" : "1"
},
"jvm" : {
"pid" : 13484,
"version" : "1.7.0_141",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "24.141-b02",
"vm_vendor" : "Oracle Corporation",
"start_time_in_millis" : 1496421653019,
"mem" : {
"heap_init_in_bytes" : 33285996544,
"heap_max_in_bytes" : 33128972288,
"non_heap_init_in_bytes" : 24313856,
"non_heap_max_in_bytes" : 224395264,
"direct_max_in_bytes" : 33128972288
},
"gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
"memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
}
},
"2F8aaAFlQbSjVb4sYyd-xw" : {
"name" : "8d4f2dfb-f10c-4655-a4b7-8b5eaa9f6a3c",
"transport_address" : "inet[/10.0.0.22:9300]",
"host" : "datalog-utb-log2",
"ip" : "10.0.0.22",
"version" : "1.6.0",
"build" : "cdd3ac4",
"http_address" : "inet[localhost/127.0.0.1:9200]",
"attributes" : {
"max_local_storage_nodes" : "1"
},
"jvm" : {
"pid" : 24838,
"version" : "1.7.0_141",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "24.141-b02",
"vm_vendor" : "Oracle Corporation",
"start_time_in_millis" : 1496293351712,
"mem" : {
"heap_init_in_bytes" : 33285996544,
"heap_max_in_bytes" : 33128972288,
"non_heap_init_in_bytes" : 24313856,
"non_heap_max_in_bytes" : 224395264,
"direct_max_in_bytes" : 33128972288
},
"gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
"memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
}
}
}
}
To work around the problem, I had to close 10 log indices.
However, my environment receives a large volume of logs, and that volume will probably keep increasing.
We have plenty of memory, CPU, and I/O resources to spare. I would like some help setting up NLS properly so that we can work with more open log indices. What do we need to do?
Code: Select all
[root@datalog-ugt-log2 ~]# curl -s -XGET 'localhost:9200/_cat/indices?v' | sort
close logstash-2017.05.09
close logstash-2017.05.10
close logstash-2017.05.11
close logstash-2017.05.12
close logstash-2017.05.13
close logstash-2017.05.14
close logstash-2017.05.15
close logstash-2017.05.16
close logstash-2017.05.17
close logstash-2017.05.18
close logstash-2017.05.19
green open kibana-int 5 2 108 3 1.7mb 539.9kb
green open logstash-2017.05.20 5 2 26606540 0 87.2gb 29gb
green open logstash-2017.05.21 5 2 26624164 0 84gb 28gb
green open logstash-2017.05.22 5 2 41441696 0 136.5gb 45.5gb
green open logstash-2017.05.23 5 2 37812347 0 124.3gb 41.4gb
green open logstash-2017.05.24 5 2 43097009 0 137.3gb 45.7gb
green open logstash-2017.05.25 5 2 40294483 0 132.9gb 44.3gb
green open logstash-2017.05.26 5 2 40041142 0 130.1gb 43.3gb
green open logstash-2017.05.27 5 2 24195671 0 77gb 25.6gb
green open logstash-2017.05.28 5 2 23589158 0 73.7gb 24.5gb
green open logstash-2017.05.29 5 2 40094850 0 131.2gb 43.9gb
green open logstash-2017.05.30 5 2 41718756 0 135.7gb 45.2gb
green open logstash-2017.05.31 5 2 42482940 0 135.6gb 45.2gb
green open logstash-2017.06.01 5 2 36646468 0 123.1gb 41gb
green open logstash-2017.06.02 5 2 40674580 0 133gb 44.3gb
green open logstash-2017.06.03 5 2 29353391 0 93.7gb 31.2gb
green open logstash-2017.06.04 5 2 27326270 0 84.6gb 28.2gb
green open logstash-2017.06.05 5 2 44021973 0 137.3gb 45.7gb
green open logstash-2017.06.06 5 2 44203740 0 146.3gb 48.8gb
green open logstash-2017.06.07 5 2 42019441 0 139.1gb 46.4gb
green open logstash-2017.06.08 5 2 41688743 0 137.5gb 45.8gb
green open logstash-2017.06.09 5 2 40190230 0 133gb 44.3gb
green open logstash-2017.06.10 5 2 27519036 0 92.9gb 30.9gb
green open logstash-2017.06.11 5 2 27408543 0 92.7gb 30.9gb
green open logstash-2017.06.12 5 2 40310201 0 134gb 44.6gb
green open logstash-2017.06.13 5 2 40307224 0 134.3gb 44.7gb
green open logstash-2017.06.14 5 2 40969701 0 136gb 45.3gb
green open logstash-2017.06.15 5 2 27337539 0 89.7gb 29.9gb
green open logstash-2017.06.16 5 2 37181105 0 124.8gb 41.5gb
green open logstash-2017.06.17 5 2 25845080 0 84.7gb 28.2gb
green open logstash-2017.06.18 5 2 27413419 0 90.1gb 30gb
green open logstash-2017.06.19 5 2 28416635 0 97.5gb 32gb
green open nagioslogserver 1 2 120 6 667.6kb 219.2kb
green open nagioslogserver_log 5 2 354906 0 176.9mb 63.2mb
health status index pri rep docs.count docs.deleted store.size pri.store.size