logstash failed action with response of 400, dropping action
Posted: Mon Mar 06, 2017 5:33 pm
Hello,
The logstash.log file is filling rapidly with very long messages beginning with "failed action with response of 400, dropping action". Literally 99.9% of the messages are like this:
I see this question has been asked before, so I tried the suggestion of changing the port in /etc/rsyslog.d/nagioslogserver.conf to this:
but that didn't seem to help.
This is not just a nuisance message - it's actually filling logstash.log up at a rate of >1G/hr (sometimes much faster) on the primary node.
The secondary node has no such messages in the log file.
Some things about this system:
2 node cluster, CentOS 7.3, NLS 1.4.4
Load average is light:
$ cat /etc/hosts
$ head -n100 /usr/local/nagioslogserver/logstash/etc/conf.d/*
Not sure if this is relevant, but in a 15-min snapshot, 11841 messages came from 127.0.0.1, with only 379 from the next highest host.
This problem started just a a few days ago, and I am not aware of any recent changes.
The logstash.log file is filling rapidly with very long messages beginning with "failed action with response of 400, dropping action". Literally 99.9% of the messages are like this:
Code: Select all
{:timestamp=>"2017-03-06T15:12:23.292000-0700", :message=>"failed action with response of 400, dropping action: [\"index\", {:_id=>nil, :_index=>\"logstash-2017.03.06\", :_type=>\"syslog\", :_routing=>nil}, #<LogStash::Event:0x49bd9288 @metadata_accessors=#<LogStash::Util::Accessors:0x77e9b08c @store={\"retry_count\"=>0}, @lut={}>, @cancelled=false, @data={\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, @metadata={\"retry_count\"=>0}, @accessors=#<LogStash::Util::Accessors:0x12f372ac @store={\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, @lut={\"type\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"type\"], \"host\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"host\"], \"message\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"message\"], \"priority\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"priority\"], \"timestamp\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"timestamp\"], \"logsource\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"logsource\"], \"program\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"program\"], \"tags\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"tags\"], \"severity\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"severity\"], \"facility\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"facility\"], \"timestamp8601\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"timestamp8601\"], \"@timestamp\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"@timestamp\"], \"facility_label\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"facility_label\"], \"severity_label\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"severity_label\"], \"[program]\"=>[{\"message\"=>\"blk_cloned_rq_check_limits: over max size limit.\\n\", \"@version\"=>\"1\", \"@timestamp\"=>\"2017-03-06T22:12:22.000Z\", \"type\"=>\"syslog\", \"host\"=>\"10.30.216.7\", \"priority\"=>3, \"timestamp\"=>\"Mar 6 15:12:22\", \"logsource\"=>\"nitrogen\", \"program\"=>\"kernel\", \"severity\"=>3, \"facility\"=>0, \"facility_label\"=>\"kernel\", \"severity_label\"=>\"Error\"}, \"program\"]}>>]", :level=>:warn}Code: Select all
*.* @@localhost:2057This is not just a nuisance message - it's actually filling logstash.log up at a rate of >1G/hr (sometimes much faster) on the primary node.
The secondary node has no such messages in the log file.
Some things about this system:
2 node cluster, CentOS 7.3, NLS 1.4.4
Load average is light:
Code: Select all
08:50:01 AM runq-sz plist-sz ldavg-1 ldavg-5 ldavg-15 blocked
...
Average: 8 695 0.36 0.36 0.36 0Code: Select all
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6$ head -n100 /usr/local/nagioslogserver/logstash/etc/conf.d/*
Code: Select all
==> /usr/local/nagioslogserver/logstash/etc/conf.d/000_inputs.conf <==
#
# Logstash Configuration File
# Dynamically created by Nagios Log Server
#
# DO NOT EDIT THIS FILE. IT WILL BE OVERWRITTEN.
#
# Created Wed, 22 Jun 2016 17:27:47 -0600
#
#
# Global inputs
#
input {
syslog {
type => 'syslog'
port => 5544
}
tcp {
type => 'eventlog'
port => 3515
codec => json {
charset => 'CP1252'
}
}
tcp {
type => 'import_raw'
tags => 'import_raw'
port => 2056
}
tcp {
type => 'import_json'
tags => 'import_json'
port => 2057
codec => json
}
syslog {
type => 'syslog'
port => 1514
}
http {
port => 8544
codec => 'json'
}
}
#
# Local inputs
#
==> /usr/local/nagioslogserver/logstash/etc/conf.d/500_filters.conf <==
#
# Logstash Configuration File
# Dynamically created by Nagios Log Server
#
# DO NOT EDIT THIS FILE. IT WILL BE OVERWRITTEN.
#
# Created Wed, 22 Jun 2016 17:27:47 -0600
#
#
# Global filters
#
filter {
if [program] == 'apache_access' {
grok {
match => [ 'message', '%{COMBINEDAPACHELOG}']
}
date {
match => [ 'timestamp', 'dd/MMM/yyyy:HH:mm:ss Z' ]
}
mutate {
replace => [ 'type', 'apache_access' ]
convert => [ 'bytes', 'integer' ]
convert => [ 'response', 'integer' ]
}
}
if [program] == 'apache_error' {
grok {
match => [ 'message', '\[(?<timestamp>%{DAY:day} %{MONTH:month} %{MONTHDAY} %{TIME} %{YEAR})\] \[%{WORD:class}\] \[%{WORD:originator} %{IP:clientip}\] %{GREEDYDATA:errmsg}']
}
mutate {
replace => [ 'type', 'apache_error' ]
}
}
if [program] == 'nginx_access' {
grok {
match => [ 'message', '%{COMBINEDAPACHELOG}']
}
date {
match => [ 'timestamp', 'dd/MMM/yyyy:HH:mm:ss Z' ]
}
mutate {
replace => [ 'type', 'nginx_access' ]
convert => [ 'bytes', 'integer' ]
convert => [ 'response', 'integer' ]
}
}
if [program] == 'nginx_error' {
grok {
match => [ 'message', '\[(?<timestamp>%{DAY:day} %{MONTH:month} %{MONTHDAY} %{TIME} %{YEAR})\] \[%{WORD:class}\] \[%{WORD:originator} %{IP:clientip}\] %{GREEDYDATA:errmsg}']
}
mutate {
replace => [ 'type', 'nginx_error' ]
}
}
}
#
# Local filters
#
==> /usr/local/nagioslogserver/logstash/etc/conf.d/999_outputs.conf <==
#
# Logstash Configuration File
# Dynamically created by Nagios Log Server
#
# DO NOT EDIT THIS FILE. IT WILL BE OVERWRITTEN.
#
# Created Wed, 22 Jun 2016 17:27:47 -0600
#
#
# Required output for Nagios Log Server
#
output {
elasticsearch {
cluster => '76900ee2-f769-413c-9948-850204a96b32'
host => 'localhost'
document_type => '%{type}'
node_name => '707a55e2-e8cb-4073-a800-2fbc5bd4c6df'
protocol => 'transport'
workers => 4
}
}
#
# Global outputs
#
#
# Local outputs
#
This problem started just a a few days ago, and I am not aware of any recent changes.