]> git.uio.no Git - uio-zabbix.git/blame - zabbix_elasticsearch_node_stats.py
Use env python header + Upgrade code to support python3 [GID-1248]
[uio-zabbix.git] / zabbix_elasticsearch_node_stats.py
CommitLineData
753b94ce 1#!/usr/bin/env python
d0022abf
RM
2#
3# Authors:
4# rafael@postgresql.org.es / http://www.postgresql.org.es/
5#
6# Copyright (c) 2016 USIT-University of Oslo
7#
8# zabbix_elasticsearch_node_stats.py: Used by zabbix_agent to pull
9# elasticsearch cluster health information from an ES cluster and send
10# this information to Zabbix via trappers.
11#
12# zabbix_elasticsearch_node_stats.py is free software: you can
13# redistribute it and/or modify it under the terms of the GNU General
14# Public License as published by the Free Software Foundation, either
15# version 3 of the License, or (at your option) any later version.
16#
17# zabbix_elasticsearch_node_stats.py is distributed in the hope
18# that it will be useful, but WITHOUT ANY WARRANTY; without even the
19# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20# PURPOSE. See the GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
 23# along with zabbix_elasticsearch_node_stats.py. If not, see <http://www.gnu.org/licenses/>.
24#
25
26#
27# This script gets ES node stats information from the ES rest API,
28# extracts the parameters we have defined in stats_keys{} and sends
29# data back to zabbix via zabbix_sender to defined trap-items.
30#
31# The script is executed via zabbix_agent and is defined in an
32# UserParameter that will return 0 (execution OK) or 1 (execution
33# ERROR) so zabbix can register if it cannot get data from ES.
34#
35# UserParameter=get.es.node.stats[*],/usr/bin/zabbix_elasticsearch_node_stats.py $1
36#
37
38import requests
39import json
40import sys
41import os
42import time
43import tempfile
44
# Elasticsearch clients (tried in order until one answers with HTTP 200)
elasticsearch_clients = ['es-client.uio.no']

# Zabbix proxies (tried in order until one accepts our data)
zabbix_proxy = ['zabbix-proxy-prod03.uio.no', 'zabbix-proxy-prod04.uio.no']

# Path to zabbix_sender
zabbix_sender = '/usr/bin/zabbix_sender'

# Temp file with full zabbix_sender input.
# delete=False because the file is reopened by name and handed to
# zabbix_sender later; it is removed explicitly in __main__.
tmp_stat_file = tempfile.NamedTemporaryFile(delete=False, dir='/tmp')

# Prefix prepended to every Zabbix trapper item key
item_prefix = 'es.node.'

# Dotted JSON paths (relative to a node's entry in the _nodes/<fqdn>/stats
# reply) whose values are extracted and sent to Zabbix. This is a set
# literal: order does not matter and duplicates would collapse silently.
stats_keys = {
    "indices.docs.count",
    "indices.docs.deleted",
    "indices.store.size_in_bytes",
    "indices.store.throttle_time_in_millis",
    "indices.indexing.index_total",
    "indices.indexing.index_time_in_millis",
    "indices.indexing.index_current",
    "indices.indexing.index_failed",
    "indices.indexing.delete_total",
    "indices.indexing.delete_time_in_millis",
    "indices.indexing.delete_current",
    "indices.get.total",
    "indices.get.time_in_millis",
    "indices.get.exists_total",
    "indices.get.exists_time_in_millis",
    "indices.get.missing_total",
    "indices.get.missing_time_in_millis",
    "indices.get.current",
    "indices.search.open_contexts",
    "indices.search.query_total",
    "indices.search.query_time_in_millis",
    "indices.search.query_current",
    "indices.search.fetch_total",
    "indices.search.fetch_time_in_millis",
    "indices.search.fetch_current",
    "indices.merges.current",
    "indices.merges.current_docs",
    "indices.merges.total_size_in_bytes",
    "indices.merges.total",
    "indices.merges.total_time_in_millis",
    "indices.merges.total_docs",
    "indices.merges.total_stopped_time_in_millis",
    "indices.query_cache.memory_size_in_bytes",
    "indices.query_cache.total_count",
    "indices.query_cache.hit_count",
    "indices.query_cache.miss_count",
    "indices.query_cache.cache_size",
    "indices.query_cache.cache_count",
    "indices.query_cache.evictions",
    "indices.fielddata.memory_size_in_bytes",
    "indices.fielddata.evictions",
    "indices.segments.count",
    "indices.segments.memory_in_bytes",
    "indices.segments.terms_memory_in_bytes",
    "indices.segments.stored_fields_memory_in_bytes",
    "indices.segments.norms_memory_in_bytes",
    "indices.segments.points_memory_in_bytes",
    "indices.segments.doc_values_memory_in_bytes",
    "indices.segments.index_writer_memory_in_bytes",
    "indices.segments.version_map_memory_in_bytes",
    "indices.segments.fixed_bit_set_memory_in_bytes",
    "indices.translog.operations",
    "indices.translog.size_in_bytes",
    "indices.request_cache.memory_size_in_bytes",
    "indices.request_cache.evictions",
    "indices.request_cache.hit_count",
    "indices.request_cache.miss_count",
    "process.open_file_descriptors",
    "process.max_file_descriptors",
    "jvm.mem.heap_used_in_bytes",
    "jvm.mem.heap_used_percent",
    "jvm.mem.heap_committed_in_bytes",
    "jvm.mem.heap_max_in_bytes",
    "jvm.mem.non_heap_used_in_bytes",
    "jvm.mem.non_heap_committed_in_bytes",
    "jvm.mem.pools.young.used_in_bytes",
    "jvm.mem.pools.young.max_in_bytes",
    "jvm.mem.pools.young.peak_used_in_bytes",
    "jvm.mem.pools.young.peak_max_in_bytes",
    "jvm.mem.pools.survivor.used_in_bytes",
    "jvm.mem.pools.survivor.max_in_bytes",
    "jvm.mem.pools.survivor.peak_used_in_bytes",
    "jvm.mem.pools.survivor.peak_max_in_bytes",
    "jvm.mem.pools.old.used_in_bytes",
    "jvm.mem.pools.old.max_in_bytes",
    "jvm.mem.pools.old.peak_used_in_bytes",
    "jvm.mem.pools.old.peak_max_in_bytes",
    "jvm.threads.count",
    "jvm.threads.peak_count",
    "jvm.gc.collectors.young.collection_count",
    "jvm.gc.collectors.young.collection_time_in_millis",
    "jvm.gc.collectors.old.collection_count",
    "jvm.gc.collectors.old.collection_time_in_millis",
    "jvm.buffer_pools.direct.count",
    "jvm.buffer_pools.direct.used_in_bytes",
    "jvm.buffer_pools.direct.total_capacity_in_bytes",
    "jvm.buffer_pools.mapped.count",
    "jvm.buffer_pools.mapped.used_in_bytes",
    "jvm.buffer_pools.mapped.total_capacity_in_bytes",
    "transport.server_open",
    "transport.rx_count",
    "transport.rx_size_in_bytes",
    "transport.tx_count",
    "transport.tx_size_in_bytes",
    "thread_pool.index.threads",
    "thread_pool.index.active",
    "thread_pool.index.queue",
    "thread_pool.index.rejected",
    "thread_pool.index.completed",
    "thread_pool.bulk.threads",
    "thread_pool.bulk.active",
    "thread_pool.bulk.queue",
    "thread_pool.bulk.rejected",
    "thread_pool.bulk.completed",
    "thread_pool.search.threads",
    "thread_pool.search.active",
    "thread_pool.search.queue",
    "thread_pool.search.rejected",
    "thread_pool.search.completed",
    "thread_pool.flush.threads",
    "thread_pool.flush.active",
    "thread_pool.flush.queue",
    "thread_pool.flush.rejected",
    "thread_pool.flush.completed",
    "thread_pool.management.threads",
    "thread_pool.management.active",
    "thread_pool.management.queue",
    "thread_pool.management.rejected",
    "thread_pool.management.completed",
    "thread_pool.warmer.threads",
    "thread_pool.warmer.active",
    "thread_pool.warmer.queue",
    "thread_pool.warmer.rejected",
    "thread_pool.warmer.completed",
    "thread_pool.refresh.threads",
    "thread_pool.refresh.active",
    "thread_pool.refresh.queue",
    "thread_pool.refresh.rejected",
    "thread_pool.refresh.completed",
    "thread_pool.generic.threads",
    "thread_pool.generic.active",
    "thread_pool.generic.queue",
    "thread_pool.generic.rejected",
    "thread_pool.generic.completed",
    "breakers.request.limit_size_in_bytes",
    "breakers.request.estimated_size_in_bytes",
    "breakers.request.overhead",
    "breakers.request.tripped",
    "breakers.fielddata.limit_size_in_bytes",
    "breakers.fielddata.estimated_size_in_bytes",
    "breakers.fielddata.overhead",
    "breakers.fielddata.tripped",
    "breakers.in_flight_requests.limit_size_in_bytes",
    "breakers.in_flight_requests.estimated_size_in_bytes",
    "breakers.in_flight_requests.overhead",
    "breakers.in_flight_requests.tripped",
    "breakers.parent.limit_size_in_bytes",
    "breakers.parent.estimated_size_in_bytes",
    "breakers.parent.overhead",
    "breakers.parent.tripped"
}
214
# ############################################
# getKeys()
# ############################################

def getKeys(json_data, keys, node_fqdn):
    """
    Extract the values for the dotted paths in keys from json_data and
    format them as zabbix_sender input lines.

    Each output line has the form
    '<node_fqdn> <item_prefix><key> <value>' followed by a newline.

    Keys that are missing from json_data are silently skipped; a path
    that runs through a non-dict value is also treated as missing
    (the original code raised AttributeError in that case).
    """
    result = ''

    for key in keys:
        value = json_data

        # Walk the dotted path one attribute at a time.
        for attribute in key.split('.'):
            if not isinstance(value, dict):
                # Intermediate node is not a dict: treat path as missing.
                value = {}
                break
            value = value.get(attribute, {})

        # {} marks a missing key. Legitimate falsy values (0, "") are
        # still reported, since 0 == {} is False.
        if value == {}:
            continue

        result += node_fqdn + ' ' + item_prefix + "{0} {1}\n".format(key, value)

    return result
235
236
# ############################################
# get_node_stats_data()
# ############################################

def get_node_stats_data(node_fqdn):
    """
    Get ES node stats data for node_fqdn.

    Returns the stats formatted as zabbix_sender input lines (see
    getKeys()). On failure it prints "1" (the error value the Zabbix
    UserParameter expects) and exits with status 1.
    """

    #
    # We try all ES clients defined in elasticsearch_clients[] until
    # one of them returns an answer with a 200 status code.
    #

    stats_data = None

    for client in elasticsearch_clients:

        try:
            # The timeout keeps the zabbix agent from hanging forever
            # when a client host is unreachable.
            request_data = requests.get("http://" + client + ":9200/_nodes/" + node_fqdn + "/stats", timeout=10)

            if request_data.status_code != 200:
                continue

            stats_data = request_data.json()
            break

        except requests.exceptions.RequestException:
            # This client is down or unreachable; try the next one.
            continue

    try:
        # The API reply has the form {"nodes": {"<node_id>": {...}}};
        # filtered by FQDN we expect exactly one node entry.
        for node_id in stats_data['nodes']:
            result = getKeys(stats_data['nodes'][node_id], stats_keys, node_fqdn)

    except Exception:
        # stats_data is None (no client answered), the reply has an
        # unexpected structure, or 'nodes' was empty (result unbound).
        print("1")
        sys.exit(1)

    return result
276
# ############################################
# Main
# ############################################

if __name__ == '__main__':

    try:

        if len(sys.argv) != 2:
            # Wrong usage: the UserParameter must pass exactly one
            # argument (the node FQDN).
            print("1")
            sys.exit(1)

        node_fqdn = sys.argv[1].lower()

        result = get_node_stats_data(node_fqdn)

        #
        # We create a file with the data that zabbix_sender will
        # send in a bulk execution.
        #

        with open(tmp_stat_file.name, 'w') as f:
            f.write(result)

        #
        # The monitoring of this host can be done by any of the
        # zabbix proxies defined in zabbix_proxy[]. We try all of
        # them until one of them accepts our data.
        #

        # Default to failure so an empty zabbix_proxy[] list cannot
        # leave returncode unbound.
        returncode = 1

        for proxy in zabbix_proxy:
            # NOTE(review): os.system with string concatenation; proxy
            # names and the temp file path are trusted/local, so no
            # untrusted input reaches the shell here.
            returncode = os.system(zabbix_sender + ' -z ' + proxy + ' -i ' + tmp_stat_file.name + ' > /dev/null 2>&1')

            if returncode == 0:
                break

        if returncode != 0:
            print("1")
            sys.exit(1)

    except Exception:
        print("1")
        sys.exit(1)

    finally:
        # Delete temp file with zabbix_sender data. Done in finally so
        # error exits do not leak a file in /tmp on every failed run
        # (sys.exit raises SystemExit, so this still runs before exit).
        try:
            os.remove(tmp_stat_file.name)
        except OSError:
            pass

    # Return value 0 = execution OK
    print("0")