]>
Commit | Line | Data |
---|---|---|
753b94ce | 1 | #!/usr/bin/env python |
d0022abf RM |
2 | # |
3 | # Authors: | |
4 | # rafael@postgresql.org.es / http://www.postgresql.org.es/ | |
5 | # | |
6 | # Copyright (c) 2016 USIT-University of Oslo | |
7 | # | |
8 | # zabbix_elasticsearch_node_stats.py: Used by zabbix_agent to pull | |
9 | # elasticsearch node stats information from an ES cluster and send | |
10 | # this information to Zabbix via trappers. | |
11 | # | |
12 | # zabbix_elasticsearch_node_stats.py is free software: you can | |
13 | # redistribute it and/or modify it under the terms of the GNU General | |
14 | # Public License as published by the Free Software Foundation, either | |
15 | # version 3 of the License, or (at your option) any later version. | |
16 | # | |
17 | # zabbix_elasticsearch_node_stats.py is distributed in the hope | |
18 | # that it will be useful, but WITHOUT ANY WARRANTY; without even the | |
19 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
20 | # PURPOSE. See the GNU General Public License for more details. | |
21 | # | |
22 | # You should have received a copy of the GNU General Public License | |
23 | # along with zabbix_elasticsearch_node_stats.py. If not, see <http://www.gnu.org/licenses/>. | |
24 | # | |
25 | ||
26 | # | |
27 | # This script gets ES node stats information from the ES rest API, | |
28 | # extracts the parameters we have defined in stats_keys{} and sends | |
29 | # data back to zabbix via zabbix_sender to defined trap-items. | |
30 | # | |
31 | # The script is executed via zabbix_agent and is defined in a | |
32 | # UserParameter that will return 0 (execution OK) or 1 (execution | |
33 | # ERROR) so zabbix can register if it cannot get data from ES. | |
34 | # | |
35 | # UserParameter=get.es.node.stats[*],/usr/bin/zabbix_elasticsearch_node_stats.py $1 | |
36 | # | |
37 | ||
38 | import requests | |
39 | import json | |
40 | import sys | |
41 | import os | |
42 | import time | |
43 | import tempfile | |
44 | ||
# Hosts answering the Elasticsearch REST API; they are tried in order
elasticsearch_clients = [
    'es-client.uio.no',
]

# Zabbix proxies that can receive the data; they are tried in order
zabbix_proxy = [
    'zabbix-proxy-prod03.uio.no',
    'zabbix-proxy-prod04.uio.no',
]

# Absolute path of the zabbix_sender binary
zabbix_sender = '/usr/bin/zabbix_sender'

# Scratch file handed to zabbix_sender (-i). It is created under /tmp as
# soon as this module is loaded and is not removed automatically
# (delete=False).
tmp_stat_file = tempfile.NamedTemporaryFile(dir='/tmp', delete=False)

# Prefix put in front of every stats key to build the Zabbix item key
item_prefix = 'es.node.'
59 | ||
# Keys extracted from the ES node stats answer. Every entry is a
# dot-separated path into the per-node JSON document; the Zabbix item key
# is item_prefix followed by the path.
#
# NOTE(review): the list used to contain
# "indices.merges.total_size_in_bytes" twice. The first occurrence came
# right after "indices.merges.current_docs", so
# "indices.merges.current_size_in_bytes" may have been intended - confirm,
# and make sure a matching trapper item exists in Zabbix before adding it.
stats_keys = {
    "indices.docs.count",
    "indices.docs.deleted",
    "indices.store.size_in_bytes",
    "indices.store.throttle_time_in_millis",
    "indices.indexing.index_total",
    "indices.indexing.index_time_in_millis",
    "indices.indexing.index_current",
    "indices.indexing.index_failed",
    "indices.indexing.delete_total",
    "indices.indexing.delete_time_in_millis",
    "indices.indexing.delete_current",
    "indices.get.total",
    "indices.get.time_in_millis",
    "indices.get.exists_total",
    "indices.get.exists_time_in_millis",
    "indices.get.missing_total",
    "indices.get.missing_time_in_millis",
    "indices.get.current",
    "indices.search.open_contexts",
    "indices.search.query_total",
    "indices.search.query_time_in_millis",
    "indices.search.query_current",
    "indices.search.fetch_total",
    "indices.search.fetch_time_in_millis",
    "indices.search.fetch_current",
    "indices.merges.current",
    "indices.merges.current_docs",
    "indices.merges.total",
    "indices.merges.total_time_in_millis",
    "indices.merges.total_docs",
    "indices.merges.total_size_in_bytes",
    "indices.merges.total_stopped_time_in_millis",
    "indices.query_cache.memory_size_in_bytes",
    "indices.query_cache.total_count",
    "indices.query_cache.hit_count",
    "indices.query_cache.miss_count",
    "indices.query_cache.cache_size",
    "indices.query_cache.cache_count",
    "indices.query_cache.evictions",
    "indices.fielddata.memory_size_in_bytes",
    "indices.fielddata.evictions",
    "indices.segments.count",
    "indices.segments.memory_in_bytes",
    "indices.segments.terms_memory_in_bytes",
    "indices.segments.stored_fields_memory_in_bytes",
    "indices.segments.norms_memory_in_bytes",
    "indices.segments.points_memory_in_bytes",
    "indices.segments.doc_values_memory_in_bytes",
    "indices.segments.index_writer_memory_in_bytes",
    "indices.segments.version_map_memory_in_bytes",
    "indices.segments.fixed_bit_set_memory_in_bytes",
    "indices.translog.operations",
    "indices.translog.size_in_bytes",
    "indices.request_cache.memory_size_in_bytes",
    "indices.request_cache.evictions",
    "indices.request_cache.hit_count",
    "indices.request_cache.miss_count",
    "process.open_file_descriptors",
    "process.max_file_descriptors",
    "jvm.mem.heap_used_in_bytes",
    "jvm.mem.heap_used_percent",
    "jvm.mem.heap_committed_in_bytes",
    "jvm.mem.heap_max_in_bytes",
    "jvm.mem.non_heap_used_in_bytes",
    "jvm.mem.non_heap_committed_in_bytes",
    "jvm.mem.pools.young.used_in_bytes",
    "jvm.mem.pools.young.max_in_bytes",
    "jvm.mem.pools.young.peak_used_in_bytes",
    "jvm.mem.pools.young.peak_max_in_bytes",
    "jvm.mem.pools.survivor.used_in_bytes",
    "jvm.mem.pools.survivor.max_in_bytes",
    "jvm.mem.pools.survivor.peak_used_in_bytes",
    "jvm.mem.pools.survivor.peak_max_in_bytes",
    "jvm.mem.pools.old.used_in_bytes",
    "jvm.mem.pools.old.max_in_bytes",
    "jvm.mem.pools.old.peak_used_in_bytes",
    "jvm.mem.pools.old.peak_max_in_bytes",
    "jvm.threads.count",
    "jvm.threads.peak_count",
    "jvm.gc.collectors.young.collection_count",
    "jvm.gc.collectors.young.collection_time_in_millis",
    "jvm.gc.collectors.old.collection_count",
    "jvm.gc.collectors.old.collection_time_in_millis",
    "jvm.buffer_pools.direct.count",
    "jvm.buffer_pools.direct.used_in_bytes",
    "jvm.buffer_pools.direct.total_capacity_in_bytes",
    "jvm.buffer_pools.mapped.count",
    "jvm.buffer_pools.mapped.used_in_bytes",
    "jvm.buffer_pools.mapped.total_capacity_in_bytes",
    "transport.server_open",
    "transport.rx_count",
    "transport.rx_size_in_bytes",
    "transport.tx_count",
    "transport.tx_size_in_bytes",
    "thread_pool.index.threads",
    "thread_pool.index.active",
    "thread_pool.index.queue",
    "thread_pool.index.rejected",
    "thread_pool.index.completed",
    "thread_pool.bulk.threads",
    "thread_pool.bulk.active",
    "thread_pool.bulk.queue",
    "thread_pool.bulk.rejected",
    "thread_pool.bulk.completed",
    "thread_pool.search.threads",
    "thread_pool.search.active",
    "thread_pool.search.queue",
    "thread_pool.search.rejected",
    "thread_pool.search.completed",
    "thread_pool.flush.threads",
    "thread_pool.flush.active",
    "thread_pool.flush.queue",
    "thread_pool.flush.rejected",
    "thread_pool.flush.completed",
    "thread_pool.management.threads",
    "thread_pool.management.active",
    "thread_pool.management.queue",
    "thread_pool.management.rejected",
    "thread_pool.management.completed",
    "thread_pool.warmer.threads",
    "thread_pool.warmer.active",
    "thread_pool.warmer.queue",
    "thread_pool.warmer.rejected",
    "thread_pool.warmer.completed",
    "thread_pool.refresh.threads",
    "thread_pool.refresh.active",
    "thread_pool.refresh.queue",
    "thread_pool.refresh.rejected",
    "thread_pool.refresh.completed",
    "thread_pool.generic.threads",
    "thread_pool.generic.active",
    "thread_pool.generic.queue",
    "thread_pool.generic.rejected",
    "thread_pool.generic.completed",
    "breakers.request.limit_size_in_bytes",
    "breakers.request.estimated_size_in_bytes",
    "breakers.request.overhead",
    "breakers.request.tripped",
    "breakers.fielddata.limit_size_in_bytes",
    "breakers.fielddata.estimated_size_in_bytes",
    "breakers.fielddata.overhead",
    "breakers.fielddata.tripped",
    "breakers.in_flight_requests.limit_size_in_bytes",
    "breakers.in_flight_requests.estimated_size_in_bytes",
    "breakers.in_flight_requests.overhead",
    "breakers.in_flight_requests.tripped",
    "breakers.parent.limit_size_in_bytes",
    "breakers.parent.estimated_size_in_bytes",
    "breakers.parent.overhead",
    "breakers.parent.tripped",
}
214 | ||
215 | # ############################################ | |
216 | # getKeys() | |
217 | # ############################################ | |
218 | ||
def getKeys(json_data, keys, node_fqdn, prefix=None):
    """
    Build zabbix_sender input lines for the requested node stats keys.

    Every key is a dot-separated path (for example
    "jvm.mem.heap_used_in_bytes") that is followed through the nested
    dictionaries of json_data.

    Parameters:
      json_data -- dictionary with the stats of one ES node.
      keys      -- iterable with the dot-separated paths to extract.
      node_fqdn -- host name written as first field of every line.
      prefix    -- text put in front of every key; the module-level
                   item_prefix is used when it is not given.

    Returns a string with one newline-terminated line
    "<node_fqdn> <prefix><key> <value>" per key that was found. Keys that
    are missing, that resolve to an empty dictionary, or whose path runs
    through a value that is not a dictionary are left out.
    """
    if prefix is None:
        prefix = item_prefix

    lines = []

    for key in keys:
        value = json_data

        for attribute in key.split('.'):
            # The path can hit a number, string or list before it is
            # used up. Treat that as "not found" instead of letting
            # .get() raise AttributeError and abort the whole run.
            if not isinstance(value, dict):
                value = {}
                break

            value = value.get(attribute, {})

        # An empty dictionary marks a path that could not be resolved
        if value == {}:
            continue

        lines.append(node_fqdn + ' ' + prefix + "{0} {1}\n".format(key, value))

    return ''.join(lines)
235 | ||
236 | ||
237 | # ############################################ | |
238 | # get_node_stats_data() | |
239 | # ############################################ | |
240 | ||
def get_node_stats_data(node_fqdn, timeout=10):
    """
    Get ES node stats data

    Asks the hosts in elasticsearch_clients[] for the stats of node_fqdn
    and turns the answer into zabbix_sender input lines with getKeys().

    Parameters:
      node_fqdn -- node name used in the /_nodes/<node>/stats URL and as
                   host name in the generated lines.
      timeout   -- seconds to wait for one ES client before the next one
                   is tried.
                   NOTE(review): presumably this should stay below the
                   zabbix_agent Timeout setting - confirm.

    Returns the string built by getKeys() for the last node listed in
    the answer.

    Prints "1" and exits with status 1 when no client returns usable
    data, when the answer lists no node, or when the data cannot be
    processed.
    """

    #
    # We try all ES clients defined in elasticsearch_clients[] until
    # one of them returns an answer with a 200 status code.
    #

    stats_data = None

    for client in elasticsearch_clients:
        url = "http://" + client + ":9200/_nodes/" + node_fqdn + "/stats"

        try:
            # Without a timeout a client that stops answering would
            # block this script forever.
            request_data = requests.get(url, timeout=timeout)

            if request_data.status_code != 200:
                continue

            stats_data = request_data.json()
            break

        except Exception:
            # Deliberate best-effort failover: whatever went wrong with
            # this client, the next one gets its chance.
            continue

    result = None

    try:
        for node_id in stats_data['nodes']:
            result = getKeys(stats_data['nodes'][node_id], stats_keys, node_fqdn)

    except Exception:
        # No answer from any client (stats_data is None) or an answer
        # that does not have the expected layout.
        result = None

    if result is None:
        print("1")
        sys.exit(1)

    return result
276 | ||
277 | # ############################################ | |
278 | # Main | |
279 | # ############################################ | |
280 | ||
if __name__ == '__main__':

    # Value printed for zabbix_agent and used as exit status:
    # 0 = execution OK, 1 = execution ERROR
    exit_code = 1

    try:

        # Exactly one argument is expected: the node name
        if len(sys.argv) == 2:

            node_fqdn = sys.argv[1].lower()

            result = get_node_stats_data(node_fqdn)

            #
            # We create a file with the data that zabbix_sender will
            # send in a bulk execution.
            #

            with open(tmp_stat_file.name, 'w') as f:
                f.write(result)

            #
            # The monitoring of this host can be done by any of the
            # zabbix proxies defined in zabbix_proxy[]. We try all of
            # them until one of them accepts our data
            #

            for proxy in zabbix_proxy:
                returncode = os.system(zabbix_sender + ' -z ' + proxy + ' -i ' + tmp_stat_file.name + ' > /dev/null 2>&1')

                if returncode == 0:
                    exit_code = 0
                    break

    except Exception:
        exit_code = 1

    finally:
        #
        # Delete temp file with zabbix_sender data. This runs on every
        # path, including a sys.exit() raised further down the call
        # chain, so failed executions do not leave files behind in /tmp.
        #
        try:
            tmp_stat_file.close()
            os.remove(tmp_stat_file.name)
        except OSError:
            # Failing to clean up must not change the reported result
            pass

    # Return value 0 = execution OK, 1 = execution ERROR
    print(str(exit_code))
    sys.exit(exit_code)