常用elasticsearch优化方案整合

在项目中常用到的优化方案. 统一记录于该页面

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
*index 写入性能提升*

curl -XPUT localhost:9200/monitor/_settings -H 'Content-Type: application/json' -d '{
"index" : {
"refresh_interval" : "60s",
"translog": {
"flush_threshold_size": "2048mb",
"durability": "async"
},
"merge": {
"scheduler": {
"max_thread_count": 1
}
}
}
}'

http.port: 9200 *http 接口访问, cluster port 统一*
transport.tcp.port: 9300 *tcp 同步, cluster port 统一*

/* split set fielddata true */

PUT monitor/_mapping/logs
{
"properties": {
"ext.deviceId": {
"type": "text",
"fielddata": true
}
}
}

# filter 检索
GET monitor_2018-09-11/_search
{
"query" : {
"constant_score" : {
"filter" : {
"terms" : {
"newslist.id" : ["b"]
}
}
}
}
}


# index 预分片
PUT /monitor_2018-09-13
{
"settings": {
"number_of_shards": 5,
"number_of_replicas": 0
}
}

# 查看当前集群每个索引的 fielddata 内存占用情况
GET /_stats/fielddata?fields=*

# 将 fielddata 熔断器上限设置为堆内存的 20%, 超过该上限的请求会被拒绝 (熔断器本身不会清理旧数据; 如需自动淘汰, 请配置 indices.fielddata.cache.size)
PUT _cluster/settings
{
"persistent" : {
"indices.breaker.fielddata.limit" : "20%"
}
}
# 清理指定 index 缓存
POST monitor_2018-09-12/_cache/clear

# 查看集群状态
GET _cluster/stats

# 修复集群yellow状态(单节点)
curl -XPUT "http://localhost:9200/_settings" -d' { "number_of_replicas" : 0 } ' -H 'Content-Type: application/json'

# 自动分片, 如果不需要则设置为"none"
PUT /_cluster/settings
{
"transient": {
"cluster.routing.allocation.enable":"all"
}
}

# 设置 fielddata 可用内存大小
put /_cluster/settings
{
"persistent" : {
"indices.breaker.fielddata.limit":"60%",
"indices.breaker.request.limit":"40%",
"indices.breaker.total.limit":"70%"
}
}

# 统计一天内有多少异常用户访问
# 对 deviceid 总量进行 group_by 查询聚合
GET monitor_2018-10-25/_search
{
"aggs" : {
"groupBy" : {
"terms" : {
"field" : "deviceId",
"size" : 20
}
}
},
"size":0
}

# 查询指定时间区间的数据
curl -XGET "localhost:9900/searchengine/_count?pretty" -d '{
"query":
{"range":
{"@timestamp":
{"gte" :
"2018-11-15T11:56:08.000+0800",
"lte": "2018-11-15T11:59:08.000+0800"
}
}
}
}' -H 'Content-Type: application/json'

# elastic 对某字段进行去重后统计数量
GET /cars/transactions/_search
{
"size" : 0,
"aggs" : {
"distinct_colors" : {
"cardinality" : {
"field" : "color"
}
}
}
}

/* 修改默认timestamp类型 */

PUT feeds
{
"mappings": {
"monitor":{
"properties": {
"ts":{
"type": "date",
"format": "epoch_second"
}
}
}
}
}

# 创建名为feeds的空index
curl -XPUT 'http://127.0.0.1:9200/feeds'

# 指定这个mapping
curl -XPOST "http://127.0.0.1:9200/feeds/monitor/_mapping?pretty" -d '{"monitor": {"properties": {"title": {"type": "text"}, "from": {"type": "text", "fielddata": true}, "ts": {"type": "date", "format": "epoch_second"}}}}' -H 'Content-Type: application/json'

# 需要指定grafana可查询字段则设置fielddata:true
# 如果需要指定unix时间戳 则设置type: date 并把format设置epoch_second

# 指定mapping的json结构体如下
{
"monitor": {
"properties": {
"title": {
"type": "text"
},
"from": {
"type": "text",
"fielddata": true
},
"ts": {
"type": "date",
"format": "epoch_second"
}
}
}
}

# 清空index的type中全部的数据
curl -XPOST 'localhost:9200/feeds/monitor/_delete_by_query?refresh&slices=5&pretty' -H 'Content-Type: application/json' -d'{"query": {"match_all": {}}}'

# 清空index的type中超过7天的数据
curl -H 'Content-Type:application/json' -d '{"query":{"range":{"@timestamp":{"lt":"now-7d","format":"epoch_millis"}}}}' -X POST "http://localhost:9200/*/_delete_by_query?pretty"


# 创建文档的template, 在相同index_patterns前缀下, 所有新建的index都使用同一个模板
PUT /_template/account_docs_template
{
"index_patterns": "account_docs_*",
"settings": {
"index": {
"number_of_shards": 3,
"number_of_replicas": 1
}
},
"mappings": {
"doc":{
"properties": {
"detail": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"title": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"ts":{
"type": "date",
"format": "epoch_second"
},
"group_id":{
"type": "text"
},
"publish_time":{
"type": "text"
},
"publish_info":{
"type": "text"
},
"publisher":{
"type": "text"
}
}
}
}
}