我們正在使用 Elasticsearch 7.9
我們的索引名為 method_info_tree,包含兩級嵌套欄位(連同頂層共三層):
- 頂層描述了一個 Java 方法,
- 它下面的第一個嵌套級別(threads)描述了運行該方法的執行緒,並且
- 執行緒內的第二個嵌套級別描述了該執行緒隨時間的狀態。
下面是 Elasticsearch 中索引的映射:
{
"mappings": {
"properties": {
"method_id" : {
"type" : "long"
},
"threads": {
"type": "nested",
"properties": {
"thread_id": {
"type": "long"
},
"states_hit": {
"type": "nested",
"properties": {
"collect_time": {
"type": "date"
},
"state": {
"type": "keyword"
},
"elapsed_time": {
"type" : "integer"
}
}
}
}
}
}
}
}
這是索引中的示例文件(下面是一次搜尋的回應,文件本身位於 hits.hits 的 _source 中):
{
"took" : 13,
"timed_out" : false,
"_shards" : {
"total" : 2,
"successful" : 2,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5198,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "method_info_tree-000001",
"_type" : "_doc",
"_id" : "WiHCCYQBhPdvF01n3kp1",
"_score" : 0.0,
"_routing" : "86163-d5c064d0-55a3-44b9-88fb-c44b7233cfa4",
"_source" : {
"timestamp" : 1666610993800,
"method_id" : 140280075031760,
"threads" : [
{
"thread_id" : 1,
"states_hit" : [
{
"state" : "RUNNABLE",
"collect_time" : 1666610994750,
"elapsed_time" : 50
},
{
"state" : "IO",
"collect_time" : 1666610994800,
"elapsed_time" : 50
},
{
"state" : "IO",
"collect_time" : 1666610994850,
"elapsed_time" : 50
},
{
"state" : "IO",
"collect_time" : 1666610994900,
"elapsed_time" : 50
},
{
"state" : "IO",
"collect_time" : 1666610994950,
"elapsed_time" : 50
},
{
"state" : "IO",
"collect_time" : 1666610995000,
"elapsed_time" : 50
},
{
"state" : "IO",
"collect_time" : 1666610995050,
"elapsed_time" : 50
},
{
"state" : "IO",
"collect_time" : 1666610995100,
"elapsed_time" : 50
},
{
"state" : "IO",
"collect_time" : 1666610995150,
"elapsed_time" : 50
}
]
}
]
}
}
]
}
}
請注意,對於每個 method_id,我們都有許多對應不同 thread_id 的文件。
我想針對每個 method_id,計算每個狀態的 elapsed_time 欄位總和(涵蓋所有執行緒),例如:
method_id ->
[
{
"state" : "IO",
"elapsed_time" : 566622.0
},
{
"state" : "RUNNABLE",
"elapsed_time" : 566572.0
},
{
"state" : "BLOCKED",
"elapsed_time" : 50.0
}
]
下面是我的 Elasticsearch 查詢:
GET method_info_tree/_search
{
"from": 0,
"size": 0,
"track_total_hits": true,
"query": {
"bool": {
"filter": [
{
"term": {
"session_id": "86163-d5c064d0-55a3-44b9-88fb-c44b7233cfa4"
}
},
{
"nested": {
"path": "threads.states_hit",
"query": {
"bool": {
"filter": [
{
"range": {
"threads.states_hit.collect_time": {
"gte": 0,
"lte": 2000000000000
}
}
}
]
}
}
}
}
]
}
},
"aggs": {
"top_methods_agg": {
"terms": {
"field": "method_id",
"size": 20
},
"aggs": {
"elapsed_time_agg": {
"nested": {
"path": "threads.states_hit"
},
"aggs": {
"states_range": {
"range": {
"field": "threads.states_hit.collect_time",
"ranges": [
{
"from": 0,
"to": 2000000000001
}
]
},
"aggs": {
"elapsed_time_per_state_agg": {
"terms": {
"field": "threads.states_hit.state",
"size": 10
},
"aggs": {
"elapsed_time": {
"sum": {
"field": "threads.states_hit.elapsed_time"
}
}
}
},
"total_self_elapsed_time": {
"sum": {
"field": "threads.states_hit.elapsed_time"
}
},
"wasted_elapsed_time": {
"filter": {
"terms": {
"threads.states_hit.state": [
"BLOCKED",
"IO"
]
}
},
"aggs": {
"total_wasted": {
"sum": {
"field": "threads.states_hit.elapsed_time"
}
}
}
}
}
}
}
}
}
}
}
}
示例結果將是:
{
"took" : 218,
"timed_out" : false,
"_shards" : {
"total" : 2,
"successful" : 2,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5727,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"top_methods_agg" : {
"doc_count_error_upper_bound" : 1,
"sum_other_doc_count" : 73,
"buckets" : [
{
"key" : 140280074341584,
"doc_count" : 728,
"elapsed_time_agg" : {
"doc_count" : 21838,
"states_range" : {
"buckets" : [
{
"key" : "1970-01-01T00:00:00.000Z-2033-05-18T03:33:20.001Z",
"from" : 0.0,
"from_as_string" : "1970-01-01T00:00:00.000Z",
"to" : 2.000000000001E12,
"to_as_string" : "2033-05-18T03:33:20.001Z",
"doc_count" : 21838,
"total_self_elapsed_time" : {
"value" : 1133244.0
},
"wasted_elapsed_time" : {
"doc_count" : 1,
"total_wasted" : {
"value" : 50.0
}
},
"elapsed_time_per_state_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "IO",
"doc_count" : 10919,
"elapsed_time" : {
"value" : 566622.0
}
},
{
"key" : "RUNNABLE",
"doc_count" : 10918,
"elapsed_time" : {
"value" : 566572.0
}
},
{
"key" : "BLOCKED",
"doc_count" : 1,
"elapsed_time" : {
"value" : 50.0
}
}
]
}
}
]
}
}
},
{
"key" : 140282650318928,
"doc_count" : 3,
"elapsed_time_agg" : {
"doc_count" : 3,
"states_range" : {
"buckets" : [
{
"key" : "1970-01-01T00:00:00.000Z-2033-05-18T03:33:20.001Z",
"from" : 0.0,
"from_as_string" : "1970-01-01T00:00:00.000Z",
"to" : 2.000000000001E12,
"to_as_string" : "2033-05-18T03:33:20.001Z",
"doc_count" : 3,
"total_self_elapsed_time" : {
"value" : 150.0
},
"wasted_elapsed_time" : {
"doc_count" : 0,
"total_wasted" : {
"value" : 0.0
}
},
"elapsed_time_per_state_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "RUNNABLE",
"doc_count" : 3,
"elapsed_time" : {
"value" : 150.0
}
}
]
}
}
]
}
}
}
]
}
}
}
請注意,我刪除了部分結果桶(bucket),讓示例更加簡潔。
我的問題:我需要按「total_self_elapsed_time」對結果進行排序,並且只回傳前 5 個結果。由於結果是嵌套的,我無法存取計算出來的欄位「total_self_elapsed_time」。您能否指導我如何在查詢中加入依此聚合欄位的排序?
uj5u.com熱心網友回復:
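我已經用 filter 聚合替換了 states_range 的 range 聚合。range 聚合會為指定的每個範圍各產生一個桶,屬於多桶(multi-bucket)聚合;而 terms 聚合的排序路徑只能經過單桶(single-bucket)聚合,因此無法依據多桶子聚合底下的值來排序。
為了排序,我在 terms 聚合中使用了 "order" 參數。(下面的示例為升序 "asc" 且 size 為 20;若要取 total_self_elapsed_time 最大的前 5 個,請改為 "desc" 並將 size 設為 5。)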
查詢:
"aggs": {
"top_methods_agg": {
"terms": {
"field": "method_id",
"size": 20,
"order": {
"elapsed_time_agg>states_range>total_self_elapsed_time": "asc"
}
},
"aggs": {
"elapsed_time_agg": {
"nested": {
"path": "threads.states_hit"
},
"aggs": {
"states_range": {
"filter": {
"range": {
"threads.states_hit.collect_time": {
"gte": 0,
"lte": 2000000000000
}
}
},
"aggs": {
"elapsed_time_per_state_agg": {
"terms": {
"field": "threads.states_hit.state",
"size": 10
},
"aggs": {
"elapsed_time": {
"sum": {
"field": "threads.states_hit.elapsed_time"
}
}
}
},
"total_self_elapsed_time": {
"sum": {
"field": "threads.states_hit.elapsed_time"
}
},
"wasted_elapsed_time": {
"filter": {
"terms": {
"threads.states_hit.state": [
"BLOCKED",
"IO"
]
}
},
"aggs": {
"total_wasted": {
"sum": {
"field": "threads.states_hit.elapsed_time"
}
}
}
}
}
}
}
}
}
}
}
試試上面的,看看它是否適合你。
uj5u.com熱心網友回復:
為了完整起見,以下是最終有效的查詢(改用 bucket_sort 管線聚合,依 total_self_elapsed_time 降序排序,並只保留前 5 個桶):
"aggs": {
"top_methods_agg": {
"terms": {
"field": "method_id",
"size": 20
},
"aggs": {
"elapsed_time_agg": {
"nested": {
"path": "threads.states_hit"
},
"aggs": {
"states_range": {
"filter": {
"range": {
"threads.states_hit.collect_time": {
"gte": 0,
"lte": 2000000000000
}
}
},
"aggs": {
"elapsed_time_per_state_agg": {
"terms": {
"field": "threads.states_hit.state",
"size": 10
},
"aggs": {
"elapsed_time": {
"sum": {
"field": "threads.states_hit.elapsed_time"
}
}
}
},
"total_self_elapsed_time": {
"sum": {
"field": "threads.states_hit.elapsed_time"
}
},
"wasted_elapsed_time": {
"filter": {
"terms": {
"threads.states_hit.state": [
"BLOCKED",
"IO"
]
}
},
"aggs": {
"total_wasted": {
"sum": {
"field": "threads.states_hit.elapsed_time"
}
}
}
}
}
}
}
},
"top_methods_sort": {
"bucket_sort": {
"sort": [
{
"elapsed_time_agg>states_range>total_self_elapsed_time": {
"order": "desc"
}
}
],
"size": 5
}
}
}
}
}
非常感謝!
轉載請註明出處,本文鏈接:https://www.uj5u.com/qiye/522666.html
標籤:排序、Elasticsearch、嵌套、聚合
上一篇:C 中四叉樹實作的奇怪問題
下一篇:如何按行對專案進行排序?
