我需要用關鍵字 temp 或 TEMP 都能搜尋到帳戶 temp123 和 TEMP456。
以下是我使用 ngram tokenizer 的索引設定和一些示例文件(document)。
# index
PUT /demo
{
  "settings": {
    "index": {
      "max_ngram_diff": "20",
      "analysis": {
        "analyzer": {
          "account_analyzer": {
            "tokenizer": "account_tokenizer"
          }
        },
        "tokenizer": {
          "account_tokenizer": {
            "type": "ngram",
            "min_gram": "1",
            "max_gram": "15",
            "token_chars": [
              "letter",
              "digit"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "account": {
        "type": "text",
        "analyzer": "account_analyzer",
        "search_analyzer": "standard",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      }
    }
  }
}
# docs
PUT /demo/_doc/1
{
  "account": "temp123"
}

PUT /demo/_doc/2
{
  "account": "TEMP456"
}
我希望通過以下查詢能同時取回這兩個文件,但實際上只得到 doc 1,似乎無法匹配到含有大寫字母的文件。
我應該怎麼做,才能用 temp 或 TEMP 都取回這兩個文件?
POST /demo/_search/
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "account": {
              "query": "temp",
              "fuzziness": "AUTO"
            }
          }
        }
      ]
    }
  }
}
POST /demo/_search/
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "account": {
              "query": "TEMP",
              "fuzziness": "AUTO"
            }
          }
        }
      ]
    }
  }
}
uj5u.com熱心網友回復:
您可以使用 _analyze API 檢查分析器在索引時實際生成的詞元(token)。
GET demo/_analyze
{
  "analyzer": "account_analyzer",
  "text": [
    "TEMP123"
  ]
}
"tokens" : [
{
"token" : "T",
"start_offset" : 0,
"end_offset" : 1,
"type" : "word",
"position" : 0
},
{
"token" : "TE",
"start_offset" : 0,
"end_offset" : 2,
"type" : "word",
"position" : 1
},
{
"token" : "TEM",
"start_offset" : 0,
"end_offset" : 3,
"type" : "word",
"position" : 2
},
{
"token" : "TEMP",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 3
},
{
"token" : "TEMP1",
"start_offset" : 0,
"end_offset" : 5,
"type" : "word",
"position" : 4
},
{
"token" : "TEMP12",
"start_offset" : 0,
"end_offset" : 6,
"type" : "word",
"position" : 5
},
{
"token" : "TEMP123",
"start_offset" : 0,
"end_offset" : 7,
"type" : "word",
"position" : 6
},
{
"token" : "E",
"start_offset" : 1,
"end_offset" : 2,
"type" : "word",
"position" : 7
},
{
"token" : "EM",
"start_offset" : 1,
"end_offset" : 3,
"type" : "word",
"position" : 8
},
{
"token" : "EMP",
"start_offset" : 1,
"end_offset" : 4,
"type" : "word",
"position" : 9
},
{
"token" : "EMP1",
"start_offset" : 1,
"end_offset" : 5,
"type" : "word",
"position" : 10
},
{
"token" : "EMP12",
"start_offset" : 1,
"end_offset" : 6,
"type" : "word",
"position" : 11
},
{
"token" : "EMP123",
"start_offset" : 1,
"end_offset" : 7,
"type" : "word",
"position" : 12
},
{
"token" : "M",
"start_offset" : 2,
"end_offset" : 3,
"type" : "word",
"position" : 13
},
{
"token" : "MP",
"start_offset" : 2,
"end_offset" : 4,
"type" : "word",
"position" : 14
},
{
"token" : "MP1",
"start_offset" : 2,
"end_offset" : 5,
"type" : "word",
"position" : 15
},
{
"token" : "MP12",
"start_offset" : 2,
"end_offset" : 6,
"type" : "word",
"position" : 16
},
{
"token" : "MP123",
"start_offset" : 2,
"end_offset" : 7,
"type" : "word",
"position" : 17
},
{
"token" : "P",
"start_offset" : 3,
"end_offset" : 4,
"type" : "word",
"position" : 18
},
{
"token" : "P1",
"start_offset" : 3,
"end_offset" : 5,
"type" : "word",
"position" : 19
},
{
"token" : "P12",
"start_offset" : 3,
"end_offset" : 6,
"type" : "word",
"position" : 20
},
{
"token" : "P123",
"start_offset" : 3,
"end_offset" : 7,
"type" : "word",
"position" : 21
},
{
"token" : "1",
"start_offset" : 4,
"end_offset" : 5,
"type" : "word",
"position" : 22
},
{
"token" : "12",
"start_offset" : 4,
"end_offset" : 6,
"type" : "word",
"position" : 23
},
{
"token" : "123",
"start_offset" : 4,
"end_offset" : 7,
"type" : "word",
"position" : 24
},
{
"token" : "2",
"start_offset" : 5,
"end_offset" : 6,
"type" : "word",
"position" : 25
},
{
"token" : "23",
"start_offset" : 5,
"end_offset" : 7,
"type" : "word",
"position" : 26
},
{
"token" : "3",
"start_offset" : 6,
"end_offset" : 7,
"type" : "word",
"position" : 27
}
]
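如上所示,account_analyzer 在索引時會保留原始大小寫(TEMP123 產生的詞元全部是大寫),而搜尋時使用的 standard 分析器會把查詢詞轉為小寫,所以不論輸入 temp 還是 TEMP,實際查詢的都是小寫的 temp,無法匹配到大寫的詞元。您需要向 account_analyzer 添加一個 lowercase 過濾器,使索引時生成的所有詞元都轉為小寫。注意:修改分析器後,必須重建索引並重新寫入文件,新的設定才會對已有資料生效。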
{
  "settings": {
    "index": {
      "max_ngram_diff": "20",
      "analysis": {
        "analyzer": {
          "account_analyzer": {
            "tokenizer": "account_tokenizer",
            "filter": [
              "lowercase"
            ]
          }
        },
        "tokenizer": {
          "account_tokenizer": {
            "type": "ngram",
            "min_gram": "1",
            "max_gram": "15",
            "token_chars": [
              "letter",
              "digit"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "account": {
        "type": "text",
        "analyzer": "account_analyzer",
        "search_analyzer": "standard",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      }
    }
  }
}
轉載請註明出處,本文鏈接:https://www.uj5u.com/qianduan/375333.html
