基本格式如下所示：
PUT /my-index/_settings
{
  "index": {
    "analysis": {
      "analyzer": {
        "customHTMLSnowball": {
          "type": "custom",
          "char_filter": ["html_strip"],
          "tokenizer": "standard",
          "filter": ["lowercase", "stop", "snowball"]
        }
      }
    }
  }
}
{
  "settings": {
    "refresh_interval": "5s",
    "number_of_shards": 1,
    "number_of_replicas": 1,
    "analysis": {
      "char_filter": {
        "charconvert": {
          "type": "mapping",
          "mappings_path": "char_filter_text.txt"
        }
      },
      "tokenizer": {
        "ik_smart": {
          "type": "ik",
          "use_smart": true
        }
      },
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 50
        },
        "pinyin_simple_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": false,
          "keep_original": false,
          "limit_first_letter_length": 50,
          "lowercase": true
        },
        "pinyin_full_filter": {
          "type": "pinyin",
          "keep_first_letter": false,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "none_chinese_pinyin_tokenize": true,
          "keep_original": false,
          "limit_first_letter_length": 50,
          "lowercase": true
        },
        "t2s_convert": {
          "type": "stconvert",
          "delimiter": ",",
          "convert_type": "t2s"
        }
      },
      "analyzer": {
        "ngramIndexAnalyzer": {
          "type": "custom",
          "char_filter": ["charconvert"],
          "tokenizer": "keyword",
          "filter": ["edge_ngram_filter", "lowercase"]
        },
        "ngramSearchAnalyzer": {
          "type": "custom",
          "char_filter": ["charconvert"],
          "tokenizer": "keyword",
          "filter": ["lowercase"]
        },
        "ikIndexAnalyzer": {
          "type": "custom",
          "char_filter": ["charconvert"],
          "tokenizer": "ik"
        },
        "ikSearchAnalyzer": {
          "type": "custom",
          "char_filter": ["charconvert"],
          "tokenizer": "ik"
        },
        "pinyiSimpleIndexAnalyzer": {
          "tokenizer": "keyword",
          "filter": ["pinyin_simple_filter", "edge_ngram_filter", "lowercase"]
        },
        "pinyiSimpleSearchAnalyzer": {
          "tokenizer": "keyword",
          "filter": ["pinyin_simple_filter", "lowercase"]
        },
        "pinyiFullIndexAnalyzer": {
          "tokenizer": "keyword",
          "filter": ["pinyin_full_filter", "lowercase"]
        },
        "pinyiFullSearchAnalyzer": {
          "tokenizer": "keyword",
          "filter": ["pinyin_full_filter", "lowercase"]
        }
      }
    }
  }
}
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "mytokenizer": {
          "type": "ngram",
          "min_gram": 1,
          "max_gram": 2,
          "token_chars": ["letter", "digit", "whitespace", "punctuation", "symbol"]
        }
      },
      "analyzer": {
        "myanalyzer": {
          "tokenizer": "mytokenizer"
        }
      }
    }
  }
}
{
  "settings": {
    "analysis": {
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 50
        },
        "pinyin_simple_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": false,
          "keep_original": false,
          "limit_first_letter_length": 50,
          "lowercase": true
        },
        "pinyin_full_filter": {
          "type": "pinyin",
          "keep_first_letter": false,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "none_chinese_pinyin_tokenize": true,
          "keep_original": false,
          "limit_first_letter_length": 50,
          "lowercase": true
        }
      },
      "tokenizer": {
        "ik_max_word": {
          "type": "ik_max_word",
          "use_smart": true
        }
      },
      "analyzer": {
        "ngramIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "edge_ngram_filter",
            "lowercase"
          ]
        },
        "ikIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word"
        },
        "pinyiSimpleIndexAnalyzer": {
          "tokenizer": "keyword",
          "filter": [
            "pinyin_simple_filter",
            "edge_ngram_filter",
            "lowercase"
          ]
        },
        "pinyiFullIndexAnalyzer": {
          "tokenizer": "keyword",
          "filter": [
            "pinyin_full_filter",
            "lowercase"
          ]
        }
      }
    }
  }
}
下面是我们综合实战中使用的配置代码：
{
  "index": {
    "number_of_replicas": 1,
    "analysis": {
      "char_filter": {
        "ue_char_filter": {
          "type": "mapping",
          "mappings": [
            "- => ,",
            "— => ,"
          ]
        }
      },
      "tokenizer": {
        "ngram_tokenizer": {
          "type": "ngram",
          "min_gram": 2,
          "max_gram": 3,
          "token_chars": [
            "letter",
            "digit"
          ]
        }
      },
      "filter": {
        "my_pinyin": {
          "type": "pinyin",
          "keep_none_chinese": true,
          "keep_none_chinese_together": true,
          "none_chinese_pinyin_tokenize": false,
          "lowercase": true,
          "trim_whitespace": true,
          "keep_first_letter": true,
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true
        }
      },
      "analyzer": {
        "ue_ik_pinyin_analyzer": {
          "type": "custom",
          "char_filter": [
            "html_strip",
            "ue_char_filter"
          ],
          "tokenizer": "ik_max_word",
          "filter": [
            "my_pinyin"
          ]
        },
        "ue-ngram": {
          "type": "custom",
          "char_filter": [
            "html_strip",
            "ue_char_filter"
          ],
          "tokenizer": "ngram_tokenizer"
        }
      }
    }
  }
}