elasticsearch研习(九)

# 标准分析器
POST _analyze
{
  "analyzer": "standard",
  "text":"To be or not to be,  That is a question ———— 莎士比亚"
}

POST _analyze
{
  "analyzer": "simple",
  "text":"To be or not to be,  That is a question ———— 莎士比亚"
}

POST _analyze
{
  "analyzer": "whitespace",
  "text":"To be or not to be,  That is a question ———— 莎士比亚"
}

POST _analyze
{
  "analyzer": "stop",
  "text":"To be or not to be,  That is a question ———— 莎士比亚"
}

POST _analyze
{
  "analyzer": "keyword",
  "text":"To be or not to be,  That is a question ———— 莎士比亚"
}

POST _analyze
{
  "analyzer": "pattern",
  "explain": false, 
  "text":"To be or not to be,  That is a question ———— 莎士比亚"
}

#自定义分析器
PUT pattern_test
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_email_analyzer":{
          "type":"pattern",
          "pattern":"\\W|_",
          "lowercase":true
        }
      }
    }
  }
}

#语言和多语言分析器:chinese
POST _analyze
{
  "analyzer": "chinese",
  "text":"To be or not to be,  That is a question ———— 莎士比亚"
}


#雪球分析器:snowball analyzer
POST _analyze
{
  "analyzer": "snowball",
  "text":"To be or not to be,  That is a question ———— 莎士比亚"
}

#html字符过滤器
POST _analyze
{
  "tokenizer": "keyword",
  "char_filter": ["html_strip"],
  "text":"<p>I&apos;m so <b>happy</b>!</p>"
}

#映射字符过滤器
PUT pattern_test4
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer":{
          "tokenizer":"keyword",
          "char_filter":["my_char_filter"]
        }
      },
      "char_filter":{
          "my_char_filter":{
            "type":"mapping",
            "mappings":["苍井空 => 666","武藤兰 => 888"]
          }
        }
    }
  }
}

POST pattern_test4/_analyze
{
  "analyzer": "my_analyzer",
  "text": "苍井空热爱武藤兰,可惜后来苍井空结婚了"
}

#模式替换过滤器
PUT pattern_test5
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "char_filter": [
            "my_char_filter"
          ]
        }
      },
      "char_filter": {
        "my_char_filter": {
          "type": "pattern_replace",
          "pattern": "(\\d+)-(?=\\d)",
          "replacement": "$1_"
        }
      }
    }
  }
}
POST pattern_test5/_analyze
{
  "analyzer": "my_analyzer",
  "text": "My credit card is 123-456-789"
}

版权声明:除特别注明外,本站所有文章均为王晨曦个人站点原创

转载请注明:出处来自王晨曦个人站点 » elasticsearch研习(九)

点赞

发表评论

电子邮件地址不会被公开。 必填项已用*标注