使用Elasticsearch的文本类型字段。

huangapple go评论97阅读模式
英文:

Use Elasticsearch text type field

问题

以下是问题的详细描述:

数据详情:

  1. {
  2. "took": 0,
  3. "timed_out": false,
  4. "_shards": {
  5. "total": 1,
  6. "successful": 1,
  7. "skipped": 0,
  8. "failed": 0
  9. },
  10. "hits": {
  11. "total": {
  12. "value": 1,
  13. "relation": "eq"
  14. },
  15. "max_score": 3.7750573,
  16. "hits": [
  17. {
  18. "_index": "myindex",
  19. "_id": "1650421750907600896",
  20. "_score": 3.7750573,
  21. "_source": {
  22. "areaCodeList": "350112201201,0,350112201202"
  23. }
  24. }
  25. ]
  26. }
  27. }

areaCodeList 是一个使用 ik 分词器的文本字段:

  1. POST /myindex/_analyze
  2. {
  3. "field": "areaCodeList",
  4. "text": "350112201201,0,350112201202"
  5. }
  1. {
  2. "tokens": [
  3. {
  4. "token": "350112201201,0,350112201202",
  5. "start_offset": 0,
  6. "end_offset": 27,
  7. "type": "ARABIC",
  8. "position": 0
  9. },
  10. {
  11. "token": "350112201201",
  12. "start_offset": 0,
  13. "end_offset": 12,
  14. "type": "LETTER",
  15. "position": 1
  16. },
  17. {
  18. "token": "0",
  19. "start_offset": 13,
  20. "end_offset": 14,
  21. "type": "LETTER",
  22. "position": 2
  23. },
  24. {
  25. "token": "350112201202",
  26. "start_offset": 15,
  27. "end_offset": 27,
  28. "type": "LETTER",
  29. "position": 3
  30. }
  31. ]
  32. }

最后,我使用以下查询语句,但结果为空:

  1. GET myindex/_search
  2. {
  3. "query": {
  4. "match": {
  5. "areaCodeList": "350112201201"
  6. }
  7. },
  8. "_source": ["areaCodeList"]
  9. }

如何匹配逗号分隔的数据?

英文:

Data detail:

  1. {
  2. "took": 0,
  3. "timed_out": false,
  4. "_shards": {
  5. "total": 1,
  6. "successful": 1,
  7. "skipped": 0,
  8. "failed": 0
  9. },
  10. "hits": {
  11. "total": {
  12. "value": 1,
  13. "relation": "eq"
  14. },
  15. "max_score": 3.7750573,
  16. "hits": [
  17. {
  18. "_index": "myindex",
  19. "_id": "1650421750907600896",
  20. "_score": 3.7750573,
  21. "_source": {
  22. "areaCodeList": "350112201201,0,350112201202"
  23. }
  24. }
  25. ]
  26. }
  27. }

areaCodeList is a text field that uses the ik tokenizer:

  1. POST /myindex/_analyze
  2. {
  3. "field": "areaCodeList",
  4. "text": "350112201201,0,350112201202"
  5. }
  1. {
  2. "tokens": [
  3. {
  4. "token": "350112201201,0,350112201202",
  5. "start_offset": 0,
  6. "end_offset": 27,
  7. "type": "ARABIC",
  8. "position": 0
  9. },
  10. {
  11. "token": "350112201201",
  12. "start_offset": 0,
  13. "end_offset": 12,
  14. "type": "LETTER",
  15. "position": 1
  16. },
  17. {
  18. "token": "0",
  19. "start_offset": 13,
  20. "end_offset": 14,
  21. "type": "LETTER",
  22. "position": 2
  23. },
  24. {
  25. "token": "350112201202",
  26. "start_offset": 15,
  27. "end_offset": 27,
  28. "type": "LETTER",
  29. "position": 3
  30. }
  31. ]
  32. }

Finally, I use the following query, but the result is empty:

  1. GET myindex/_search
  2. {
  3. "query": {
  4. "match": {
  5. "areaCodeList": "350112201201"
  6. }
  7. },
  8. "_source": ["areaCodeList"]
  9. }

How can I match comma-separated data?

答案1

得分: 0

你可以使用模式分析器(pattern analyzer)。默认情况下,它以所有非单词字符作为分隔符对文本进行分词。

模式分析器使用正则表达式将文本分割成词项。正则表达式应该匹配标记分隔符而不是词项本身。正则表达式的默认值为\W+(或所有非单词字符)。
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html

POST _analyze
{
"tokenizer": "pattern",
"text": "350112201201,0,350112201202"
}

PUT test_code_list
{
"mappings": {
"properties": {
"areaCodeList": {
"type": "text",
"analyzer": "pattern"
}
}
}
}

PUT test_code_list/_doc/1
{
"areaCodeList": "350112201201,0,350112201202"
}

GET test_code_list/_search
{
"query": {
"match": {
"areaCodeList": "350112201201"
}
}
}

英文:

You can use the pattern analyzer. By default, it splits the text into tokens on any run of non-word characters.

> The pattern analyzer uses a regular expression to split the text into
> terms. The regular expression should match the token separators not
> the tokens themselves. The regular expression defaults to \W+ (or all
> non-word characters).
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pattern-analyzer.html

  1. POST _analyze
  2. {
  3. "tokenizer": "pattern",
  4. "text": "350112201201,0,350112201202"
  5. }
  6. PUT test_code_list
  7. {
  8. "mappings": {
  9. "properties": {
  10. "areaCodeList": {
  11. "type": "text",
  12. "analyzer": "pattern"
  13. }
  14. }
  15. }
  16. }
  17. PUT test_code_list/_doc/1
  18. {
  19. "areaCodeList": "350112201201,0,350112201202"
  20. }
  21. GET test_code_list/_search
  22. {
  23. "query": {
  24. "match": {
  25. "areaCodeList": "350112201201"
  26. }
  27. }
  28. }

使用Elasticsearch的文本类型字段。

使用Elasticsearch的文本类型字段。

huangapple
  • 本文由 发表于 2023年7月10日 15:21:09
  • 转载请务必保留本文链接:https://go.coder-hub.com/76651507.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定