经验首页 前端设计 程序设计 Java相关 移动开发 数据库/运维 软件/图像 大数据/云计算 其他经验
当前位置:技术经验 » 程序设计 » Elasticsearch » 查看文章
Elasticsearch搜索功能的实现(五)-- 实战
来源:cnblogs  作者:gdwkong  时间:2023/4/19 9:02:06  对本文有异议

实战环境

Elasticsearch 8.5.0 + Kibana 8.5.0 + Spring Boot 3.0.2 + Spring Data Elasticsearch 5.0.2 + JDK 17

一、集成 spring data elasticsearch

1 添加依赖

  1. <dependency>
  2. <groupId>org.springframework.boot</groupId>
  3. <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
  4. </dependency>

2 配置es连接

  1. @Configuration
  2. public class ElasticsearchConfig extends ElasticsearchConfiguration {
  3. @Override
  4. public ClientConfiguration clientConfiguration() {
  5. return ClientConfiguration.builder()
  6. .connectedTo("127.0.0.1:9200")
  7. .withBasicAuth("elastic", "********")
  8. .build();
  9. }
  10. }

3 配置打印DSL语句

  # Logging configuration
  logging:
    level:
      # Elasticsearch wire log: prints the DSL requests sent to the cluster
      org.springframework.data.elasticsearch.client.WIRE: trace

二、index及mapping 文件编写

  1. @Data
  2. @Document(indexName = "news") //索引名
  3. @Setting(shards = 1,replicas = 0,refreshInterval = "1s") //shards 分片数 replicas 副本数
  4. @Schema(name = "News",description = "新闻对象")
  5. public class News implements Serializable {
  6. @Id //索引主键
  7. @NotBlank(message = "新闻ID不能为空")
  8. @Schema(type = "integer",description = "新闻ID",example = "1")
  9. private Integer id;
  10. @NotBlank(message = "新闻标题不能为空")
  11. @Schema(type = "String",description = "新闻标题")
  12. @MultiField(mainField = @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart"),
  13. otherFields = {@InnerField(type = FieldType.Keyword, suffix = "keyword") }) //混合类型字段 指定 建立索引时分词器与搜索时入参分词器
  14. private String title;
  15. @Schema(type = "LocalDate",description = "发布时间")
  16. @Field(type = FieldType.Date,format = DateFormat.date)
  17. private LocalDate pubDate;
  18. @Schema(type = "String",description = "来源")
  19. @Field(type = FieldType.Keyword)
  20. private String source;
  21. @Schema(type = "String",description = "行业类型代码",example = "1,2,3")
  22. @Field(type = FieldType.Text,analyzer = "ik_max_word",searchAnalyzer = "ik_smart")
  23. private String industry;
  24. @Schema(type = "String",description = "预警类型")
  25. @Field(type = FieldType.Keyword)
  26. private String type;
  27. @Schema(type = "String",description = "涉及公司")
  28. @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")
  29. private String companies;
  30. @Schema(type = "String",description = "新闻内容")
  31. @Field(type = FieldType.Text, analyzer = "ik_max_word", searchAnalyzer = "ik_smart")
  32. private String content;
  33. }

三、DAO层编写

  1. @Repository
  2. public interface NewsRepository extends ElasticsearchRepository<News,Integer> {
  3. Page<News> findByType(String type, Pageable pageable);
  4. }

四、简单功能实现

4.1 简单功能写法

  1. /**
  2. * 新增新闻
  3. * @param news
  4. * @return
  5. */
  6. @Override
  7. public void saveNews(News news) {
  8. newsRepository.save(news);
  9. }
  10. /**
  11. * 删除新闻
  12. * @param newsId
  13. */
  14. @Override
  15. public void delete(Integer newsId) {
  16. newsRepository.deleteById(newsId);
  17. }
  18. /**
  19. * 删除新闻索引
  20. */
  21. @Override
  22. public void deleteIndex() {
  23. operations.indexOps(News.class).delete();
  24. }
  25. /**
  26. * 创建索引
  27. */
  28. @Override
  29. public void createIndex() {
  30. operations.indexOps(News.class).createWithMapping();
  31. }
  32. @Override
  33. public PageResult findByType(String type) {
  34. // 先发布日期排序
  35. Sort sort = Sort.by(new Order(Sort.Direction.DESC, "pubDate"));
  36. Pageable pageable = PageRequest.of(0,10,sort);
  37. final Page<News> newsPage = newsRepository.findByType(type, pageable);
  38. return new PageResult(newsPage.getTotalElements(),newsPage.getContent());
  39. }

实现效果图片:
image

实际执行的DSL语句:
image

注意: 当指定了 _score 以外的排序条件时,Elasticsearch 默认不再计算相关性得分,返回结果中的 _score 为 null;如仍需得分,可将 _score 一并加入排序条件,或设置 track_scores 为 true。

4.2 搜索功能的实现

  1. @Override
  2. public PageResult searchNews(NewsPageSearch search) {
  3. //创建原生查询DSL对象
  4. final NativeQueryBuilder nativeQueryBuilder = new NativeQueryBuilder();
  5. // 先发布日期再得分排序
  6. Sort sort = Sort.by(new Order(Sort.Direction.DESC, "pubDate"),new Order(Sort.Direction.DESC, "_score"));
  7. Pageable pageable = PageRequest.of(search.getCurPage(), search.getPageSize(),sort);
  8. final BoolQuery.Builder boolBuilder = new BoolQuery.Builder();
  9. //过滤条件
  10. setFilter(search, boolBuilder);
  11. //关键字搜索
  12. if (StringUtils.isNotBlank(search.getKeyword())){
  13. setKeyWordAndHighlightField(search, nativeQueryBuilder, boolBuilder);
  14. }else {
  15. nativeQueryBuilder.withQuery(q -> q.bool(boolBuilder.build()));
  16. }
  17. nativeQueryBuilder.withPageable(pageable);
  18. SearchHits<News> searchHits = operations.search(nativeQueryBuilder.build(), News.class);
  19. //高亮回填封装
  20. final List<News> newsList = searchHits.getSearchHits().stream()
  21. .map(s -> {
  22. final News content = s.getContent();
  23. final List<String> title = s.getHighlightFields().get("title");
  24. final List<String> contentList = s.getHighlightFields().get("content");
  25. if (!CollectionUtils.isEmpty(title)){
  26. s.getContent().setTitle(title.get(0));
  27. }
  28. if (!CollectionUtils.isEmpty(contentList)){
  29. s.getContent().setContent(contentList.get(0));
  30. }
  31. return content;
  32. }).collect(Collectors.toList());
  33. return new PageResult<News>(searchHits.getTotalHits(),newsList);
  34. }
  35. /**
  36. * 设置过滤条件 行业类型 来源 预警类型
  37. * @param search
  38. * @param boolBuilder
  39. */
  40. private void setFilter(NewsPageSearch search, BoolQuery.Builder boolBuilder) {
  41. //行业类型
  42. if(StringUtils.isNotBlank(search.getIndustry())){
  43. // 按逗号拆分
  44. List<Query> industryQueries = Arrays.asList(search.getIndustry().split(",")).stream().map(p -> {
  45. Query.Builder queryBuilder = new Query.Builder();
  46. queryBuilder.term(t -> t.field("industry").value(p));
  47. return queryBuilder.build();
  48. }).collect(Collectors.toList());
  49. boolBuilder.filter(f -> f.bool(t -> t.should(industryQueries)));
  50. }
  51. // 来源
  52. if(StringUtils.isNotBlank(search.getSource())){
  53. // 按逗号拆分
  54. List<Query> sourceQueries = Arrays.asList(search.getSource().split(",")).stream().map(p -> {
  55. Query.Builder queryBuilder = new Query.Builder();
  56. queryBuilder.term(t -> t.field("source").value(p));
  57. return queryBuilder.build();
  58. }).collect(Collectors.toList());
  59. boolBuilder.filter(f -> f.bool(t -> t.should(sourceQueries)));
  60. }
  61. // 预警类型
  62. if(StringUtils.isNotBlank(search.getType())){
  63. // 按逗号拆分
  64. List<Query> typeQueries = Arrays.asList(search.getType().split(",")).stream().map(p -> {
  65. Query.Builder queryBuilder = new Query.Builder();
  66. queryBuilder.term(t -> t.field("type").value(p));
  67. return queryBuilder.build();
  68. }).collect(Collectors.toList());
  69. boolBuilder.filter(f -> f.bool(t -> t.should(typeQueries)));
  70. }
  71. //范围区间
  72. if (StringUtils.isNotBlank(search.getStartDate())){
  73. boolBuilder.filter(f -> f.range(r -> r.field("pubDate")
  74. .gte(JsonData.of(search.getStartDate()))
  75. .lte(JsonData.of(search.getEndDate()))));
  76. }
  77. }
  78. /**
  79. * 关键字搜索 title 权重更高
  80. * 高亮字段 title 、content
  81. * @param search
  82. * @param nativeQueryBuilder
  83. * @param boolBuilder
  84. */
  85. private void setKeyWordAndHighlightField(NewsPageSearch search, NativeQueryBuilder nativeQueryBuilder, BoolQuery.Builder boolBuilder) {
  86. final String keyword = search.getKeyword();
  87. //查询条件
  88. boolBuilder.must(b -> b.multiMatch(m -> m.fields("title","content","companies").query(keyword)));
  89. //高亮
  90. final HighlightFieldParameters.HighlightFieldParametersBuilder builder = HighlightFieldParameters.builder();
  91. builder.withPreTags("<font color='red'>")
  92. .withPostTags("</font>")
  93. .withRequireFieldMatch(true) //匹配才加标签
  94. .withNumberOfFragments(0); //显示全文
  95. final HighlightField titleHighlightField = new HighlightField("title", builder.build());
  96. final HighlightField contentHighlightField = new HighlightField("content", builder.build());
  97. final Highlight titleHighlight = new Highlight(List.of(titleHighlightField,contentHighlightField));
  98. nativeQueryBuilder.withQuery(
  99. f -> f.functionScore(
  100. fs -> fs.query(q -> q.bool(boolBuilder.build()))
  101. .functions( FunctionScore.of(func -> func.filter(
  102. fq -> fq.match(ft -> ft.field("title").query(keyword))).weight(100.0)),
  103. FunctionScore.of(func -> func.filter(
  104. fq -> fq.match(ft -> ft.field("content").query(keyword))).weight(20.0)),
  105. FunctionScore.of(func -> func.filter(
  106. fq -> fq.match(ft -> ft.field("companies").query(keyword))).weight(10.0)))
  107. .scoreMode(FunctionScoreMode.Sum)
  108. .boostMode(FunctionBoostMode.Sum)
  109. .minScore(1.0)))
  110. .withHighlightQuery(new HighlightQuery(titleHighlight,News.class));
  111. }

实现效果

加权前效果:
image

加权后效果:
image

DSL 语句:

  1. {
  2. "from": 0,
  3. "size": 6,
  4. "sort": [{
  5. "pubDate": {
  6. "mode": "min",
  7. "order": "desc"
  8. }
  9. }, {
  10. "_score": {
  11. "order": "desc"
  12. }
  13. }],
  14. "highlight": {
  15. "fields": {
  16. "title": {
  17. "number_of_fragments": 0,
  18. "post_tags": ["</font>"],
  19. "pre_tags": ["<font color='red'>"]
  20. },
  21. "content": {
  22. "number_of_fragments": 0,
  23. "post_tags": ["</font>"],
  24. "pre_tags": ["<font color='red'>"]
  25. }
  26. }
  27. },
  28. "query": {
  29. "function_score": {
  30. "boost_mode": "sum",
  31. "functions": [{
  32. "filter": {
  33. "match": {
  34. "title": {
  35. "query": "立足优势稳住外贸基本盘"
  36. }
  37. }
  38. },
  39. "weight": 100.0
  40. }, {
  41. "filter": {
  42. "match": {
  43. "content": {
  44. "query": "立足优势稳住外贸基本盘"
  45. }
  46. }
  47. },
  48. "weight": 20.0
  49. }, {
  50. "filter": {
  51. "match": {
  52. "companies": {
  53. "query": "立足优势稳住外贸基本盘"
  54. }
  55. }
  56. },
  57. "weight": 10.0
  58. }],
  59. "min_score": 1.0,
  60. "query": {
  61. "bool": {
  62. "filter": [{
  63. "bool": {
  64. "should": [{
  65. "term": {
  66. "industry": {
  67. "value": "1"
  68. }
  69. }
  70. }, {
  71. "term": {
  72. "industry": {
  73. "value": "2"
  74. }
  75. }
  76. }, {
  77. "term": {
  78. "industry": {
  79. "value": "3"
  80. }
  81. }
  82. }]
  83. }
  84. }, {
  85. "bool": {
  86. "should": [{
  87. "term": {
  88. "source": {
  89. "value": "新华社"
  90. }
  91. }
  92. }, {
  93. "term": {
  94. "source": {
  95. "value": "中国经济网"
  96. }
  97. }
  98. }]
  99. }
  100. }, {
  101. "bool": {
  102. "should": [{
  103. "term": {
  104. "type": {
  105. "value": "经济简报"
  106. }
  107. }
  108. }, {
  109. "term": {
  110. "type": {
  111. "value": "外贸简报"
  112. }
  113. }
  114. }]
  115. }
  116. }, {
  117. "range": {
  118. "pubDate": {
  119. "gte": "2023-03-29",
  120. "lte": "2023-03-30"
  121. }
  122. }
  123. }],
  124. "must": [{
  125. "multi_match": {
  126. "fields": ["title", "content", "companies"],
  127. "query": "立足优势稳住外贸基本盘"
  128. }
  129. }]
  130. }
  131. },
  132. "score_mode": "sum"
  133. }
  134. },
  135. "track_scores": false,
  136. "version": true
  137. }

4.3 接口测试

image

原文链接:https://www.cnblogs.com/gdwkong/p/17331639.html

 友情链接:直通硅谷  点职佳  北美留学生论坛

本站QQ群:前端 618073944 | Java 606181507 | Python 626812652 | C/C++ 612253063 | 微信 634508462 | 苹果 692586424 | C#/.net 182808419 | PHP 305140648 | 运维 608723728

W3xue 的所有内容仅供测试,对任何法律问题及风险不承担任何责任。通过使用本站内容随之而来的风险与本站无关。
关于我们  |  意见建议  |  捐助我们  |  报错有奖  |  广告合作、友情链接(目前9元/月)请联系QQ:27243702 沸活量
皖ICP备17017327号-2 皖公网安备34020702000426号