查看“Java Elasticsearch:聚合查询(Aggregation)”的源代码
←
Java Elasticsearch:聚合查询(Aggregation)
跳到导航
跳到搜索
因为以下原因,您没有权限编辑本页:
您请求的操作仅限属于该用户组的用户执行:
用户
您可以查看和复制此页面的源代码。
[[category:Java&ElasticSearch]] == 关于 == Elasticsearch 中的聚合查询,类似 SQL 的 '''SUM'''/'''AVG'''/'''COUNT'''/'''GROUP BY''' 分组查询,主要用于统计分析场景。 示例: <syntaxhighlight lang="Java" highlight=""> import org.apache.http.HttpHost; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestClientBuilder; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.index.query.*; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.Aggregations; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.Avg; import org.elasticsearch.search.builder.SearchSourceBuilder; import java.io.IOException; public class Main { public static void main(String[] args) throws IOException { // 首先创建RestClient,后续章节通过RestClient对象进行参数配置。 RestClientBuilder restClientBuilder = RestClient.builder( new HttpHost("localhost", 9200, "http"), // 设置ES服务地址,支持多个 new HttpHost("localhost", 9201, "http")); // 创建RestHighLevelClient,请求都是通过RestHighLevelClient实例发出去的。 RestHighLevelClient client = new RestHighLevelClient(restClientBuilder); // 创建SearchRequest对象, 设置查询索引名=order SearchRequest searchRequest = new SearchRequest("order"); // 通过SearchSourceBuilder构建搜索参数 SearchSourceBuilder builder = new SearchSourceBuilder(); // 通过QueryBuilders构建ES查询条件,这里查询所有文档。 builder.query(QueryBuilders.matchAllQuery()); // 创建terms桶聚合,聚合名字=by_shop, 字段=shop_id,根据shop_id分组 TermsAggregationBuilder aggregationBuilder = AggregationBuilders.terms("by_shop").field("shop_id"); // 嵌套聚合 // 设置Avg指标聚合,聚合名字=avg_price, 字段=price,计算平均价格 aggregationBuilder.subAggregation(AggregationBuilders.avg("avg_price").field("price")); // 设置聚合查询 builder.aggregation(aggregationBuilder); // 设置搜索条件 searchRequest.source(builder); // 如果只想返回聚合统计结果,不想返回查询结果可以将分页大小设置为0 builder.size(0); // 执行ES请求 SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); // 处理聚合查询结果 Aggregations aggregations = searchResponse.getAggregations(); // 根据by_shop名字查询terms聚合结果 Terms byShopAggregation = aggregations.get("by_shop"); // 遍历terms聚合结果 for (Terms.Bucket bucket : byShopAggregation.getBuckets()) { // 因为是根据shop_id分组,因此可以直接将桶的key转换成int类型 int shopId = bucket.getKeyAsNumber().intValue(); // 根据avg_price聚合名字,获取嵌套聚合结果 Avg avg = bucket.getAggregations().get("avg_price"); // 获取平均价格 double avgPrice = avg.getValue(); } // 关闭ES Client client.close(); } } </syntaxhighlight> 等效SQL: <syntaxhighlight lang="SQL" highlight=""> select shop_id, avg(price) as avg_price from order group by shop_id </syntaxhighlight> == 桶聚合(bucket) == Elasticsearch桶聚合,目的就是数据分组,先将数据按指定的条件分成多个组,然后对每一个组进行统计。 常用桶聚合: # Terms聚合 # Histogram聚合 # Date histogram聚合 # Range聚合 # 嵌套聚合 === Terms聚合 === 创建聚合条件: <syntaxhighlight lang="Java" highlight=""> // terms聚合命名为: by_shop, 分组字段为: shop_id TermsAggregationBuilder byShop = AggregationBuilders.terms("by_shop").field("shop_id"); </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> Aggregations aggregations = searchResponse.getAggregations(); // 根据by_shop命名查询terms聚合结果 Terms byShopAggregation = aggregations.get("by_shop"); // 遍历terms聚合结果 for (Terms.Bucket bucket : byShopAggregation.getBuckets()) { // 因为是根据shop_id分组,因此可以直接将桶的key转换成int类型 int shopId = bucket.getKeyAsNumber().intValue(); // 如果分组的字段是字符串类型,可以直接转成String类型 // String key = bucket.getKeyAsString(); // 获取文档总数 long count = bucket.getDocCount(); } </syntaxhighlight> === Histogram聚合 === 创建聚合条件: <syntaxhighlight lang="Java" highlight=""> // Histogram聚合命名为: prices HistogramAggregationBuilder histogramAggregationBuilder = AggregationBuilders.histogram("prices") .field("price") // 根据price字段值,对数据进行分组 .interval(100); // 分桶的间隔为100,意思就是price字段值按100间隔分组 </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> // 处理聚合查询结果 Aggregations aggregations = searchResponse.getAggregations(); // 根据prices命名查询Histogram聚合结果 Histogram histogram = aggregations.get("prices"); // 遍历聚合结果 for (Histogram.Bucket bucket : histogram.getBuckets()) { // 获取桶的Key值 String key = bucket.getKeyAsString(); // 获取文档总数 long count = bucket.getDocCount(); } </syntaxhighlight> === Date histogram聚合 === 创建聚合条件: <syntaxhighlight lang="Java" highlight=""> // DateHistogram聚合命名为: sales_over_time DateHistogramAggregationBuilder dateHistogramAggregationBuilder = AggregationBuilders.dateHistogram("sales_over_time") .field("date") // 根据date字段值,对数据进行分组 .calendarInterval(DateHistogramInterval.MONTH) // 时间分组间隔:DateHistogramInterval.* 序列常量,支持每月,每年,每天等等时间间隔 .format("yyyy-MM-dd"); // 设置返回结果中桶key的时间格式 </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> // 处理聚合查询结果 Aggregations aggregations = searchResponse.getAggregations(); // 根据sales_over_time命名查询Histogram聚合结果 Histogram histogram = aggregations.get("sales_over_time"); // 遍历聚合结果 for (Histogram.Bucket bucket : histogram.getBuckets()) { // 获取桶的Key值 String key = bucket.getKeyAsString(); // 获取文档总数 long count = bucket.getDocCount(); } </syntaxhighlight> === Range聚合 === 创建聚合条件: <syntaxhighlight lang="Java" highlight=""> //range聚合命名为: price_ranges RangeAggregationBuilder rangeAggregationBuilder = AggregationBuilders.range("price_ranges") .field("price") // 根据price字段分桶 .addUnboundedFrom(100) // 范围配置, 0 - 100 .addRange(100.0, 200.0) // 范围配置, 100 - 200 .addUnboundedTo(200.0); // 范围配置,> 200的值 </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> // 处理聚合查询结果 Aggregations aggregations = searchResponse.getAggregations(); Range range = aggregations.get("price_ranges"); // 遍历聚合结果 for (Range.Bucket bucket : range.getBuckets()) { // 获取桶的Key值 String key = bucket.getKeyAsString(); // 获取文档总数 long count = bucket.getDocCount(); } </syntaxhighlight> === 嵌套聚合 === 桶聚合之间支持互相嵌套,同时桶聚合也可以嵌套多个指标聚合。 创建嵌套聚合条件:【“桶聚合”嵌套两个“指标聚合”】 <syntaxhighlight lang="Java" highlight=""> // 创建Terms指标聚合 TermsAggregationBuilder byShop = AggregationBuilders.terms("by_shop").field("shop_id"); // 创建avg指标聚合 AvgAggregationBuilder avgPriceBuilder = AggregationBuilders.avg("avg_price").field("price"); // 设置嵌套聚合查询条件 byShop.subAggregation(avgPriceBuilder); // 创建sum指标聚合 SumAggregationBuilder sumPriceBulder = AggregationBuilders.sum("sum_price").field("price"); // 设置嵌套聚合查询条件 byShop.subAggregation(sumPriceBulder); </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> // 处理聚合查询结果 Aggregations aggregations = searchResponse.getAggregations(); Terms terms = aggregations.get("by_shop"); // 遍历聚合结果 for (Terms.Bucket bucket : terms.getBuckets()) { // 获取桶的Key值 String key = bucket.getKeyAsString(); // 获取文档总数 long count = bucket.getDocCount(); // 处理嵌套聚合结果 Aggregations subAggregations = bucket.getAggregations(); // 根据 avg_price 命名,查询 avg 聚合结果 Avg avgPriceResult = subAggregations.get("avg_price"); // 获取平均价格 double avgPrice = avgPriceResult.getValue(); // 根据 sum_price 命名,查询 sum 聚合结果 Sum sumPriceResult = subAggregations.get("sum_price"); // 获取总价格 double sumPrice = sumPriceResult.getValue(); } </syntaxhighlight> === ==示例== === <syntaxhighlight lang="Java" highlight=""> // 首先创建RestClient,后续章节通过RestClient对象进行参数配置。 RestClientBuilder restClientBuilder = RestClient.builder( new HttpHost("localhost", 9200, "http"), // 设置ES服务地址,支持多个 new HttpHost("localhost", 9201, "http")); // 创建RestHighLevelClient,请求都是通过RestHighLevelClient实例发出去的。 RestHighLevelClient client = new RestHighLevelClient(restClientBuilder); // 创建SearchRequest对象, 设置索引名=order SearchRequest searchRequest = new SearchRequest("order"); // 通过SearchSourceBuilder构建搜索参数 SearchSourceBuilder builder = new SearchSourceBuilder(); // 通过QueryBuilders构建ES查询条件,这里查询所有文档,复杂的查询语句设置请参考前面的章节。 builder.query(QueryBuilders.matchAllQuery()); // 创建Terms桶聚合条件 // terms聚合命名为: by_shop, 分组字段为: shop_id TermsAggregationBuilder byShop = AggregationBuilders.terms("by_shop").field("shop_id"); // 设置聚合条件 builder.aggregation(byShop); // 设置搜索条件 searchRequest.source(builder); // 执行ES请求 SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); // 处理聚合查询结果 Aggregations aggregations = searchResponse.getAggregations(); // 根据by_shop名字查询terms聚合结果 Terms byShopAggregation = aggregations.get("by_shop"); // 遍历terms聚合结果 for (Terms.Bucket bucket : byShopAggregation.getBuckets()) { // 因为是根据shop_id分组,因此可以直接将桶的key转换成int类型 int shopId = bucket.getKeyAsNumber().intValue(); // 如果分组的字段是字符串类型,可以直接转成String类型 // String key = bucket.getKeyAsString(); // 获取文档总数 long count = bucket.getDocCount(); } </syntaxhighlight> == 指标聚合(metrics) == Elasticsearch 指标聚合,就是类似 SQL 的统计函数,指标聚合可以单独使用,也可以跟桶聚合一起使用。 常用指标聚合: # Value Count # Cardinality # Avg # Sum # Max # Min === Value Count === '''值聚合''',主要用于统计文档总数,类似 SQL 的 '''count''' 函数。 创建聚合条件: <syntaxhighlight lang="Java" highlight=""> // 创建Value Count指标聚合 // 聚合统计命名为:orders, 统计order_id字段值的数量 ValueCountAggregationBuilder valueCountAggregationBuilder = AggregationBuilders.count("orders").field("order_id"); </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> Aggregations aggregations = searchResponse.getAggregations(); // 根据orders命名查询,ValueCount统计结果 ValueCount valueCount = aggregations.get("orders"); // 打印结果 System.out.println(valueCount.getValue()); </syntaxhighlight> === Cardinality === '''基数聚合''',也是用于统计文档的总数,跟 Value Count 的区别是,'''基数聚合会去重''',不会统计重复的值,类似 SQL 的 '''count(DISTINCT 字段)''' 用法。 * 基数聚合是一种'''近似'''算法,统计的结果会有一定误差,不过性能很好。 创建聚合条件: <syntaxhighlight lang="Java" highlight=""> // 聚合统计命名为:total, 近似统计id字段值的数量 CardinalityAggregationBuilder cardinalityAggregationBuilder = AggregationBuilders.cardinality("total").field("id"); </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> Aggregations aggregations = searchResponse.getAggregations(); // 根据total命名查询,Cardinality统计结果 Cardinality cardinality = aggregations.get("total"); // 打印结果 System.out.println(cardinality.getValue()); </syntaxhighlight> === Avg === 创建聚合条件: <syntaxhighlight lang="Java" highlight=""> AvgAggregationBuilder avgAggregationBuilder = AggregationBuilders.avg("avg_price").field("price"); </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> Aggregations aggregations = searchResponse.getAggregations(); // 根据total命名查询,Avg统计结果 Avg avgPrice = aggregations.get("avg_price"); // 打印结果 System.out.println(avgPrice.getValue()); </syntaxhighlight> === Sum === 创建聚合条件: <syntaxhighlight lang="Java" highlight=""> SumAggregationBuilder sumAggregationBuilder = AggregationBuilders.sum("total_sale").field("price"); </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> Aggregations aggregations = searchResponse.getAggregations(); // 根据total命名查询,Sum统计结果 Sum totalPrice = aggregations.get("total_sale"); // 打印结果 System.out.println(totalPrice.getValue()); </syntaxhighlight> === Max === 创建聚合条件: <syntaxhighlight lang="Java" highlight=""> MaxAggregationBuilder maxAggregationBuilder = AggregationBuilders.max("max_price").field("price"); </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> // 处理聚合查询结果 Aggregations aggregations = searchResponse.getAggregations(); // 根据max_price命名查询,Max统计结果 Max maxPrice = aggregations.get("max_price"); // 打印结果 System.out.println(maxPrice.getValue()); </syntaxhighlight> === Min === 创建聚合条件: <syntaxhighlight lang="Java" highlight=""> MinAggregationBuilder minAggregationBuilder = AggregationBuilders.min("min_price").field("price"); </syntaxhighlight> 处理聚合结果: <syntaxhighlight lang="Java" highlight=""> Aggregations aggregations = searchResponse.getAggregations(); // 根据min_price命名查询,Min统计结果 Min minPrice = aggregations.get("min_price"); // 打印结果 System.out.println(minPrice.getValue()); </syntaxhighlight> === ==示例== === <syntaxhighlight lang="Java" highlight=""> // 首先创建RestClient,后续章节通过RestClient对象进行参数配置。 RestClientBuilder restClientBuilder = RestClient.builder( new HttpHost("localhost", 9200, "http"), // 设置ES服务地址,支持多个 new HttpHost("localhost", 9201, "http")); // 创建RestHighLevelClient,�请求都是通过RestHighLevelClient实例发出去的。 RestHighLevelClient client = new RestHighLevelClient(restClientBuilder); // 创建SearchRequest对象, 索引名=order SearchRequest searchRequest = new SearchRequest("order"); // 通过SearchSourceBuilder构建搜索参数 SearchSourceBuilder builder = new SearchSourceBuilder(); // 通过QueryBuilders构建ES查询条件,这里查询所有文档,复杂的查询语句设置请参考前面的章节。 builder.query(QueryBuilders.matchAllQuery()); // 创建Value Count指标聚合 // 聚合统计命名为:orders, 统计order_id字段值的数量 ValueCountAggregationBuilder valueCountAggregationBuilder = AggregationBuilders.count("orders") .field("order_id"); // 创建Sum指标聚合 // 聚合统计命名为:total_sale, 统计price字段值的总和 SumAggregationBuilder sumAggregationBuilder = AggregationBuilders.sum("total_sale") .field("price"); // 设置聚合查询,可以设置多个聚合查询条件,只要聚合查询命名不同就行 builder.aggregation(valueCountAggregationBuilder); builder.aggregation(sumAggregationBuilder); // 设置搜索条件 searchRequest.source(builder); // 执行ES请求 SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); // 处理聚合查询结果 Aggregations aggregations = searchResponse.getAggregations(); // 根据orders命名查询,ValueCount统计结果 ValueCount valueCount = aggregations.get("orders"); System.out.println(valueCount.getValue()); // 根据total_sale命名查询,Sum统计结果 Sum sum = aggregations.get("total_sale"); System.out.println(sum.getValue()); </syntaxhighlight>
返回至“
Java Elasticsearch:聚合查询(Aggregation)
”。
导航菜单
个人工具
登录
命名空间
页面
讨论
大陆简体
已展开
已折叠
查看
阅读
查看源代码
查看历史
更多
已展开
已折叠
搜索
导航
首页
最近更改
随机页面
MediaWiki帮助
笔记
服务器
数据库
后端
前端
工具
《To do list》
日常
阅读
电影
摄影
其他
Software
Windows
WIKIOE
所有分类
所有页面
侧边栏
站点日志
工具
链入页面
相关更改
特殊页面
页面信息