newbornzhao 2019-07-01
在实际应用中,通过from+size分页不可避免会遇到深分页的性能瓶颈,而scroll技术就是一个很好的解决深分页的方法。比如我们要一次性查出10万条数据,使用from+size性能显然会非常差,因为priority queue会非常大。此时如果采用scroll滚动查询,就可以一批一批地查,直到所有数据都查询完。
scroll搜索会在第一次搜索的时候,保存一个当时的视图快照,之后只会基于该旧的视图快照提供数据搜索,如果这个期间数据发生变更,用户是看不到的。而且scroll查询配合基于_doc的排序方式(如下面示例中的 "sort": [ "_doc" ]),性能较高。
示例(scroll=1m表示搜索上下文保留1分钟;第一次请求返回_scroll_id后,后续每次滚动请求都要带上最近一次返回的_scroll_id,直到返回的hits为空):
POST /test_index/_search?scroll=1m { "query": { "match_all": {} }, "sort": [ "_doc" ], "size": 3 }
{ "_scroll_id" : "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAABu4oWUC1iLVRFdnlRT3lsTXlFY01FaEFwUQ==", "took" : 7, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 10, "relation" : "eq" }, "max_score" : null, "hits" : [ { "_index" : "test_index", "_type" : "_doc", "_id" : "1", "_score" : null, "_source" : { "field1" : "one" }, "sort" : [ 0 ] }, { "_index" : "test_index", "_type" : "_doc", "_id" : "2", "_score" : null, "_source" : { "field1" : "two" }, "sort" : [ 1 ] }, { "_index" : "test_index", "_type" : "_doc", "_id" : "3", "_score" : null, "_source" : { "field1" : "three" }, "sort" : [ 2 ] } ] } }
POST /_search/scroll { "scroll": "1m", "scroll_id": "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAABu4oWUC1iLVRFdnlRT3lsTXlFY01FaEFwUQ==" }
{ "_scroll_id" : "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAABu4oWUC1iLVRFdnlRT3lsTXlFY01FaEFwUQ==", "took" : 1, "timed_out" : false, "terminated_early" : true, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 10, "relation" : "eq" }, "max_score" : null, "hits" : [ { "_index" : "test_index", "_type" : "_doc", "_id" : "4", "_score" : null, "_source" : { "field1" : "four" }, "sort" : [ 3 ] }, { "_index" : "test_index", "_type" : "_doc", "_id" : "5", "_score" : null, "_source" : { "field1" : "five" }, "sort" : [ 4 ] }, { "_index" : "test_index", "_type" : "_doc", "_id" : "6", "_score" : null, "_source" : { "field1" : "six" }, "sort" : [ 5 ] } ] } }