scroll.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. // Copyright 2012-present Oliver Eilhard. All rights reserved.
  2. // Use of this source code is governed by a MIT-license.
  3. // See http://olivere.mit-license.org/license.txt for details.
  4. package elastic
  5. import (
  6. "context"
  7. "fmt"
  8. "io"
  9. "net/url"
  10. "strings"
  11. "sync"
  12. "gopkg.in/olivere/elastic.v5/uritemplates"
  13. )
  // DefaultScrollKeepAlive is the keep-alive duration assigned to a scroll
  // cursor by NewScrollService until the caller picks a different one.
  const DefaultScrollKeepAlive = "5m"
  18. // ScrollService iterates over pages of search results from Elasticsearch.
  19. type ScrollService struct {
  20. client *Client
  21. retrier Retrier
  22. indices []string
  23. types []string
  24. keepAlive string
  25. body interface{}
  26. ss *SearchSource
  27. size *int
  28. pretty bool
  29. routing string
  30. preference string
  31. ignoreUnavailable *bool
  32. allowNoIndices *bool
  33. expandWildcards string
  34. filterPath []string
  35. mu sync.RWMutex
  36. scrollId string
  37. }
  38. // NewScrollService initializes and returns a new ScrollService.
  39. func NewScrollService(client *Client) *ScrollService {
  40. builder := &ScrollService{
  41. client: client,
  42. ss: NewSearchSource(),
  43. keepAlive: DefaultScrollKeepAlive,
  44. }
  45. return builder
  46. }
  47. // Retrier allows to set specific retry logic for this ScrollService.
  48. // If not specified, it will use the client's default retrier.
  49. func (s *ScrollService) Retrier(retrier Retrier) *ScrollService {
  50. s.retrier = retrier
  51. return s
  52. }
  53. // Index sets the name of one or more indices to iterate over.
  54. func (s *ScrollService) Index(indices ...string) *ScrollService {
  55. if s.indices == nil {
  56. s.indices = make([]string, 0)
  57. }
  58. s.indices = append(s.indices, indices...)
  59. return s
  60. }
  61. // Type sets the name of one or more types to iterate over.
  62. func (s *ScrollService) Type(types ...string) *ScrollService {
  63. if s.types == nil {
  64. s.types = make([]string, 0)
  65. }
  66. s.types = append(s.types, types...)
  67. return s
  68. }
  69. // Scroll is an alias for KeepAlive, the time to keep
  70. // the cursor alive (e.g. "5m" for 5 minutes).
  71. func (s *ScrollService) Scroll(keepAlive string) *ScrollService {
  72. s.keepAlive = keepAlive
  73. return s
  74. }
  75. // KeepAlive sets the maximum time after which the cursor will expire.
  76. // It is "2m" by default.
  77. func (s *ScrollService) KeepAlive(keepAlive string) *ScrollService {
  78. s.keepAlive = keepAlive
  79. return s
  80. }
  81. // Size specifies the number of documents Elasticsearch should return
  82. // from each shard, per page.
  83. func (s *ScrollService) Size(size int) *ScrollService {
  84. s.size = &size
  85. return s
  86. }
  87. // Body sets the raw body to send to Elasticsearch. This can be e.g. a string,
  88. // a map[string]interface{} or anything that can be serialized into JSON.
  89. // Notice that setting the body disables the use of SearchSource and many
  90. // other properties of the SearchService.
  91. func (s *ScrollService) Body(body interface{}) *ScrollService {
  92. s.body = body
  93. return s
  94. }
  95. // SearchSource sets the search source builder to use with this iterator.
  96. // Notice that only a certain number of properties can be used when scrolling,
  97. // e.g. query and sorting.
  98. func (s *ScrollService) SearchSource(searchSource *SearchSource) *ScrollService {
  99. s.ss = searchSource
  100. if s.ss == nil {
  101. s.ss = NewSearchSource()
  102. }
  103. return s
  104. }
  105. // Query sets the query to perform, e.g. a MatchAllQuery.
  106. func (s *ScrollService) Query(query Query) *ScrollService {
  107. s.ss = s.ss.Query(query)
  108. return s
  109. }
  110. // PostFilter is executed as the last filter. It only affects the
  111. // search hits but not facets. See
  112. // https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-request-post-filter.html
  113. // for details.
  114. func (s *ScrollService) PostFilter(postFilter Query) *ScrollService {
  115. s.ss = s.ss.PostFilter(postFilter)
  116. return s
  117. }
  118. // Slice allows slicing the scroll request into several batches.
  119. // This is supported in Elasticsearch 5.0 or later.
  120. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-request-scroll.html#sliced-scroll
  121. // for details.
  122. func (s *ScrollService) Slice(sliceQuery Query) *ScrollService {
  123. s.ss = s.ss.Slice(sliceQuery)
  124. return s
  125. }
  126. // FetchSource indicates whether the response should contain the stored
  127. // _source for every hit.
  128. func (s *ScrollService) FetchSource(fetchSource bool) *ScrollService {
  129. s.ss = s.ss.FetchSource(fetchSource)
  130. return s
  131. }
  132. // FetchSourceContext indicates how the _source should be fetched.
  133. func (s *ScrollService) FetchSourceContext(fetchSourceContext *FetchSourceContext) *ScrollService {
  134. s.ss = s.ss.FetchSourceContext(fetchSourceContext)
  135. return s
  136. }
  137. // Version can be set to true to return a version for each search hit.
  138. // See https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-request-version.html.
  139. func (s *ScrollService) Version(version bool) *ScrollService {
  140. s.ss = s.ss.Version(version)
  141. return s
  142. }
  143. // Sort adds a sort order. This can have negative effects on the performance
  144. // of the scroll operation as Elasticsearch needs to sort first.
  145. func (s *ScrollService) Sort(field string, ascending bool) *ScrollService {
  146. s.ss = s.ss.Sort(field, ascending)
  147. return s
  148. }
  149. // SortWithInfo specifies a sort order. Notice that sorting can have a
  150. // negative impact on scroll performance.
  151. func (s *ScrollService) SortWithInfo(info SortInfo) *ScrollService {
  152. s.ss = s.ss.SortWithInfo(info)
  153. return s
  154. }
  155. // SortBy specifies a sort order. Notice that sorting can have a
  156. // negative impact on scroll performance.
  157. func (s *ScrollService) SortBy(sorter ...Sorter) *ScrollService {
  158. s.ss = s.ss.SortBy(sorter...)
  159. return s
  160. }
  161. // Pretty asks Elasticsearch to pretty-print the returned JSON.
  162. func (s *ScrollService) Pretty(pretty bool) *ScrollService {
  163. s.pretty = pretty
  164. return s
  165. }
  166. // Routing is a list of specific routing values to control the shards
  167. // the search will be executed on.
  168. func (s *ScrollService) Routing(routings ...string) *ScrollService {
  169. s.routing = strings.Join(routings, ",")
  170. return s
  171. }
  172. // Preference sets the preference to execute the search. Defaults to
  173. // randomize across shards ("random"). Can be set to "_local" to prefer
  174. // local shards, "_primary" to execute on primary shards only,
  175. // or a custom value which guarantees that the same order will be used
  176. // across different requests.
  177. func (s *ScrollService) Preference(preference string) *ScrollService {
  178. s.preference = preference
  179. return s
  180. }
  181. // IgnoreUnavailable indicates whether the specified concrete indices
  182. // should be ignored when unavailable (missing or closed).
  183. func (s *ScrollService) IgnoreUnavailable(ignoreUnavailable bool) *ScrollService {
  184. s.ignoreUnavailable = &ignoreUnavailable
  185. return s
  186. }
  187. // AllowNoIndices indicates whether to ignore if a wildcard indices
  188. // expression resolves into no concrete indices. (This includes `_all` string
  189. // or when no indices have been specified).
  190. func (s *ScrollService) AllowNoIndices(allowNoIndices bool) *ScrollService {
  191. s.allowNoIndices = &allowNoIndices
  192. return s
  193. }
  194. // ExpandWildcards indicates whether to expand wildcard expression to
  195. // concrete indices that are open, closed or both.
  196. func (s *ScrollService) ExpandWildcards(expandWildcards string) *ScrollService {
  197. s.expandWildcards = expandWildcards
  198. return s
  199. }
  200. // FilterPath allows reducing the response, a mechanism known as
  201. // response filtering and described here:
  202. // https://www.elastic.co/guide/en/elasticsearch/reference/5.6/common-options.html#common-options-response-filtering.
  203. func (s *ScrollService) FilterPath(filterPath ...string) *ScrollService {
  204. s.filterPath = append(s.filterPath, filterPath...)
  205. return s
  206. }
  207. // ScrollId specifies the identifier of a scroll in action.
  208. func (s *ScrollService) ScrollId(scrollId string) *ScrollService {
  209. s.mu.Lock()
  210. s.scrollId = scrollId
  211. s.mu.Unlock()
  212. return s
  213. }
  214. // Do returns the next search result. It will return io.EOF as error if there
  215. // are no more search results.
  216. func (s *ScrollService) Do(ctx context.Context) (*SearchResult, error) {
  217. s.mu.RLock()
  218. nextScrollId := s.scrollId
  219. s.mu.RUnlock()
  220. if len(nextScrollId) == 0 {
  221. return s.first(ctx)
  222. }
  223. return s.next(ctx)
  224. }
  225. // Clear cancels the current scroll operation. If you don't do this manually,
  226. // the scroll will be expired automatically by Elasticsearch. You can control
  227. // how long a scroll cursor is kept alive with the KeepAlive func.
  228. func (s *ScrollService) Clear(ctx context.Context) error {
  229. s.mu.RLock()
  230. scrollId := s.scrollId
  231. s.mu.RUnlock()
  232. if len(scrollId) == 0 {
  233. return nil
  234. }
  235. path := "/_search/scroll"
  236. params := url.Values{}
  237. body := struct {
  238. ScrollId []string `json:"scroll_id,omitempty"`
  239. }{
  240. ScrollId: []string{scrollId},
  241. }
  242. _, err := s.client.PerformRequestWithOptions(ctx, PerformRequestOptions{
  243. Method: "DELETE",
  244. Path: path,
  245. Params: params,
  246. Body: body,
  247. Retrier: s.retrier,
  248. })
  249. if err != nil {
  250. return err
  251. }
  252. return nil
  253. }
  254. // -- First --
  255. // first takes the first page of search results.
  256. func (s *ScrollService) first(ctx context.Context) (*SearchResult, error) {
  257. // Get URL and parameters for request
  258. path, params, err := s.buildFirstURL()
  259. if err != nil {
  260. return nil, err
  261. }
  262. // Get HTTP request body
  263. body, err := s.bodyFirst()
  264. if err != nil {
  265. return nil, err
  266. }
  267. // Get HTTP response
  268. res, err := s.client.PerformRequestWithOptions(ctx, PerformRequestOptions{
  269. Method: "POST",
  270. Path: path,
  271. Params: params,
  272. Body: body,
  273. Retrier: s.retrier,
  274. })
  275. if err != nil {
  276. return nil, err
  277. }
  278. // Return operation response
  279. ret := new(SearchResult)
  280. if err := s.client.decoder.Decode(res.Body, ret); err != nil {
  281. return nil, err
  282. }
  283. s.mu.Lock()
  284. s.scrollId = ret.ScrollId
  285. s.mu.Unlock()
  286. if ret.Hits == nil || len(ret.Hits.Hits) == 0 {
  287. return ret, io.EOF
  288. }
  289. return ret, nil
  290. }
  291. // buildFirstURL builds the URL for retrieving the first page.
  292. func (s *ScrollService) buildFirstURL() (string, url.Values, error) {
  293. // Build URL
  294. var err error
  295. var path string
  296. if len(s.indices) == 0 && len(s.types) == 0 {
  297. path = "/_search"
  298. } else if len(s.indices) > 0 && len(s.types) == 0 {
  299. path, err = uritemplates.Expand("/{index}/_search", map[string]string{
  300. "index": strings.Join(s.indices, ","),
  301. })
  302. } else if len(s.indices) == 0 && len(s.types) > 0 {
  303. path, err = uritemplates.Expand("/_all/{typ}/_search", map[string]string{
  304. "typ": strings.Join(s.types, ","),
  305. })
  306. } else {
  307. path, err = uritemplates.Expand("/{index}/{typ}/_search", map[string]string{
  308. "index": strings.Join(s.indices, ","),
  309. "typ": strings.Join(s.types, ","),
  310. })
  311. }
  312. if err != nil {
  313. return "", url.Values{}, err
  314. }
  315. // Add query string parameters
  316. params := url.Values{}
  317. if s.pretty {
  318. params.Set("pretty", "1")
  319. }
  320. if s.size != nil && *s.size > 0 {
  321. params.Set("size", fmt.Sprintf("%d", *s.size))
  322. }
  323. if len(s.keepAlive) > 0 {
  324. params.Set("scroll", s.keepAlive)
  325. }
  326. if len(s.routing) > 0 {
  327. params.Set("routing", s.routing)
  328. }
  329. if len(s.preference) > 0 {
  330. params.Set("preference", s.preference)
  331. }
  332. if s.allowNoIndices != nil {
  333. params.Set("allow_no_indices", fmt.Sprintf("%v", *s.allowNoIndices))
  334. }
  335. if len(s.expandWildcards) > 0 {
  336. params.Set("expand_wildcards", s.expandWildcards)
  337. }
  338. if s.ignoreUnavailable != nil {
  339. params.Set("ignore_unavailable", fmt.Sprintf("%v", *s.ignoreUnavailable))
  340. }
  341. if len(s.filterPath) > 0 {
  342. // Always add "hits._scroll_id", otherwise we cannot scroll
  343. s.filterPath = append(s.filterPath, "_scroll_id")
  344. params.Set("filter_path", strings.Join(s.filterPath, ","))
  345. }
  346. return path, params, nil
  347. }
  348. // bodyFirst returns the request to fetch the first batch of results.
  349. func (s *ScrollService) bodyFirst() (interface{}, error) {
  350. var err error
  351. var body interface{}
  352. if s.body != nil {
  353. body = s.body
  354. } else {
  355. // Use _doc sort by default if none is specified
  356. if !s.ss.hasSort() {
  357. // Use efficient sorting when no user-defined query/body is specified
  358. s.ss = s.ss.SortBy(SortByDoc{})
  359. }
  360. // Body from search source
  361. body, err = s.ss.Source()
  362. if err != nil {
  363. return nil, err
  364. }
  365. }
  366. return body, nil
  367. }
  368. // -- Next --
  369. func (s *ScrollService) next(ctx context.Context) (*SearchResult, error) {
  370. // Get URL for request
  371. path, params, err := s.buildNextURL()
  372. if err != nil {
  373. return nil, err
  374. }
  375. // Setup HTTP request body
  376. body, err := s.bodyNext()
  377. if err != nil {
  378. return nil, err
  379. }
  380. // Get HTTP response
  381. res, err := s.client.PerformRequestWithOptions(ctx, PerformRequestOptions{
  382. Method: "POST",
  383. Path: path,
  384. Params: params,
  385. Body: body,
  386. Retrier: s.retrier,
  387. })
  388. if err != nil {
  389. return nil, err
  390. }
  391. // Return operation response
  392. ret := new(SearchResult)
  393. if err := s.client.decoder.Decode(res.Body, ret); err != nil {
  394. return nil, err
  395. }
  396. s.mu.Lock()
  397. s.scrollId = ret.ScrollId
  398. s.mu.Unlock()
  399. if ret.Hits == nil || len(ret.Hits.Hits) == 0 {
  400. return ret, io.EOF
  401. }
  402. return ret, nil
  403. }
  404. // buildNextURL builds the URL for the operation.
  405. func (s *ScrollService) buildNextURL() (string, url.Values, error) {
  406. path := "/_search/scroll"
  407. // Add query string parameters
  408. params := url.Values{}
  409. if s.pretty {
  410. params.Set("pretty", "1")
  411. }
  412. return path, params, nil
  413. }
  414. // body returns the request to fetch the next batch of results.
  415. func (s *ScrollService) bodyNext() (interface{}, error) {
  416. s.mu.RLock()
  417. body := struct {
  418. Scroll string `json:"scroll"`
  419. ScrollId string `json:"scroll_id,omitempty"`
  420. }{
  421. Scroll: s.keepAlive,
  422. ScrollId: s.scrollId,
  423. }
  424. s.mu.RUnlock()
  425. return body, nil
  426. }