search_queries_more_like_this.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. // Copyright 2012-present Oliver Eilhard. All rights reserved.
  2. // Use of this source code is governed by a MIT-license.
  3. // See http://olivere.mit-license.org/license.txt for details.
  4. package elastic
  5. import "errors"
  6. // MoreLikeThis query (MLT Query) finds documents that are "like" a given
  7. // set of documents. In order to do so, MLT selects a set of representative
  8. // terms of these input documents, forms a query using these terms, executes
  9. // the query and returns the results. The user controls the input documents,
  10. // how the terms should be selected and how the query is formed.
  11. //
  12. // For more details, see
  13. // https://www.elastic.co/guide/en/elasticsearch/reference/5.2/query-dsl-mlt-query.html
  14. type MoreLikeThisQuery struct {
  15. fields []string
  16. docs []*MoreLikeThisQueryItem
  17. unlikeDocs []*MoreLikeThisQueryItem
  18. include *bool
  19. minimumShouldMatch string
  20. minTermFreq *int
  21. maxQueryTerms *int
  22. stopWords []string
  23. minDocFreq *int
  24. maxDocFreq *int
  25. minWordLen *int
  26. maxWordLen *int
  27. boostTerms *float64
  28. boost *float64
  29. analyzer string
  30. failOnUnsupportedField *bool
  31. queryName string
  32. }
  33. // NewMoreLikeThisQuery creates and initializes a new MoreLikeThisQuery.
  34. func NewMoreLikeThisQuery() *MoreLikeThisQuery {
  35. return &MoreLikeThisQuery{
  36. fields: make([]string, 0),
  37. stopWords: make([]string, 0),
  38. docs: make([]*MoreLikeThisQueryItem, 0),
  39. unlikeDocs: make([]*MoreLikeThisQueryItem, 0),
  40. }
  41. }
  42. // Field adds one or more field names to the query.
  43. func (q *MoreLikeThisQuery) Field(fields ...string) *MoreLikeThisQuery {
  44. q.fields = append(q.fields, fields...)
  45. return q
  46. }
  47. // StopWord sets the stopwords. Any word in this set is considered
  48. // "uninteresting" and ignored. Even if your Analyzer allows stopwords,
  49. // you might want to tell the MoreLikeThis code to ignore them, as for
  50. // the purposes of document similarity it seems reasonable to assume that
  51. // "a stop word is never interesting".
  52. func (q *MoreLikeThisQuery) StopWord(stopWords ...string) *MoreLikeThisQuery {
  53. q.stopWords = append(q.stopWords, stopWords...)
  54. return q
  55. }
  56. // LikeText sets the text to use in order to find documents that are "like" this.
  57. func (q *MoreLikeThisQuery) LikeText(likeTexts ...string) *MoreLikeThisQuery {
  58. for _, s := range likeTexts {
  59. item := NewMoreLikeThisQueryItem().LikeText(s)
  60. q.docs = append(q.docs, item)
  61. }
  62. return q
  63. }
  64. // LikeItems sets the documents to use in order to find documents that are "like" this.
  65. func (q *MoreLikeThisQuery) LikeItems(docs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery {
  66. q.docs = append(q.docs, docs...)
  67. return q
  68. }
  69. // IgnoreLikeText sets the text from which the terms should not be selected from.
  70. func (q *MoreLikeThisQuery) IgnoreLikeText(ignoreLikeText ...string) *MoreLikeThisQuery {
  71. for _, s := range ignoreLikeText {
  72. item := NewMoreLikeThisQueryItem().LikeText(s)
  73. q.unlikeDocs = append(q.unlikeDocs, item)
  74. }
  75. return q
  76. }
  77. // IgnoreLikeItems sets the documents from which the terms should not be selected from.
  78. func (q *MoreLikeThisQuery) IgnoreLikeItems(ignoreDocs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery {
  79. q.unlikeDocs = append(q.unlikeDocs, ignoreDocs...)
  80. return q
  81. }
  82. // Ids sets the document ids to use in order to find documents that are "like" this.
  83. func (q *MoreLikeThisQuery) Ids(ids ...string) *MoreLikeThisQuery {
  84. for _, id := range ids {
  85. item := NewMoreLikeThisQueryItem().Id(id)
  86. q.docs = append(q.docs, item)
  87. }
  88. return q
  89. }
  90. // Include specifies whether the input documents should also be included
  91. // in the results returned. Defaults to false.
  92. func (q *MoreLikeThisQuery) Include(include bool) *MoreLikeThisQuery {
  93. q.include = &include
  94. return q
  95. }
  96. // MinimumShouldMatch sets the number of terms that must match the generated
  97. // query expressed in the common syntax for minimum should match.
  98. // The default value is "30%".
  99. //
  100. // This used to be "PercentTermsToMatch" in Elasticsearch versions before 2.0.
  101. func (q *MoreLikeThisQuery) MinimumShouldMatch(minimumShouldMatch string) *MoreLikeThisQuery {
  102. q.minimumShouldMatch = minimumShouldMatch
  103. return q
  104. }
  105. // MinTermFreq is the frequency below which terms will be ignored in the
  106. // source doc. The default frequency is 2.
  107. func (q *MoreLikeThisQuery) MinTermFreq(minTermFreq int) *MoreLikeThisQuery {
  108. q.minTermFreq = &minTermFreq
  109. return q
  110. }
  111. // MaxQueryTerms sets the maximum number of query terms that will be included
  112. // in any generated query. It defaults to 25.
  113. func (q *MoreLikeThisQuery) MaxQueryTerms(maxQueryTerms int) *MoreLikeThisQuery {
  114. q.maxQueryTerms = &maxQueryTerms
  115. return q
  116. }
  117. // MinDocFreq sets the frequency at which words will be ignored which do
  118. // not occur in at least this many docs. The default is 5.
  119. func (q *MoreLikeThisQuery) MinDocFreq(minDocFreq int) *MoreLikeThisQuery {
  120. q.minDocFreq = &minDocFreq
  121. return q
  122. }
  123. // MaxDocFreq sets the maximum frequency for which words may still appear.
  124. // Words that appear in more than this many docs will be ignored.
  125. // It defaults to unbounded.
  126. func (q *MoreLikeThisQuery) MaxDocFreq(maxDocFreq int) *MoreLikeThisQuery {
  127. q.maxDocFreq = &maxDocFreq
  128. return q
  129. }
  130. // MinWordLength sets the minimum word length below which words will be
  131. // ignored. It defaults to 0.
  132. func (q *MoreLikeThisQuery) MinWordLen(minWordLen int) *MoreLikeThisQuery {
  133. q.minWordLen = &minWordLen
  134. return q
  135. }
  136. // MaxWordLen sets the maximum word length above which words will be ignored.
  137. // Defaults to unbounded (0).
  138. func (q *MoreLikeThisQuery) MaxWordLen(maxWordLen int) *MoreLikeThisQuery {
  139. q.maxWordLen = &maxWordLen
  140. return q
  141. }
  142. // BoostTerms sets the boost factor to use when boosting terms.
  143. // It defaults to 1.
  144. func (q *MoreLikeThisQuery) BoostTerms(boostTerms float64) *MoreLikeThisQuery {
  145. q.boostTerms = &boostTerms
  146. return q
  147. }
  148. // Analyzer specifies the analyzer that will be use to analyze the text.
  149. // Defaults to the analyzer associated with the field.
  150. func (q *MoreLikeThisQuery) Analyzer(analyzer string) *MoreLikeThisQuery {
  151. q.analyzer = analyzer
  152. return q
  153. }
  154. // Boost sets the boost for this query.
  155. func (q *MoreLikeThisQuery) Boost(boost float64) *MoreLikeThisQuery {
  156. q.boost = &boost
  157. return q
  158. }
  159. // FailOnUnsupportedField indicates whether to fail or return no result
  160. // when this query is run against a field which is not supported such as
  161. // a binary/numeric field.
  162. func (q *MoreLikeThisQuery) FailOnUnsupportedField(fail bool) *MoreLikeThisQuery {
  163. q.failOnUnsupportedField = &fail
  164. return q
  165. }
  166. // QueryName sets the query name for the filter that can be used when
  167. // searching for matched_filters per hit.
  168. func (q *MoreLikeThisQuery) QueryName(queryName string) *MoreLikeThisQuery {
  169. q.queryName = queryName
  170. return q
  171. }
  172. // Source creates the source for the MLT query.
  173. // It may return an error if the caller forgot to specify any documents to
  174. // be "liked" in the MoreLikeThisQuery.
  175. func (q *MoreLikeThisQuery) Source() (interface{}, error) {
  176. // {
  177. // "match_all" : { ... }
  178. // }
  179. if len(q.docs) == 0 {
  180. return nil, errors.New(`more_like_this requires some documents to be "liked"`)
  181. }
  182. source := make(map[string]interface{})
  183. params := make(map[string]interface{})
  184. source["more_like_this"] = params
  185. if len(q.fields) > 0 {
  186. params["fields"] = q.fields
  187. }
  188. var likes []interface{}
  189. for _, doc := range q.docs {
  190. src, err := doc.Source()
  191. if err != nil {
  192. return nil, err
  193. }
  194. likes = append(likes, src)
  195. }
  196. params["like"] = likes
  197. if len(q.unlikeDocs) > 0 {
  198. var dontLikes []interface{}
  199. for _, doc := range q.unlikeDocs {
  200. src, err := doc.Source()
  201. if err != nil {
  202. return nil, err
  203. }
  204. dontLikes = append(dontLikes, src)
  205. }
  206. params["unlike"] = dontLikes
  207. }
  208. if q.minimumShouldMatch != "" {
  209. params["minimum_should_match"] = q.minimumShouldMatch
  210. }
  211. if q.minTermFreq != nil {
  212. params["min_term_freq"] = *q.minTermFreq
  213. }
  214. if q.maxQueryTerms != nil {
  215. params["max_query_terms"] = *q.maxQueryTerms
  216. }
  217. if len(q.stopWords) > 0 {
  218. params["stop_words"] = q.stopWords
  219. }
  220. if q.minDocFreq != nil {
  221. params["min_doc_freq"] = *q.minDocFreq
  222. }
  223. if q.maxDocFreq != nil {
  224. params["max_doc_freq"] = *q.maxDocFreq
  225. }
  226. if q.minWordLen != nil {
  227. params["min_word_len"] = *q.minWordLen
  228. }
  229. if q.maxWordLen != nil {
  230. params["max_word_len"] = *q.maxWordLen
  231. }
  232. if q.boostTerms != nil {
  233. params["boost_terms"] = *q.boostTerms
  234. }
  235. if q.boost != nil {
  236. params["boost"] = *q.boost
  237. }
  238. if q.analyzer != "" {
  239. params["analyzer"] = q.analyzer
  240. }
  241. if q.failOnUnsupportedField != nil {
  242. params["fail_on_unsupported_field"] = *q.failOnUnsupportedField
  243. }
  244. if q.queryName != "" {
  245. params["_name"] = q.queryName
  246. }
  247. if q.include != nil {
  248. params["include"] = *q.include
  249. }
  250. return source, nil
  251. }
  252. // -- MoreLikeThisQueryItem --
  253. // MoreLikeThisQueryItem represents a single item of a MoreLikeThisQuery
  254. // to be "liked" or "unliked".
  255. type MoreLikeThisQueryItem struct {
  256. likeText string
  257. index string
  258. typ string
  259. id string
  260. doc interface{}
  261. fields []string
  262. routing string
  263. fsc *FetchSourceContext
  264. version int64
  265. versionType string
  266. }
  267. // NewMoreLikeThisQueryItem creates and initializes a MoreLikeThisQueryItem.
  268. func NewMoreLikeThisQueryItem() *MoreLikeThisQueryItem {
  269. return &MoreLikeThisQueryItem{
  270. version: -1,
  271. }
  272. }
  273. // LikeText represents a text to be "liked".
  274. func (item *MoreLikeThisQueryItem) LikeText(likeText string) *MoreLikeThisQueryItem {
  275. item.likeText = likeText
  276. return item
  277. }
  278. // Index represents the index of the item.
  279. func (item *MoreLikeThisQueryItem) Index(index string) *MoreLikeThisQueryItem {
  280. item.index = index
  281. return item
  282. }
  283. // Type represents the document type of the item.
  284. func (item *MoreLikeThisQueryItem) Type(typ string) *MoreLikeThisQueryItem {
  285. item.typ = typ
  286. return item
  287. }
  288. // Id represents the document id of the item.
  289. func (item *MoreLikeThisQueryItem) Id(id string) *MoreLikeThisQueryItem {
  290. item.id = id
  291. return item
  292. }
  293. // Doc represents a raw document template for the item.
  294. func (item *MoreLikeThisQueryItem) Doc(doc interface{}) *MoreLikeThisQueryItem {
  295. item.doc = doc
  296. return item
  297. }
  298. // Fields represents the list of fields of the item.
  299. func (item *MoreLikeThisQueryItem) Fields(fields ...string) *MoreLikeThisQueryItem {
  300. item.fields = append(item.fields, fields...)
  301. return item
  302. }
  303. // Routing sets the routing associated with the item.
  304. func (item *MoreLikeThisQueryItem) Routing(routing string) *MoreLikeThisQueryItem {
  305. item.routing = routing
  306. return item
  307. }
  308. // FetchSourceContext represents the fetch source of the item which controls
  309. // if and how _source should be returned.
  310. func (item *MoreLikeThisQueryItem) FetchSourceContext(fsc *FetchSourceContext) *MoreLikeThisQueryItem {
  311. item.fsc = fsc
  312. return item
  313. }
  314. // Version specifies the version of the item.
  315. func (item *MoreLikeThisQueryItem) Version(version int64) *MoreLikeThisQueryItem {
  316. item.version = version
  317. return item
  318. }
  319. // VersionType represents the version type of the item.
  320. func (item *MoreLikeThisQueryItem) VersionType(versionType string) *MoreLikeThisQueryItem {
  321. item.versionType = versionType
  322. return item
  323. }
  324. // Source returns the JSON-serializable fragment of the entity.
  325. func (item *MoreLikeThisQueryItem) Source() (interface{}, error) {
  326. if item.likeText != "" {
  327. return item.likeText, nil
  328. }
  329. source := make(map[string]interface{})
  330. if item.index != "" {
  331. source["_index"] = item.index
  332. }
  333. if item.typ != "" {
  334. source["_type"] = item.typ
  335. }
  336. if item.id != "" {
  337. source["_id"] = item.id
  338. }
  339. if item.doc != nil {
  340. source["doc"] = item.doc
  341. }
  342. if len(item.fields) > 0 {
  343. source["fields"] = item.fields
  344. }
  345. if item.routing != "" {
  346. source["_routing"] = item.routing
  347. }
  348. if item.fsc != nil {
  349. src, err := item.fsc.Source()
  350. if err != nil {
  351. return nil, err
  352. }
  353. source["_source"] = src
  354. }
  355. if item.version >= 0 {
  356. source["_version"] = item.version
  357. }
  358. if item.versionType != "" {
  359. source["_version_type"] = item.versionType
  360. }
  361. return source, nil
  362. }