From 8767748f9cad7d096b0c72fae91624cd8fc5b7ab Mon Sep 17 00:00:00 2001 From: frankporras Date: Thu, 20 Jun 2024 18:20:16 +0200 Subject: [PATCH] Ongoing improvement in OpenSearch query --- src/services/dataset.service.ts | 196 ++++++++++++++++++++++++++++---- 1 file changed, 174 insertions(+), 22 deletions(-) diff --git a/src/services/dataset.service.ts b/src/services/dataset.service.ts index a976cce..98412e9 100644 --- a/src/services/dataset.service.ts +++ b/src/services/dataset.service.ts @@ -28,8 +28,8 @@ class DatasetService { public searchTerm(term: string, openCore: string, openHost: string): Observable<{ datasets: Dataset[], highlights: HitHighlight[] }> { // OpenSearch endpoint - // const host = "https://" + openHost; // When using geoinformation.dev - const host = "http://" + openHost; // When using local OpenSearch dev endpoint + const host = "https://" + openHost; // When using geoinformation.dev + // const host = "http://" + openHost; // When using local OpenSearch dev endpoint const path = "/" + openCore + "/_search"; const base = host + path; /** @@ -55,12 +55,20 @@ class DatasetService { // sort: [{ server_date_published: { order: "desc" } }], sort: [{ _score: { order: "desc" } }], // Sort by _score in descending order track_scores: true, // This ensures "_score" is included even when sorting by other criteria. Otherwise the relevance score is not calculated + // aggs: { + // subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! + // language: { terms: { field: "language" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + // author: { terms: { field: "author.keyword", size: 1000 } }, + // year: { terms: { field: "year", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + // }, + // HOME OFFICE ================================================================================ aggs: { subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! - language: { terms: { field: "language" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + language: { terms: { field: "language.keyword" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS author: { terms: { field: "author.keyword", size: 1000 } }, - year: { terms: { field: "year", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + year: { terms: { field: "year.keyword", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS }, + // =========================================================================================== highlight: { fields: { title: {}, @@ -139,6 +147,140 @@ class DatasetService { openCore: string, openHost: string, start?: string, // Starting page + ): Observable { + // OpenSearch endpoint + const host = "http://" + openHost; // When using local OpenSearch dev endpoint + const path = "/" + openCore + "/_search"; + const base = host + path; + + // Determine search term and query fields based on the suggestion type + let mainQuery; + if (typeof suggestion === "string") { // If suggestion is a string, append a wildcard (*) for partial matches + const lowercaseTerm = suggestion.toLowerCase() + mainQuery = { + bool: { + should: [ + { match: { title: { query: suggestion, fuzziness: "AUTO", boost: 3 } } }, + { match: { author: { query: suggestion, fuzziness: "AUTO", boost: 2 } } }, + { match: { subjects: { query: suggestion, fuzziness: "AUTO", boost: 1 } } }, + { wildcard: { title: { value: `${lowercaseTerm}*`, boost: 3 } } }, + { wildcard: { author: { value: `${lowercaseTerm}*`, boost: 2 } } }, + { wildcard: { subjects: { value: `${lowercaseTerm}*`, boost: 1 } } } + ], + minimum_should_match: 1 + } + }; + } else if (suggestion instanceof Suggestion) { // If suggestion is a Suggestion object, form a specific query + mainQuery = { + match: { + [suggestion.type]: { + query: suggestion.value, + operator: 'and' // all the terms in the query must be present in the field + } + } + }; + } + + // Set default value for start if not provided + const startValue = start ? parseInt(start) : 0; + + // Construct filter fields based on active filter categories + const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({ + terms: { [`${category}.keyword`]: values } + })); + + // Construct the body of the OpenSearch query + const body = { + + query: { + bool: { + must: [ + mainQuery, // Ensure the main query must be satisfied + ...filters // Ensure all filters must be satisfied + ] + } + }, + + TODO: SEGUIR AQUI! + // // THIS WORKS: + // query: { + // bool: { + // "must": [ + // { "match": { "title": "blatt" } }, + // { "terms": { "subjects": "bayern" } } + // ], + // } + // }, + + // // TRY THIS + // query: { + // bool: { + // "must": [ + // { "match": { "title": "blatt" } }, + // { "terms": { "subjects": "bayern" } } + // { "terms": { "subjects": "salzburg" } } + // ], + // } + // }, + + // // TRY THIS + // query: { + // bool: { + // "must": [ + // { "match": { "title": "blatt" } }, + // { "terms": { "subjects": "bayern" } } + // ], + // "should": [ + // { match: { title: { query: "blatt", fuzziness: "AUTO", boost: 3 } } }, + // { match: { author: { query: "blatt", fuzziness: "AUTO", boost: 2 } } }, + // { match: { subjects: { query: "blatt", fuzziness: "AUTO", boost: 1 } } }, + // { wildcard: { title: { value: "blatt", boost: 3 } } }, + // { wildcard: { author: { value: "blatt", boost: 2 } } }, + // { wildcard: { subjects: { value: "blatt", boost: 1 } } } + // ], + // minimum_should_match: 1 + // } + // }, + + size: 10, + from: startValue, + sort: [{ _score: { order: "desc" } }], + track_scores: true, + + aggs: { + subjects: { terms: { field: "subjects.keyword", size: 1000 } }, + language: { terms: { field: "language.keyword" } }, + author: { terms: { field: "author.keyword", size: 1000 } }, + year: { terms: { field: "year.keyword", size: 100 } } + }, + + highlight: { + fields: { + title: {}, + author: {}, + subjects: {} + } + } + }; + + console.log("mainQuery:", mainQuery); + console.log("filters:", filters); + console.log("body:", body); + + // Make API call to OpenSearch and return the result + const stations = api.post(base, body); + + return stations; + } + + + + public facetedSearchOPEN2( + suggestion: Suggestion | string, + activeFilterCategories: ActiveFilterCategories, + openCore: string, + openHost: string, + start?: string, // Starting page ): Observable { // OpenSearch endpoint // const host = "https://" + openHost; // When using geoinformation.dev @@ -180,19 +322,21 @@ class DatasetService { } }; + // HOME OFFICE ==================================================== + // Constructing Filters Based on Active Filter Categories + const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({ + terms: { [`${category}.keyword`]: values } + })); + // ================================================================ + // // Constructing Filters Based on Active Filter Categories - // const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({ - // // terms: { [`${category}.keyword`]: values } - // terms: { [category]: values } - // })); - - const filters = Object.entries(activeFilterCategories).map(([category, values]) => { - if (category === "language" || category === "year") { - return { terms: { [category]: values } }; - } else { - return { terms: { [`${category}.keyword`]: values } }; - } - }); + // const filters = Object.entries(activeFilterCategories).map(([category, values]) => { + // if (category === "language" || category === "year") { + // return { terms: { [category]: values } }; + // } else { + // return { terms: { [`${category}.keyword`]: values } }; + // } + // }); // console.log(activeFilterCategories); console.log("mainQuery:", mainQuery); @@ -221,15 +365,23 @@ class DatasetService { from: start ? parseInt(start) : 0, sort: [{ _score: { order: "desc" } }], track_scores: true, - aggs: { // Defines aggregations for facets - // terms: Aggregation type that returns the most common terms in a field. - // !For a large number of terms setting an extremely large size might not be efficient - // If you genuinely need all unique terms and expect a large number of them, consider using a composite aggregation for more efficient pagination of terms. + // aggs: { // Defines aggregations for facets + // // terms: Aggregation type that returns the most common terms in a field. + // // !For a large number of terms setting an extremely large size might not be efficient + // // If you genuinely need all unique terms and expect a large number of them, consider using a composite aggregation for more efficient pagination of terms. + // subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! + // language: { terms: { field: "language" } }, // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + // author: { terms: { field: "author.keyword", size: 1000 } }, + // year: { terms: { field: "year", size: 100 } } // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + // }, + // HOME OFFICE ================================================================================ + aggs: { subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! - language: { terms: { field: "language" } }, // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + language: { terms: { field: "language.keyword" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS author: { terms: { field: "author.keyword", size: 1000 } }, - year: { terms: { field: "year", size: 100 } } // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + year: { terms: { field: "year.keyword", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS }, + // =========================================================================================== highlight: { fields: { title: {},