From 332923df2d5a5686333c8f87c313d13e13c4666b Mon Sep 17 00:00:00 2001 From: frankporras Date: Fri, 21 Jun 2024 15:07:57 +0200 Subject: [PATCH] Apparently OpenSearch works properly. Still some final tests pending --- src/services/dataset.service.ts | 398 ++++++++++++++++++-------------- 1 file changed, 230 insertions(+), 168 deletions(-) diff --git a/src/services/dataset.service.ts b/src/services/dataset.service.ts index 98412e9..fb1cd34 100644 --- a/src/services/dataset.service.ts +++ b/src/services/dataset.service.ts @@ -28,8 +28,7 @@ class DatasetService { public searchTerm(term: string, openCore: string, openHost: string): Observable<{ datasets: Dataset[], highlights: HitHighlight[] }> { // OpenSearch endpoint - const host = "https://" + openHost; // When using geoinformation.dev - // const host = "http://" + openHost; // When using local OpenSearch dev endpoint + const host = openHost; // When using local OpenSearch dev endpoint const path = "/" + openCore + "/_search"; const base = host + path; /** @@ -55,20 +54,20 @@ class DatasetService { // sort: [{ server_date_published: { order: "desc" } }], sort: [{ _score: { order: "desc" } }], // Sort by _score in descending order track_scores: true, // This ensures "_score" is included even when sorting by other criteria. Otherwise the relevance score is not calculated - // aggs: { - // subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! - // language: { terms: { field: "language" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS - // author: { terms: { field: "author.keyword", size: 1000 } }, - // year: { terms: { field: "year", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS - // }, - // HOME OFFICE ================================================================================ aggs: { subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! 
- language: { terms: { field: "language.keyword" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + language: { terms: { field: "language" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS author: { terms: { field: "author.keyword", size: 1000 } }, - year: { terms: { field: "year.keyword", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + year: { terms: { field: "year", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS }, - // =========================================================================================== + // // CONTABO ================================================================================ + // aggs: { + // subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! + // language: { terms: { field: "language.keyword" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + // author: { terms: { field: "author.keyword", size: 1000 } }, + // year: { terms: { field: "year.keyword", size: 100 } } // << ".keyword" HAS TO BE REMOVED. 
OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + // }, + // // =========================================================================================== highlight: { fields: { title: {}, @@ -141,6 +140,170 @@ class DatasetService { // return stations; // } + // public facetedSearchOPEN( + // suggestion: Suggestion | string, + // activeFilterCategories: ActiveFilterCategories, + // openCore: string, + // openHost: string, + // start?: string, // Starting page + // ): Observable { + // // OpenSearch endpoint + // const host = openHost; + // const path = "/" + openCore + "/_search"; + // const base = host + path; + + // // Constructing Filters Based on Active Filter Categories + // const filters = Object.entries(activeFilterCategories).map(([category, values]) => { + // if (category === "language" || category === "year") { + // return { terms: { [category]: values } }; + // } else { + // return { terms: { [`${category}.keyword`]: values } }; + // } + // }); + // console.log("filters:", filters); + + // // Determine search term and query fields based on the suggestion type + // let query; + // if (typeof suggestion === "string") { // If suggestion is a string, append a wildcard (*) for partial matches + // const lowercaseTerm = suggestion.toLowerCase() + // query = { + // bool: { + // should: [ + // { match: { title: { query: suggestion, fuzziness: "AUTO", boost: 3 } } }, + // { match: { author: { query: suggestion, fuzziness: "AUTO", boost: 2 } } }, + // { match: { subjects: { query: suggestion, fuzziness: "AUTO", boost: 1 } } }, + // { wildcard: { title: { value: `${lowercaseTerm}*`, boost: 3 } } }, + // { wildcard: { author: { value: `${lowercaseTerm}*`, boost: 2 } } }, + // { wildcard: { subjects: { value: `${lowercaseTerm}*`, boost: 1 } } } + // ], + // minimum_should_match: 1 + // } + // }; + // } else if (suggestion instanceof Suggestion) { // If suggestion is a Suggestion object, form a specific query + // query = { + // match: { + // [suggestion.type]: { + // 
query: suggestion.value, + // operator: 'and' // all the terms in the query must be present in the field e.g. if is a title, the complete title + // } + // } + // }; + // } + + // // Set default value for start if not provided + // const startValue = start ? parseInt(start) : 0; + + // // console.log(activeFilterCategories); + // // console.log("mainQuery:", mainQuery); + + // // Construct the body of the OpenSearch query + // const body = { + + // query: { + // bool: { + // must: [ + // mainQuery, // Ensure the main query must be satisfied + // ...filters // Ensure all filters must be satisfied + // ] + // } + // }, + + // // // WORKS // Expected: 12 results + // // query: { + // // bool: { + // // "must": [ + // // { "term": { "language": "en" } }, + // // { "term": { "subjects.keyword": "Lower Austria" } }, + // // { "term": { "subjects.keyword": "counting data" } } + // // ], + // // } + // // }, + + // // // THIS WORKS: // Expected: 19 results + // // query: { + // // bool: { + // // must: [ + // // { "match": { "title": "Blatt" } }, + // // { "term": { "subjects": "bayern" } } + // // ], + // // } + // // }, + + // // // WORKS // Expected: 4 results + // // query: { + // // bool: { + // // "must": [ + // // { "match": { "title": "blatt" } }, + // // { "term": { "subjects": "bayern" } }, + // // { "term": { "subjects": "salzburg" } } + // // ], + // // } + // // }, + + // // // WORKS // Expected: 2 results + // // query: { + // // bool: { + // // "must": [ + // // { "match": { "title": "blatt" } }, + // // { "term": { "subjects": "ungarn" } }, + // // { "term": { "subjects": "steiermark" } } + // // ], + // // } + // // }, + + // // WORKS // Expected: 12 results + // query: { + // bool: { + // "must": [ + // { "term": { "language": "en" } }, + // { "term": { "subjects.keyword": "Lower Austria" } }, + // { "term": { "subjects.keyword": "counting data" } } + // ], + // "should": [ + // { match: { title: { query: "halger", fuzziness: "AUTO", boost: 3 } } }, + // 
{ match: { author: { query: "halger", fuzziness: "AUTO", boost: 2 } } }, + // { match: { subjects: { query: "halger", fuzziness: "AUTO", boost: 1 } } }, + // { wildcard: { title: { value: "halger", boost: 3 } } }, + // { wildcard: { author: { value: "halger", boost: 2 } } }, + // { wildcard: { subjects: { value: "halger", boost: 1 } } } + // ], + // minimum_should_match: 1 + // } + // }, + + // size: 10, + // from: startValue, + // sort: [{ _score: { order: "desc" } }], + // track_scores: true, + + // aggs: { + // subjects: { terms: { field: "subjects.keyword", size: 1000 } }, + // language: { terms: { field: "language" } }, + // author: { terms: { field: "author.keyword", size: 1000 } }, + // year: { terms: { field: "year", size: 100 } } + // }, + + // highlight: { + // fields: { + // title: {}, + // author: {}, + // subjects: {} + // } + // } + // }; + + // console.log("mainQuery:", mainQuery); + // console.log("filters:", filters); + // console.log("body:", body); + + // // Make API call to OpenSearch and return the result + // const stations = api.post(base, body); + + // return stations; + // } + + + public facetedSearchOPEN( suggestion: Suggestion | string, activeFilterCategories: ActiveFilterCategories, @@ -149,142 +312,7 @@ class DatasetService { start?: string, // Starting page ): Observable { // OpenSearch endpoint - const host = "http://" + openHost; // When using local OpenSearch dev endpoint - const path = "/" + openCore + "/_search"; - const base = host + path; - - // Determine search term and query fields based on the suggestion type - let mainQuery; - if (typeof suggestion === "string") { // If suggestion is a string, append a wildcard (*) for partial matches - const lowercaseTerm = suggestion.toLowerCase() - mainQuery = { - bool: { - should: [ - { match: { title: { query: suggestion, fuzziness: "AUTO", boost: 3 } } }, - { match: { author: { query: suggestion, fuzziness: "AUTO", boost: 2 } } }, - { match: { subjects: { query: suggestion, fuzziness: 
"AUTO", boost: 1 } } }, - { wildcard: { title: { value: `${lowercaseTerm}*`, boost: 3 } } }, - { wildcard: { author: { value: `${lowercaseTerm}*`, boost: 2 } } }, - { wildcard: { subjects: { value: `${lowercaseTerm}*`, boost: 1 } } } - ], - minimum_should_match: 1 - } - }; - } else if (suggestion instanceof Suggestion) { // If suggestion is a Suggestion object, form a specific query - mainQuery = { - match: { - [suggestion.type]: { - query: suggestion.value, - operator: 'and' // all the terms in the query must be present in the field - } - } - }; - } - - // Set default value for start if not provided - const startValue = start ? parseInt(start) : 0; - - // Construct filter fields based on active filter categories - const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({ - terms: { [`${category}.keyword`]: values } - })); - - // Construct the body of the OpenSearch query - const body = { - - query: { - bool: { - must: [ - mainQuery, // Ensure the main query must be satisfied - ...filters // Ensure all filters must be satisfied - ] - } - }, - - TODO: SEGUIR AQUI! 
- // // THIS WORKS: - // query: { - // bool: { - // "must": [ - // { "match": { "title": "blatt" } }, - // { "terms": { "subjects": "bayern" } } - // ], - // } - // }, - - // // TRY THIS - // query: { - // bool: { - // "must": [ - // { "match": { "title": "blatt" } }, - // { "terms": { "subjects": "bayern" } } - // { "terms": { "subjects": "salzburg" } } - // ], - // } - // }, - - // // TRY THIS - // query: { - // bool: { - // "must": [ - // { "match": { "title": "blatt" } }, - // { "terms": { "subjects": "bayern" } } - // ], - // "should": [ - // { match: { title: { query: "blatt", fuzziness: "AUTO", boost: 3 } } }, - // { match: { author: { query: "blatt", fuzziness: "AUTO", boost: 2 } } }, - // { match: { subjects: { query: "blatt", fuzziness: "AUTO", boost: 1 } } }, - // { wildcard: { title: { value: "blatt", boost: 3 } } }, - // { wildcard: { author: { value: "blatt", boost: 2 } } }, - // { wildcard: { subjects: { value: "blatt", boost: 1 } } } - // ], - // minimum_should_match: 1 - // } - // }, - - size: 10, - from: startValue, - sort: [{ _score: { order: "desc" } }], - track_scores: true, - - aggs: { - subjects: { terms: { field: "subjects.keyword", size: 1000 } }, - language: { terms: { field: "language.keyword" } }, - author: { terms: { field: "author.keyword", size: 1000 } }, - year: { terms: { field: "year.keyword", size: 100 } } - }, - - highlight: { - fields: { - title: {}, - author: {}, - subjects: {} - } - } - }; - - console.log("mainQuery:", mainQuery); - console.log("filters:", filters); - console.log("body:", body); - - // Make API call to OpenSearch and return the result - const stations = api.post(base, body); - - return stations; - } - - - - public facetedSearchOPEN2( - suggestion: Suggestion | string, - activeFilterCategories: ActiveFilterCategories, - openCore: string, - openHost: string, - start?: string, // Starting page - ): Observable { - // OpenSearch endpoint - // const host = "https://" + openHost; // When using geoinformation.dev - 
const host = "http://" + openHost; // When using local OpenSearch dev endpoint + const host = openHost; const path = "/" + openCore + "/_search"; const base = host + path; @@ -322,11 +350,11 @@ class DatasetService { } }; - // HOME OFFICE ==================================================== - // Constructing Filters Based on Active Filter Categories - const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({ - terms: { [`${category}.keyword`]: values } - })); + // CONTABO ==================================================== + // // Constructing Filters Based on Active Filter Categories + // const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({ + // terms: { [`${category}.keyword`]: values } + // })); // ================================================================ // // Constructing Filters Based on Active Filter Categories @@ -338,6 +366,20 @@ class DatasetService { // } // }); + + // const filters = Object.entries(activeFilterCategories).map(([category, values]) => + // values.map(value => ({ term: { [`${category}.keyword`]: value } })) + // ).flat(); + + + const filters = Object.entries(activeFilterCategories).map(([category, values]) => { + if (category === "language" || category === "year") { + return values.map(value => ({ term: { [category]: value } })); + } else { + return values.map(value => ({ term: { [`${category}.keyword`]: value } })); + } + }).flat(); + // console.log(activeFilterCategories); console.log("mainQuery:", mainQuery); console.log("filters:", filters); @@ -353,6 +395,7 @@ class DatasetService { // // ] // Contains the filters constructed from activeFilterCategories. 
// } // }, + query: { bool: { must: [ @@ -361,26 +404,46 @@ class DatasetService { ] } }, + + // // THIS DOESN'T WORK // Expected: 12 results, Output: 16 (a single "terms" clause matches ANY of the listed values; use one "term" clause per value to require ALL of them) + // query: { + // bool: { + // "must": [ + // { "term": { "language": "en" } }, + // { "terms": { "subjects.keyword": ["Lower Austria", "counting data"] } } + // ], + // "should": [ + // { match: { title: { query: "halger", fuzziness: "AUTO", boost: 3 } } }, + // { match: { author: { query: "halger", fuzziness: "AUTO", boost: 2 } } }, + // { match: { subjects: { query: "halger", fuzziness: "AUTO", boost: 1 } } }, + // { wildcard: { title: { value: "halger", boost: 3 } } }, + // { wildcard: { author: { value: "halger", boost: 2 } } }, + // { wildcard: { subjects: { value: "halger", boost: 1 } } } + // ], + // minimum_should_match: 1 + // } + // }, + size: 10, from: start ? parseInt(start) : 0, sort: [{ _score: { order: "desc" } }], track_scores: true, - // aggs: { // Defines aggregations for facets - // // terms: Aggregation type that returns the most common terms in a field. - // // !For a large number of terms setting an extremely large size might not be efficient - // // If you genuinely need all unique terms and expect a large number of them, consider using a composite aggregation for more efficient pagination of terms. - // subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! - // language: { terms: { field: "language" } }, // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS - // author: { terms: { field: "author.keyword", size: 1000 } }, - // year: { terms: { field: "year", size: 100 } } // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS - // }, - // HOME OFFICE ================================================================================ - aggs: { + aggs: { // Defines aggregations for facets + // terms: Aggregation type that returns the most common terms in a field. 
+ // !For a large number of terms setting an extremely large size might not be efficient + // If you genuinely need all unique terms and expect a large number of them, consider using a composite aggregation for more efficient pagination of terms. subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! - language: { terms: { field: "language.keyword" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + language: { terms: { field: "language" } }, // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS author: { terms: { field: "author.keyword", size: 1000 } }, - year: { terms: { field: "year.keyword", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + year: { terms: { field: "year", size: 100 } } // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS }, + // CONTABO ================================================================================ + // aggs: { + // subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! + // language: { terms: { field: "language.keyword" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + // author: { terms: { field: "author.keyword", size: 1000 } }, + // year: { terms: { field: "year.keyword", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS + // }, // =========================================================================================== highlight: { fields: { @@ -391,8 +454,7 @@ class DatasetService { } }; -console.log("body:", body); - + console.log("body:", body); const stations = api.post(base, body);