Ongoing improvement in OpenSearch query

This commit is contained in:
Porras-Bernardez 2024-06-20 18:20:16 +02:00
parent 32f743c04e
commit 8767748f9c

View File

@ -28,8 +28,8 @@ class DatasetService {
public searchTerm(term: string, openCore: string, openHost: string): Observable<{ datasets: Dataset[], highlights: HitHighlight[] }> { public searchTerm(term: string, openCore: string, openHost: string): Observable<{ datasets: Dataset[], highlights: HitHighlight[] }> {
// OpenSearch endpoint // OpenSearch endpoint
// const host = "https://" + openHost; // When using geoinformation.dev const host = "https://" + openHost; // When using geoinformation.dev
const host = "http://" + openHost; // When using local OpenSearch dev endpoint // const host = "http://" + openHost; // When using local OpenSearch dev endpoint
const path = "/" + openCore + "/_search"; const path = "/" + openCore + "/_search";
const base = host + path; const base = host + path;
/** /**
@ -55,12 +55,20 @@ class DatasetService {
// sort: [{ server_date_published: { order: "desc" } }], // sort: [{ server_date_published: { order: "desc" } }],
sort: [{ _score: { order: "desc" } }], // Sort by _score in descending order sort: [{ _score: { order: "desc" } }], // Sort by _score in descending order
track_scores: true, // This ensures "_score" is included even when sorting by other criteria. Otherwise the relevance score is not calculated track_scores: true, // This ensures "_score" is included even when sorting by other criteria. Otherwise the relevance score is not calculated
// aggs: {
// subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"!
// language: { terms: { field: "language" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
// author: { terms: { field: "author.keyword", size: 1000 } },
// year: { terms: { field: "year", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
// },
// HOME OFFICE ================================================================================
aggs: { aggs: {
subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"!
language: { terms: { field: "language" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS language: { terms: { field: "language.keyword" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
author: { terms: { field: "author.keyword", size: 1000 } }, author: { terms: { field: "author.keyword", size: 1000 } },
year: { terms: { field: "year", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS year: { terms: { field: "year.keyword", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
}, },
// ===========================================================================================
highlight: { highlight: {
fields: { fields: {
title: {}, title: {},
@ -139,6 +147,140 @@ class DatasetService {
openCore: string, openCore: string,
openHost: string, openHost: string,
start?: string, // Starting page start?: string, // Starting page
): Observable<OpenSearchResponse> {
// OpenSearch endpoint
const host = "http://" + openHost; // When using local OpenSearch dev endpoint
const path = "/" + openCore + "/_search";
const base = host + path;
// Determine search term and query fields based on the suggestion type
let mainQuery;
if (typeof suggestion === "string") { // If suggestion is a string, append a wildcard (*) for partial matches
const lowercaseTerm = suggestion.toLowerCase()
mainQuery = {
bool: {
should: [
{ match: { title: { query: suggestion, fuzziness: "AUTO", boost: 3 } } },
{ match: { author: { query: suggestion, fuzziness: "AUTO", boost: 2 } } },
{ match: { subjects: { query: suggestion, fuzziness: "AUTO", boost: 1 } } },
{ wildcard: { title: { value: `${lowercaseTerm}*`, boost: 3 } } },
{ wildcard: { author: { value: `${lowercaseTerm}*`, boost: 2 } } },
{ wildcard: { subjects: { value: `${lowercaseTerm}*`, boost: 1 } } }
],
minimum_should_match: 1
}
};
} else if (suggestion instanceof Suggestion) { // If suggestion is a Suggestion object, form a specific query
mainQuery = {
match: {
[suggestion.type]: {
query: suggestion.value,
operator: 'and' // all the terms in the query must be present in the field
}
}
};
}
// Set default value for start if not provided
const startValue = start ? parseInt(start) : 0;
// Construct filter fields based on active filter categories
const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({
terms: { [`${category}.keyword`]: values }
}));
// Construct the body of the OpenSearch query
const body = {
query: {
bool: {
must: [
mainQuery, // Ensure the main query must be satisfied
...filters // Ensure all filters must be satisfied
]
}
},
// TODO: continue here!
// // THIS WORKS:
// query: {
// bool: {
// "must": [
// { "match": { "title": "blatt" } },
// { "terms": { "subjects": "bayern" } }
// ],
// }
// },
// // TRY THIS
// query: {
// bool: {
// "must": [
// { "match": { "title": "blatt" } },
// { "terms": { "subjects": "bayern" } },
// { "terms": { "subjects": "salzburg" } }
// ],
// }
// },
// // TRY THIS
// query: {
// bool: {
// "must": [
// { "match": { "title": "blatt" } },
// { "terms": { "subjects": "bayern" } }
// ],
// "should": [
// { match: { title: { query: "blatt", fuzziness: "AUTO", boost: 3 } } },
// { match: { author: { query: "blatt", fuzziness: "AUTO", boost: 2 } } },
// { match: { subjects: { query: "blatt", fuzziness: "AUTO", boost: 1 } } },
// { wildcard: { title: { value: "blatt", boost: 3 } } },
// { wildcard: { author: { value: "blatt", boost: 2 } } },
// { wildcard: { subjects: { value: "blatt", boost: 1 } } }
// ],
// minimum_should_match: 1
// }
// },
size: 10,
from: startValue,
sort: [{ _score: { order: "desc" } }],
track_scores: true,
aggs: {
subjects: { terms: { field: "subjects.keyword", size: 1000 } },
language: { terms: { field: "language.keyword" } },
author: { terms: { field: "author.keyword", size: 1000 } },
year: { terms: { field: "year.keyword", size: 100 } }
},
highlight: {
fields: {
title: {},
author: {},
subjects: {}
}
}
};
console.log("mainQuery:", mainQuery);
console.log("filters:", filters);
console.log("body:", body);
// Make API call to OpenSearch and return the result
const stations = api.post<OpenSearchResponse>(base, body);
return stations;
}
public facetedSearchOPEN2(
suggestion: Suggestion | string,
activeFilterCategories: ActiveFilterCategories,
openCore: string,
openHost: string,
start?: string, // Starting page
): Observable<OpenSearchResponse> { ): Observable<OpenSearchResponse> {
// OpenSearch endpoint // OpenSearch endpoint
// const host = "https://" + openHost; // When using geoinformation.dev // const host = "https://" + openHost; // When using geoinformation.dev
@ -180,19 +322,21 @@ class DatasetService {
} }
}; };
// // Constructing Filters Based on Active Filter Categories // HOME OFFICE ====================================================
// const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({ // Constructing Filters Based on Active Filter Categories
// // terms: { [`${category}.keyword`]: values } const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({
// terms: { [category]: values } terms: { [`${category}.keyword`]: values }
// })); }));
// ================================================================
const filters = Object.entries(activeFilterCategories).map(([category, values]) => { // // Constructing Filters Based on Active Filter Categories
if (category === "language" || category === "year") { // const filters = Object.entries(activeFilterCategories).map(([category, values]) => {
return { terms: { [category]: values } }; // if (category === "language" || category === "year") {
} else { // return { terms: { [category]: values } };
return { terms: { [`${category}.keyword`]: values } }; // } else {
} // return { terms: { [`${category}.keyword`]: values } };
}); // }
// });
// console.log(activeFilterCategories); // console.log(activeFilterCategories);
console.log("mainQuery:", mainQuery); console.log("mainQuery:", mainQuery);
@ -221,15 +365,23 @@ class DatasetService {
from: start ? parseInt(start) : 0, from: start ? parseInt(start) : 0,
sort: [{ _score: { order: "desc" } }], sort: [{ _score: { order: "desc" } }],
track_scores: true, track_scores: true,
aggs: { // Defines aggregations for facets // aggs: { // Defines aggregations for facets
// terms: Aggregation type that returns the most common terms in a field. // // terms: Aggregation type that returns the most common terms in a field.
// !For a large number of terms setting an extremely large size might not be efficient // // !For a large number of terms setting an extremely large size might not be efficient
// If you genuinely need all unique terms and expect a large number of them, consider using a composite aggregation for more efficient pagination of terms. // // If you genuinely need all unique terms and expect a large number of them, consider using a composite aggregation for more efficient pagination of terms.
// subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"!
// language: { terms: { field: "language" } }, // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
// author: { terms: { field: "author.keyword", size: 1000 } },
// year: { terms: { field: "year", size: 100 } } // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
// },
// HOME OFFICE ================================================================================
aggs: {
subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"! subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"!
language: { terms: { field: "language" } }, // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS language: { terms: { field: "language.keyword" } }, // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
author: { terms: { field: "author.keyword", size: 1000 } }, author: { terms: { field: "author.keyword", size: 1000 } },
year: { terms: { field: "year", size: 100 } } // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS year: { terms: { field: "year.keyword", size: 100 } } // << ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
}, },
// ===========================================================================================
highlight: { highlight: {
fields: { fields: {
title: {}, title: {},