tethys.backend/commands/IndexDatasets.ts

125 lines
4.9 KiB
TypeScript
Raw Normal View History

// import Logger from '@ioc:Adonis/Core/Logger';
2024-03-14 19:25:27 +00:00
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
import { create } from 'xmlbuilder2';
2024-03-14 19:25:27 +00:00
import Dataset from '#app/Models/Dataset';
import XmlModel from '#app/Library/XmlModel';
import { readFileSync } from 'fs';
2024-03-14 19:25:27 +00:00
import SaxonJS from 'saxon-js';
import { Client } from '@opensearch-project/opensearch';
2024-03-14 19:25:27 +00:00
import { getDomain } from '#app/Utils/utility-functions';
import { BaseCommand, flags } from '@adonisjs/core/ace';
import { CommandOptions } from '@adonisjs/core/types/ace';
// Host (optionally "host:port") of the OpenSearch node, read from the
// OPENSEARCH_HOST environment variable; an unset or empty value falls back to 'localhost'.
const opensearchNode = process.env.OPENSEARCH_HOST || 'localhost';
// Module-level OpenSearch client used by the command below; the node URL is always built with the http scheme.
const client = new Client({
    node: 'http://' + opensearchNode,
});
/**
 * Ace command `index:datasets`.
 *
 * Loads datasets whose `server_state` is `published`, serialises each one to XML,
 * transforms that XML to a JSON string with the stylesheet read from
 * `public/assets2/solr.sef.json`, and indexes the parsed JSON into the
 * `tethys-records` index through the module-level OpenSearch `client`.
 *
 * When the `--publish_id` (`-p`) flag is given, only the dataset with that
 * publish_id is indexed; otherwise every published dataset is indexed.
 */
export default class IndexDatasets extends BaseCommand {
    static commandName = 'index:datasets';
    static description = 'Index datasets based on publish_id';

    /** Optional `--publish_id` / `-p` flag restricting the run to a single dataset. */
    @flags.number({ alias: 'p' })
    public publish_id: number;

    static options: CommandOptions = {
        loadApp: true,
        staysAlive: false,
    };

    /**
     * Command entry point: selects the datasets to index and indexes them one by one.
     * A failure for one dataset is logged and does not stop the remaining ones.
     */
    async run(): Promise<void> {
        // The model is imported dynamically here instead of using the file-level import
        // (presumably so it is only loaded once the application has booted — confirm).
        const { default: DatasetModel } = await import('#app/Models/Dataset');

        const query = DatasetModel.query().where('server_state', 'published');
        // Honour the --publish_id flag; without it all published datasets are indexed.
        if (this.publish_id != null) {
            query.where('publish_id', this.publish_id);
        }
        const datasets = await query.exec();

        if (datasets.length === 0) {
            this.logger.warning('No published dataset found to index.');
            return;
        }

        // Read the stylesheet once; it is the same for every dataset.
        const proc = readFileSync('public/assets2/solr.sef.json');
        const index_name = 'tethys-records';
        this.logger.info(`Indexing ${datasets.length} dataset(s) into index ${index_name}`);

        // Sequential on purpose: each document is awaited before the next one starts.
        for (const dataset of datasets) {
            await this.indexDocument(dataset, index_name, proc);
        }
    }

    /**
     * Builds the JSON document for one dataset and writes it to the given index,
     * using the dataset's publish_id (as a string) as the document id.
     * Errors are logged with their message and never rethrown.
     *
     * @param dataset - dataset to index
     * @param index_name - name of the target OpenSearch index
     * @param proc - content of the stylesheet file used for the XML-to-JSON transformation
     */
    private async indexDocument(dataset: Dataset, index_name: string, proc: Buffer): Promise<void> {
        try {
            const doc = await this.getJsonString(dataset, proc);
            if (!doc) {
                // getJsonString already logged the transformation failure; there is nothing to index.
                return;
            }
            const document = JSON.parse(doc);
            await client.index({
                id: dataset.publish_id?.toString(),
                index: index_name,
                body: document,
                refresh: true,
            });
            this.logger.info(`dataset with publish_id ${dataset.publish_id} successfully indexed`);
        } catch (error: unknown) {
            this.logger.error(
                `An error occurred while indexing dataset with publish_id ${dataset.publish_id}, error: ${IndexDatasets.errorMessage(error)}`,
            );
        }
    }

    /**
     * Serialises the dataset to an XML string (`<root><Dataset>…</Dataset></root>`) and
     * runs the SaxonJS transformation over it.
     *
     * @param dataset - dataset to serialise
     * @param proc - content of the stylesheet file passed to SaxonJS as `stylesheetText`
     * @returns the serialized transformation result, or an empty string when the
     *          transformation fails (the failure is logged)
     */
    private async getJsonString(dataset: Dataset, proc: Buffer) {
        const xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
        const datasetNode = xml.root().ele('Dataset');
        await this.createXmlRecord(dataset, datasetNode);
        const xmlString = xml.end({ prettyPrint: false });

        try {
            const result = await SaxonJS.transform({
                stylesheetText: proc,
                destination: 'serialized',
                sourceText: xmlString,
            });
            return result.principalResult;
        } catch (error: unknown) {
            this.logger.error(
                `An error occurred while transforming dataset with publish_id ${dataset.publish_id} to JSON, error: ${IndexDatasets.errorMessage(error)}`,
            );
            return '';
        }
    }

    /**
     * Imports the dataset's XML node into `datasetNode`, after adding the landing page
     * attribute (only when the dataset has a publish_id) and the `SetSpec` element.
     * Does nothing when no XML node could be obtained for the dataset.
     */
    private async createXmlRecord(dataset: Dataset, datasetNode: XMLBuilder): Promise<void> {
        const domNode = await this.getDatasetXmlDomNode(dataset);
        if (!domNode) {
            return;
        }
        if (dataset.publish_id) {
            this.addLandingPageAttribute(domNode, dataset.publish_id.toString());
        }
        this.addSpecInformation(domNode, 'data-type:' + dataset.type);
        datasetNode.import(domNode);
    }

    /**
     * Obtains the XML node for a dataset from an `XmlModel` configured to exclude
     * empty fields and with caching enabled; the dataset's `xmlCache` is handed
     * to the model when one is present.
     *
     * @returns the node returned by `XmlModel.getDomDocument()`, which may be null
     */
    private async getDatasetXmlDomNode(dataset: Dataset): Promise<XMLBuilder | null> {
        const xmlModel = new XmlModel(dataset);
        xmlModel.excludeEmptyFields();
        xmlModel.caching = true;
        if (dataset.xmlCache) {
            xmlModel.xmlCache = dataset.xmlCache;
        }
        return await xmlModel.getDomDocument();
    }

    /** Appends a `SetSpec` child element whose `Value` attribute is `information`. */
    private addSpecInformation(domNode: XMLBuilder, information: string) {
        domNode.ele('SetSpec').att('Value', information);
    }

    /**
     * Sets the `landingpage` attribute on the dataset XML element to
     * `https://<domain>/dataset/<dataid>`, where the domain is derived via `getDomain`
     * from the OAI_BASE_DOMAIN environment variable (falling back to 'localhost').
     */
    private addLandingPageAttribute(domNode: XMLBuilder, dataid: string) {
        const baseDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
        const url = 'https://' + getDomain(baseDomain) + '/dataset/' + dataid;
        // add the attribute to the dataset xml element
        domNode.att('landingpage', url);
    }

    /** Extracts a printable message from a caught value of unknown type. */
    private static errorMessage(error: unknown): string {
        return error instanceof Error ? error.message : String(error);
    }
}