import type { HttpContextContract } from '@ioc:Adonis/Core/HttpContext';
import { RequestContract } from '@ioc:Adonis/Core/Request';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces';
import { create } from 'xmlbuilder2';
import dayjs, { Dayjs } from 'dayjs';
import utc from 'dayjs/plugin/utc';
import timezone from 'dayjs/plugin/timezone';
import { readFileSync } from 'fs';
import { StatusCodes } from 'http-status-codes';
import { transform } from 'saxon-js';
// import { Xslt, xmlParse } from 'xslt-processor'
import { OaiErrorCodes, OaiModelError } from 'App/Exceptions/OaiErrorCodes';
import { OaiModelException, BadOaiModelException } from 'App/Exceptions/OaiModelException';
import Dataset from 'App/Models/Dataset';
import Collection from 'App/Models/Collection';
import { getDomain } from 'App/Utils/utility-functions';
import XmlModel from 'App/Library/XmlModel';

/** Parameters handed to the XSLT stylesheet, keyed by parameter name. */
interface XslTParameter {
    [key: string]: unknown;
}

/** Plain string-to-string map, used for request arguments and set listings. */
interface Dictionary {
    [index: string]: string;
}

/** Repository identifier used for the `repIdentifier` stylesheet parameter. */
const REPOSITORY_IDENTIFIER = 'tethys.at';

/** Path of the compiled stylesheet (SEF) that renders the OAI response. */
const STYLESHEET_PATH = 'public/assets2/datasetxml2oai.sef.json';

/**
 * Controller answering OAI-PMH requests.
 *
 * Each request builds a small source XML document plus a set of stylesheet
 * parameters; the final response body is produced by running the compiled
 * stylesheet over that document.
 */
export default class OaiController {
    /** Only datasets in one of these server states are delivered. */
    private readonly deliveringDocumentStates = ['published', 'deleted'];

    // private sampleRegEx = /^[A-Za-zäüÄÜß0-9\-_.!~]+$/;

    /** Stylesheet parameters collected while handling the current request. */
    private xsltParameter: XslTParameter = {};

    /** Source document for the transformation; recreated for every request in `index`. */
    private xml: XMLBuilder = OaiController.createSourceDocument();

    /** Text of the compiled stylesheet, read once per controller instance. */
    private readonly proc: string;

    constructor() {
        // Read as text: the transformation receives it as `stylesheetText`.
        this.proc = readFileSync(STYLESHEET_PATH, 'utf-8');
        dayjs.extend(utc);
        dayjs.extend(timezone);
    }

    /**
     * Entry point: handles one OAI request and sends the transformed XML.
     *
     * GET requests take their arguments from the query string, POST requests
     * from the body. Any other method produces an error response rendered by
     * the stylesheet. Failures of the transformation itself are answered with
     * a JSON error and status 500.
     */
    public async index({ response, request }: HttpContextContract): Promise<void> {
        this.xml = OaiController.createSourceDocument();
        this.xsltParameter = {};

        const method = request.method();
        if (method === 'POST' || method === 'GET') {
            const oaiRequest: Dictionary = method === 'POST' ? request.body() : request.qs();
            try {
                await this.handleRequest(oaiRequest, request);
            } catch (error) {
                if (error instanceof OaiModelException) {
                    const code = error.oaiCode;
                    const oaiErrorCode = OaiModelError.get(code) ?? 'Unknown oai error code ' + code;
                    this.setError(oaiErrorCode, error.message);
                } else {
                    this.setError('unknown', 'An internal error occured.');
                }
            }
        } else {
            // Do not dispatch: without a verb the handler would replace this error
            // with a "bad verb" error. The common parameters are still set.
            this.applyCommonParameters(request);
            this.setError('unknown', 'Only POST and GET methods are allowed for OAI-PMH.');
        }

        const xmlString = this.xml.end({ prettyPrint: true });

        let xmlOutput: unknown;
        try {
            const result = await transform({
                stylesheetText: this.proc,
                destination: 'serialized',
                sourceText: xmlString,
                stylesheetParams: this.xsltParameter,
            });
            xmlOutput = result.principalResult;
        } catch (error) {
            response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
                message: 'An error occurred while transforming the OAI response',
                error: OaiController.describeError(error),
            });
            return;
        }

        response
            .header('Content-Type', 'application/xml; charset=utf-8')
            .header('Access-Control-Allow-Origin', '*')
            .header('Access-Control-Allow-Methods', 'GET,POST');
        response.status(StatusCodes.OK).send(xmlOutput);
    }

    /**
     * Sets the request-independent stylesheet parameters and dispatches on the `verb` argument.
     *
     * @param oaiRequest - request arguments (query string or body)
     * @param request - the HTTP request, used for protocol and host name
     * @throws OaiModelException when no verb is supplied, or when a verb handler rejects the request
     */
    protected async handleRequest(oaiRequest: Dictionary, request: RequestContract): Promise<void> {
        this.applyCommonParameters(request);

        const verb = oaiRequest['verb'];
        if (!verb) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'The verb provided in the request is illegal.',
                OaiErrorCodes.BADVERB,
            );
        }

        this.xsltParameter['oai_verb'] = verb;
        switch (verb) {
            case 'Identify':
                this.handleIdentify();
                break;
            case 'ListMetadataFormats':
                this.handleListMetadataFormats();
                break;
            case 'GetRecord':
                await this.handleGetRecord(oaiRequest);
                break;
            // Not implemented yet:
            // case 'ListRecords':
            //     await this.handleListRecords(oaiRequest);
            //     break;
            // case 'ListIdentifiers':
            //     await this.handleListIdentifiers(oaiRequest);
            //     break;
            case 'ListSets':
                await this.handleListSets();
                break;
            default:
                this.handleIllegalVerb();
        }
    }

    /** Handles the `Identify` verb: sets the repository description parameters. */
    protected handleIdentify(): void {
        const email = process.env.OAI_EMAIL || 'repository@geosphere.at';
        const sampleIdentifier = 'oai:' + REPOSITORY_IDENTIFIER + ':1';

        // TODO: pass 'earliestDatestamp' (earliest publication date from the database).
        this.xsltParameter['email'] = email;
        this.xsltParameter['repositoryName'] = 'Tethys RDR';
        this.xsltParameter['repIdentifier'] = REPOSITORY_IDENTIFIER;
        this.xsltParameter['sampleIdentifier'] = sampleIdentifier;

        this.xml.root().ele('Datasets');
    }

    /** Handles the `ListMetadataFormats` verb: only an empty `Datasets` element is added. */
    protected handleListMetadataFormats(): void {
        this.xml.root().ele('Datasets');
    }

    /** Handles the `ListSets` verb: adds one `Rdr_Sets` element per known set. */
    protected async handleListSets(): Promise<void> {
        this.xsltParameter['repIdentifier'] = REPOSITORY_IDENTIFIER;
        const datasetElement = this.xml.root().ele('Datasets');

        const sets: Dictionary = {
            'open_access': 'Set for open access licenses',
            'doc-type:ResearchData': 'Set for document type ResearchData',
            // ...(await this.getSetsForDatasetTypes()),
            ...(await this.getSetsForCollections()),
            // ...(await this.getSetsForProjects()),
        };

        for (const [key, value] of Object.entries(sets)) {
            const setElement = datasetElement.ele('Rdr_Sets');
            setElement.att('Type', key);
            setElement.att('TypeName', value);
        }
    }

    /**
     * Handles the `GetRecord` verb: looks up the dataset by the id contained in
     * the `identifier` argument and adds its XML to the source document.
     *
     * @throws OaiModelException when the identifier or metadata prefix is missing
     *   or invalid, the dataset does not exist, or its state is not deliverable
     */
    protected async handleGetRecord(oaiRequest: Dictionary): Promise<void> {
        this.xsltParameter['repIdentifier'] = REPOSITORY_IDENTIFIER;

        const dataId = this.validateAndGetIdentifier(oaiRequest);
        const dataset = await Dataset.query().where('publish_id', dataId).preload('xmlCache').preload('collections').first();
        if (!dataset || !dataset.publish_id) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'The value of the identifier argument is unknown or illegal in this repository.',
                OaiErrorCodes.IDDOESNOTEXIST,
            );
        }

        const metadataPrefix = this.validateAndGetMetadataPrefix(oaiRequest);
        this.xsltParameter['oai_metadataPrefix'] = metadataPrefix;

        // do not deliver datasets whose server state is not listed in deliveringDocumentStates
        this.validateDatasetState(dataset);

        const datasetNode = this.xml.root().ele('Datasets');
        await this.createXmlRecord(dataset, datasetNode);
    }

    /**
     * Creates the empty source document for the transformation.
     *
     * The handlers append their content below `this.xml.root()`, so the document
     * needs a document element from the start.
     * NOTE(review): the element name `root` is an assumption - confirm it matches
     * what the stylesheet expects.
     */
    private static createSourceDocument(): XMLBuilder {
        return create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
    }

    /** Extracts a printable message from a caught value of unknown type. */
    private static describeError(error: unknown): string {
        if (error instanceof Error) {
            return error.message;
        }
        if (typeof error === 'object' && error !== null && 'message' in error) {
            return String((error as { message: unknown }).message);
        }
        return String(error);
    }

    /** Records an error code and message for the stylesheet to render. */
    private setError(code: string, message: string): void {
        this.xsltParameter['oai_error_code'] = code;
        this.xsltParameter['oai_error_message'] = message;
    }

    /** Sets the stylesheet parameters that do not depend on the verb (timestamps, URLs, DOI prefixes). */
    private applyCommonParameters(request: RequestContract): void {
        // The format string appends a literal 'Z', so the timestamp must be taken in UTC.
        const now: Dayjs = dayjs().utc();
        this.xsltParameter['responseDate'] = now.format('YYYY-MM-DDTHH:mm:ss[Z]');
        this.xsltParameter['unixTimestamp'] = now.unix();

        const baseDomain = process.env.BASE_DOMAIN || 'localhost';
        const origin = request.protocol() + '://' + request.hostname();
        this.xsltParameter['baseURL'] = baseDomain + '/oai';
        this.xsltParameter['repURL'] = origin;
        this.xsltParameter['downloadLink'] = origin + '/file/download/';
        this.xsltParameter['doiLink'] = 'https://doi.org/';
        this.xsltParameter['doiPrefix'] = 'info:eu-repo/semantics/altIdentifier/doi/';
    }

    /**
     * Reads the numeric dataset id from the `identifier` argument.
     *
     * The id is the third colon-separated part of the identifier, e.g.
     * 'oai:foo.bar.de:{docId}', and must consist of decimal digits only.
     *
     * @throws BadOaiModelException when the argument is missing
     * @throws OaiModelException when the id part is absent or not a decimal number
     */
    private validateAndGetIdentifier(oaiRequest: Dictionary): number {
        if (!('identifier' in oaiRequest)) {
            throw new BadOaiModelException('The prefix of the identifier argument is unknown.');
        }

        // The declared type says string, but request data can carry other shapes
        // (e.g. an array when the argument is repeated), so check at runtime.
        const identifier: unknown = oaiRequest.identifier;
        const idPart = typeof identifier === 'string' ? this.getDocumentIdByIdentifier(identifier) : '';
        if (!/^\d+$/.test(idPart)) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'The value of the identifier argument is illegal in this repository.',
                OaiErrorCodes.BADARGUMENT,
            );
        }
        return Number(idPart);
    }

    /**
     * Returns the `metadataPrefix` argument.
     *
     * @throws OaiModelException when the argument is missing
     */
    private validateAndGetMetadataPrefix(oaiRequest: Dictionary): string {
        if (!('metadataPrefix' in oaiRequest)) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'The prefix of the metadata argument is unknown.',
                OaiErrorCodes.BADARGUMENT,
            );
        }
        return oaiRequest['metadataPrefix'];
    }

    /**
     * Ensures the dataset is in a deliverable server state.
     *
     * @throws OaiModelException when the state is missing or not in `deliveringDocumentStates`
     */
    private validateDatasetState(dataset: Dataset): void {
        if (dataset.server_state == null || !this.deliveringDocumentStates.includes(dataset.server_state)) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'Document is not available for OAI export!',
                OaiErrorCodes.NORECORDSMATCH,
            );
        }
    }

    /**
     * Imports the dataset's XML into `datasetNode`, after adding the landing
     * page attribute and the data-type set spec. Does nothing when no XML is
     * available for the dataset.
     */
    private async createXmlRecord(dataset: Dataset, datasetNode: XMLBuilder): Promise<void> {
        const domNode = await this.getDatasetXmlDomNode(dataset);
        if (!domNode) {
            return;
        }

        if (dataset.publish_id) {
            this.addLandingPageAttribute(domNode, dataset.publish_id.toString());
        }
        this.addSpecInformation(domNode, 'data-type:' + dataset.type);

        // TODO: add set specs for the dataset's collections:
        // if (dataset.collections) {
        //     for (const coll of dataset.collections) {
        //         const collRole = await coll.getCollectionRole();
        //         this.addSpecInformation(domNode, collRole.oai_name + ':' + coll.number);
        //     }
        // }

        datasetNode.import(domNode);
    }

    /** Builds the dataset's XML via `XmlModel`, handing over the preloaded XML cache if there is one. */
    private async getDatasetXmlDomNode(dataset: Dataset): Promise<XMLBuilder | null> {
        const xmlModel = new XmlModel(dataset);
        xmlModel.excludeEmptyFields();
        xmlModel.caching = true;
        if (dataset.xmlCache) {
            xmlModel.xmlCache = dataset.xmlCache;
        }
        return await xmlModel.getDomDocument();
    }

    /** Appends a `SetSpec` child element carrying `information` in its `Value` attribute. */
    private addSpecInformation(domNode: XMLBuilder, information: string): void {
        domNode.ele('SetSpec').att('Value', information);
    }

    /** Adds the dataset's landing page URL as `landingpage` attribute of the dataset XML element. */
    private addLandingPageAttribute(domNode: XMLBuilder, dataid: string): void {
        const baseDomain = process.env.BASE_DOMAIN || 'localhost';
        const url = 'https://' + getDomain(baseDomain) + '/dataset/' + dataid;
        domNode.att('landingpage', url);
    }

    /**
     * Returns the third colon-separated part of the identifier, or an empty
     * string when the identifier has fewer than three parts. The value is not
     * validated here.
     */
    private getDocumentIdByIdentifier(oaiIdentifier: string): string {
        const identifierParts = oaiIdentifier.split(':');
        return identifierParts[2] ?? '';
    }

    /**
     * Collects one set per numbered collection whose collection role is visible for OAI.
     *
     * @returns map from set spec (`{role oai_name}:{collection number}`) to a display name
     */
    private async getSetsForCollections(): Promise<Dictionary> {
        const sets: Dictionary = {};

        const collections = await Collection.query()
            .select('name', 'number', 'role_id')
            .whereHas('collectionRole', (query) => {
                query.where('visible_oai', true);
            })
            .preload('collectionRole');

        for (const collection of collections) {
            // Skip entries that cannot form a valid set spec, instead of emitting "undefined:<number>".
            if (!collection.number || !collection.collectionRole) {
                continue;
            }
            const setSpec = collection.collectionRole.oai_name + ':' + collection.number;
            sets[setSpec] = `Set ${collection.number} '${collection.name}'`;
        }

        return sets;
    }

    // Not ported yet (still written against the previous data layer):
    // private async getSetsForDatasetTypes(): Promise<Dictionary> {
    //     const sets: Dictionary = {};
    //     const datasets = await Dataset.findAll({
    //         attributes: ["type"],
    //         where: { server_state: { [Sequelize.Op.eq]: "published" } },
    //     });
    //     datasets.forEach((dataset) => {
    //         if (dataset.type && false == preg_match(this.sampleRegEx, dataset.type)) {
    //             const msg = `Invalid SetSpec (data-type='${dataset.type}').
    //                 Allowed characters are [${this.sampleRegEx}].`;
    //             Logger.err(`OAI: ${msg}`);
    //             return;
    //         }
    //         const setSpec = "data-type:" + dataset.type;
    //         sets[setSpec] = `Set for document type '${dataset.type}'`;
    //     });
    //     return sets;
    // }

    /** Records the error for a verb that is not supported. */
    private handleIllegalVerb(): void {
        this.setError('badVerb', 'The verb provided in the request is illegal.');
    }
}