forked from geolba/tethys.backend
Arno Kaimbacher
7915f66dd6
- new classes TokenWorkerService.ts, TokenWorker.ts and ResumptionToken.ts for using REDIS with paging OAI results - deleted public/assets2/langCodeMap.xml: integrated it directly in datasetxml2oai-pmh.xslt - added redis npm package - added TokenWorkerProvider.ts for using singleton of TokenWorkerService inside OaiController.ts - added config/oai.ts for oai related configs from .env-file - adapted XmlModel.ts for getting domDocument from database
639 lines
26 KiB
TypeScript
639 lines
26 KiB
TypeScript
import type { HttpContextContract } from '@ioc:Adonis/Core/HttpContext';
|
|
import { RequestContract } from '@ioc:Adonis/Core/Request';
|
|
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces';
|
|
import { create } from 'xmlbuilder2';
|
|
import dayjs, { Dayjs } from 'dayjs';
|
|
import utc from 'dayjs/plugin/utc';
|
|
import timezone from 'dayjs/plugin/timezone';
|
|
import { readFileSync } from 'fs';
|
|
import { StatusCodes } from 'http-status-codes';
|
|
import { transform } from 'saxon-js';
|
|
// import { Xslt, xmlParse } from 'xslt-processor'
|
|
import { OaiErrorCodes, OaiModelError } from 'App/Exceptions/OaiErrorCodes';
|
|
import { OaiModelException, BadOaiModelException } from 'App/Exceptions/OaiModelException';
|
|
import Dataset from 'App/Models/Dataset';
|
|
import Collection from 'App/Models/Collection';
|
|
import { getDomain, preg_match } from 'App/Utils/utility-functions';
|
|
import XmlModel from 'App/Library/XmlModel';
|
|
import Logger from '@ioc:Adonis/Core/Logger';
|
|
import ResumptionToken from 'App/Library/Oai/ResumptionToken';
|
|
import { ModelQueryBuilderContract } from '@ioc:Adonis/Lucid/Orm';
|
|
import Config from '@ioc:Adonis/Core/Config';
|
|
import { inject } from '@adonisjs/fold';
|
|
// import { TokenWorkerContract } from "MyApp/Models/TokenWorker";
|
|
import TokenWorkerContract from 'App/Library/Oai/TokenWorker';
|
|
|
|
/**
 * Bag of named values that the controller hands to the XSLT transformation
 * as `stylesheetParams`. Keys are parameter names; values are unconstrained.
 */
interface XslTParameter {
    [paramName: string]: any;
}
|
|
|
|
/**
 * Plain string-to-string map. Used for the OAI request arguments and for the
 * setSpec -> setName lists.
 */
interface Dictionary {
    [name: string]: string;
}
|
|
|
|
/**
 * Mutable paging state shared by the list handlers
 * (ListIdentifiers / ListRecords).
 */
interface ListParameter {
    /** Cursor value written to the resumptionToken node of the response. */
    cursor: number;
    /** Number of ids in the complete result list. */
    totalIds: number;
    /** Start position stored in the resumption token issued for the next page. */
    start: number;
    /** Ids (publish_id) that still have to be delivered. */
    reldocIds: (number | null)[];
    /** Metadata prefix of the running list request. */
    metadataPrefix: string;
}
|
|
|
|
@inject(['App/Library/Oai/TokenWorkerContract'])
|
|
export default class OaiController {
|
|
private deliveringDocumentStates = ['published', 'deleted'];
|
|
private sampleRegEx = /^[A-Za-zäüÄÜß0-9\-_.!~]+$/;
|
|
private xsltParameter: XslTParameter;
|
|
|
|
/**
|
|
* Holds xml representation of document information to be processed.
|
|
*
|
|
* @var xmlbuilder.XMLDocument | null Defaults to null.
|
|
*/
|
|
private xml: XMLBuilder;
|
|
private proc;
|
|
|
|
/**
 * @param tokenWorker store used to save and look up resumption tokens
 */
constructor(public tokenWorker: TokenWorkerContract) {
    // Compiled stylesheet (SEF); read once and reused by every transformation.
    const stylesheetPath = 'public/assets2/datasetxml2oai.sef.json';
    this.proc = readFileSync(stylesheetPath);

    // Register the dayjs plugins used by the date handling of this controller.
    [utc, timezone].forEach((plugin) => dayjs.extend(plugin));
}
|
|
|
|
/**
 * Single HTTP entry point of the OAI-PMH interface.
 *
 * Collects the request arguments (query string for GET, body for POST), lets
 * `handleRequest` fill the intermediate XML document and the stylesheet
 * parameters, then runs the XSLT transformation and sends its serialized
 * result as `application/xml`.
 *
 * Failures raised while handling the request are not turned into HTTP errors:
 * they are handed to the stylesheet via `oai_error_code` / `oai_error_message`
 * and the response is still sent with status 200. Only a failing
 * transformation produces an HTTP 500 JSON response.
 *
 * @param ctx HTTP context; only `request` and `response` are used
 */
public async index({ response, request }: HttpContextContract): Promise<void> {
    this.xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');

    // The same object is kept on the instance (filled by the handlers) and
    // passed to the transformation below.
    const xsltParameter: XslTParameter = {};
    this.xsltParameter = xsltParameter;

    let oaiRequest: Dictionary = {};
    const method = request.method();
    if (method === 'POST') {
        oaiRequest = request.body();
    } else if (method === 'GET') {
        oaiRequest = request.qs();
    } else {
        xsltParameter['oai_error_code'] = 'unknown';
        xsltParameter['oai_error_message'] = 'Only POST and GET methods are allowed for OAI-PMH.';
    }

    try {
        const firstPublishedDataset: Dataset | null = await Dataset.earliestPublicationDate();
        let earliestDateFromDb: string | undefined;
        if (firstPublishedDataset != null) {
            earliestDateFromDb = firstPublishedDataset.server_date_published.toFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        }
        this.xsltParameter['earliestDatestamp'] = earliestDateFromDb;

        // start the request
        await this.handleRequest(oaiRequest, request);
    } catch (error) {
        if (error instanceof OaiModelException) {
            const code = error.oaiCode;
            // Fall back to a generic text when the code has no registered name.
            this.xsltParameter['oai_error_code'] = OaiModelError.get(code) ?? 'Unknown oai error code ' + code;
            this.xsltParameter['oai_error_message'] = error.message;
        } else {
            // Unexpected failure: the client only gets a generic OAI error, so
            // the real cause has to be logged here or it is lost.
            const detail = error instanceof Error ? (error.stack ?? error.message) : String(error);
            Logger.error(`OAI-PMH: unexpected error while handling the request: ${detail}`);
            this.xsltParameter['oai_error_code'] = 'unknown';
            this.xsltParameter['oai_error_message'] = 'An internal error occured.';
        }
    }

    const xmlString = this.xml.end({ prettyPrint: true });

    let xmlOutput;
    try {
        const result = await transform({
            stylesheetText: this.proc,
            destination: 'serialized',
            sourceText: xmlString,
            stylesheetParams: xsltParameter,
        });
        xmlOutput = result.principalResult;
    } catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        Logger.error(`OAI-PMH: XSLT transformation failed: ${detail}`);
        return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
            message: 'An error occurred while transforming the OAI-PMH response',
            error: detail,
        });
    }

    response
        .header('Content-Type', 'application/xml; charset=utf-8')
        .header('Access-Control-Allow-Origin', '*')
        .header('Access-Control-Allow-Methods', 'GET,POST');
    response.status(StatusCodes.OK).send(xmlOutput);
}
|
|
|
|
/**
 * Sets the stylesheet parameters common to every response and dispatches to
 * the handler of the requested OAI verb.
 *
 * @param oaiRequest OAI arguments of the request (verb, identifier, ...)
 * @param request    HTTP request, used to derive protocol and host name
 * @throws OaiModelException with code BADVERB when no `verb` argument is given
 */
protected async handleRequest(oaiRequest: Dictionary, request: RequestContract) {
    // Response time. The pattern ends in a literal "Z", so the instant must be
    // rendered in UTC (OAI-PMH UTCdatetime) and not in server-local time.
    const now: Dayjs = dayjs().utc();
    this.xsltParameter['responseDate'] = now.format('YYYY-MM-DDTHH:mm:ss[Z]');
    this.xsltParameter['unixTimestamp'] = now.unix();

    // set OAI base url
    const baseDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
    const repositoryUrl = request.protocol() + '://' + request.hostname();
    this.xsltParameter['baseURL'] = baseDomain + '/oai';
    this.xsltParameter['repURL'] = repositoryUrl;
    this.xsltParameter['downloadLink'] = repositoryUrl + '/file/download/';
    this.xsltParameter['doiLink'] = 'https://doi.org/';
    this.xsltParameter['doiPrefix'] = 'info:eu-repo/semantics/altIdentifier/doi/';

    const verb = oaiRequest['verb'];
    if (!verb) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The verb provided in the request is illegal.',
            OaiErrorCodes.BADVERB,
        );
    }

    this.xsltParameter['oai_verb'] = verb;
    switch (verb) {
        case 'Identify':
            this.handleIdentify();
            break;
        case 'ListMetadataFormats':
            this.handleListMetadataFormats();
            break;
        case 'GetRecord':
            await this.handleGetRecord(oaiRequest);
            break;
        case 'ListRecords':
            await this.handleListRecords(oaiRequest);
            break;
        case 'ListIdentifiers':
            await this.handleListIdentifiers(oaiRequest);
            break;
        case 'ListSets':
            await this.handleListSets();
            break;
        default:
            this.handleIllegalVerb();
    }
}
|
|
|
|
/**
 * Handles the `Identify` verb: publishes the repository description as
 * stylesheet parameters and adds an empty `Datasets` element to the document.
 */
protected handleIdentify() {
    const repIdentifier = 'tethys.at';

    // set parameters for oai-pmh.xslt
    Object.assign(this.xsltParameter, {
        email: process.env.OAI_EMAIL || 'repository@geosphere.at',
        repositoryName: 'Tethys RDR',
        repIdentifier: repIdentifier,
        sampleIdentifier: 'oai:' + repIdentifier + ':1',
    });

    const root = this.xml.root();
    root.ele('Datasets');
}
|
|
|
|
/**
 * Handles the `ListMetadataFormats` verb: only an empty `Datasets` element is
 * added to the document; no stylesheet parameters are set here.
 */
protected handleListMetadataFormats() {
    const root = this.xml.root();
    root.ele('Datasets');
}
|
|
|
|
/**
 * Handles the `ListSets` verb: writes one `Rdr_Sets` element per known set
 * (attributes `Type` = setSpec, `TypeName` = description) below `Datasets`.
 */
protected async handleListSets() {
    this.xsltParameter['repIdentifier'] = 'tethys.at';
    const datasetElement = this.xml.root().ele('Datasets');

    // Later sources overwrite earlier ones on identical keys.
    const datasetTypeSets = await this.getSetsForDatasetTypes();
    const collectionSets = await this.getSetsForCollections();
    const allSets: Dictionary = {
        'open_access': 'Set for open access licenses',
        'doc-type:ResearchData': 'Set for document type ResearchData',
        ...datasetTypeSets,
        ...collectionSets,
    };

    Object.entries(allSets).forEach(([setSpec, setName]) => {
        datasetElement.ele('Rdr_Sets').att('Type', setSpec).att('TypeName', setName);
    });
}
|
|
|
|
/**
 * Handles the `GetRecord` verb: looks up the dataset addressed by the
 * `identifier` argument and appends its XML below a `Datasets` element.
 *
 * @param oaiRequest OAI arguments; `identifier` and `metadataPrefix` are read
 * @throws OaiModelException with code IDDOESNOTEXIST when no dataset with that
 *         publish_id exists; further errors come from the validate* helpers
 */
protected async handleGetRecord(oaiRequest: Dictionary) {
    this.xsltParameter['repIdentifier'] = 'tethys.at';

    const publishId = this.validateAndGetIdentifier(oaiRequest);
    const query = Dataset.query()
        .where('publish_id', publishId)
        .preload('xmlCache')
        .preload('collections', (collectionQuery) => {
            collectionQuery.preload('collectionRole');
        });
    const dataset = await query.first();

    const found = dataset && dataset.publish_id;
    if (!found) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The value of the identifier argument is unknown or illegal in this repository.',
            OaiErrorCodes.IDDOESNOTEXIST,
        );
    }

    this.xsltParameter['oai_metadataPrefix'] = this.validateAndGetMetadataPrefix(oaiRequest);

    // Only datasets in one of the delivering states are handed out.
    this.validateDatasetState(dataset);

    // add xml elements
    const datasetsElement = this.xml.root().ele('Datasets');
    await this.createXmlRecord(dataset, datasetsElement);
}
|
|
|
|
/**
 * Handles the `ListIdentifiers` verb. Connects the token worker if necessary
 * and delegates to `handleLists` with the configured page size
 * (`oai.max.listidentifiers`, default 100).
 *
 * @param oaiRequest OAI arguments of the request
 */
protected async handleListIdentifiers(oaiRequest: Dictionary) {
    if (!this.tokenWorker.isConnected) {
        await this.tokenWorker.connect();
    }

    const pageSize: number = Config.get('oai.max.listidentifiers', 100);
    await this.handleLists(oaiRequest, pageSize);
}
|
|
|
|
/**
 * Handles the `ListRecords` verb. Connects the token worker if necessary and
 * delegates to `handleLists` with the configured page size
 * (`oai.max.listrecords`, default 100).
 *
 * @param oaiRequest OAI arguments of the request
 */
protected async handleListRecords(oaiRequest: Dictionary) {
    if (!this.tokenWorker.isConnected) {
        await this.tokenWorker.connect();
    }

    const maxRecords: number = Config.get('oai.max.listrecords', 100);
    await this.handleLists(oaiRequest, maxRecords);
}
|
|
|
|
/**
 * Shared implementation of ListIdentifiers and ListRecords.
 *
 * Determines the ids to deliver (from a stored resumption token or from a
 * fresh query), writes the XML of the first `maxRecords` datasets and, when
 * ids remain, stores them in a new resumption token whose id is passed to the
 * stylesheet.
 *
 * @param oaiRequest OAI arguments of the request
 * @param maxRecords page size; a falsy value falls back to 100
 * @throws OaiModelException with code NORECORDSMATCH when there is nothing to deliver
 */
private async handleLists(oaiRequest: Dictionary, maxRecords: number) {
    const pageSize = maxRecords || 100;
    this.xsltParameter['repIdentifier'] = 'tethys.at';
    const datasetsElement = this.xml.root().ele('Datasets');

    // list initialisation
    const listState: ListParameter = {
        cursor: 0,
        totalIds: 0,
        start: pageSize + 1,
        reldocIds: [],
        metadataPrefix: '',
    };

    const isContinuation = 'resumptionToken' in oaiRequest;
    if (isContinuation) {
        await this.handleResumptionToken(oaiRequest, pageSize, listState);
    } else {
        await this.handleNoResumptionToken(oaiRequest, listState);
    }

    // splice() removes the current page from the list, so `pendingIds`
    // afterwards holds exactly the ids left for the following pages.
    const pendingIds = listState.reldocIds as number[];
    const pageIds = pendingIds.splice(0, pageSize) as number[];

    // no records returned
    if (pageIds.length == 0) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The combination of the given values results in an empty list.',
            OaiErrorCodes.NORECORDSMATCH,
        );
    }

    const pageDatasets: Dataset[] = await Dataset.query()
        .whereIn('publish_id', pageIds)
        .preload('xmlCache')
        .preload('collections', (collectionQuery) => {
            collectionQuery.preload('collectionRole');
        })
        .orderBy('publish_id');

    // Sequential on purpose: records are appended in publish_id order.
    for (const pageDataset of pageDatasets) {
        await this.createXmlRecord(pageDataset, datasetsElement);
    }

    // Last page reached: no resumption token has to be stored.
    if (pendingIds.length <= 0) {
        return;
    }

    // store the further ids in a resumption token
    const nextToken = new ResumptionToken();
    nextToken.startPosition = listState.start;
    nextToken.totalIds = listState.totalIds;
    nextToken.documentIds = pendingIds;
    nextToken.metadataPrefix = listState.metadataPrefix;

    const resumptionId: string = await this.tokenWorker.set(nextToken);

    // set parameters for the resumptionToken-node
    this.setParamResumption(resumptionId, listState.cursor, listState.totalIds);
}
|
|
|
|
/**
 * Restores the paging state of a running list request from the stored
 * resumption token named by the `resumptionToken` argument.
 *
 * @param oaiRequest OAI arguments; `resumptionToken` is read
 * @param maxRecords page size, used to compute the next start position
 * @param numWrapper paging state that is filled in place
 * @throws OaiModelException with code BADRESUMPTIONTOKEN when the token is not found
 */
private async handleResumptionToken(oaiRequest: Dictionary, maxRecords: number, numWrapper) {
    const tokenId = oaiRequest['resumptionToken']; // e.g. "158886496600000"
    const storedToken = await this.tokenWorker.get(tokenId);

    if (!storedToken) {
        throw new OaiModelException(StatusCodes.INTERNAL_SERVER_ERROR, 'cache is outdated.', OaiErrorCodes.BADRESUMPTIONTOKEN);
    }

    const { startPosition, totalIds, documentIds, metadataPrefix } = storedToken;
    Object.assign(numWrapper, {
        cursor: startPosition - 1,
        start: startPosition + maxRecords,
        totalIds: totalIds,
        reldocIds: documentIds,
        metadataPrefix: metadataPrefix,
    });

    this.xsltParameter['oai_metadataPrefix'] = numWrapper.metadataPrefix;
}
|
|
|
|
/**
 * Starts a new list request (no resumption token): validates the arguments,
 * builds the dataset query from `set`, `from` and `until`, and stores the
 * ordered list of matching publish ids in `numWrapper`.
 *
 * @param oaiRequest OAI arguments; `metadataPrefix`, `set`, `from`, `until` are read
 * @param numWrapper paging state; `metadataPrefix`, `reldocIds` and `totalIds` are set
 * @throws OaiModelException with code BADARGUMENT for a missing metadataPrefix,
 *         an unparsable date, differing from/until granularity or from > until;
 *         with code NORECORDSMATCH when the date range cannot match anything
 */
private async handleNoResumptionToken(oaiRequest: Dictionary, numWrapper) {
    if (!('metadataPrefix' in oaiRequest)) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The prefix of the metadata argument is unknown.',
            OaiErrorCodes.BADARGUMENT,
        );
    }
    numWrapper.metadataPrefix = oaiRequest['metadataPrefix'];
    this.xsltParameter['oai_metadataPrefix'] = numWrapper.metadataPrefix;

    const finder: ModelQueryBuilderContract<typeof Dataset, Dataset> = Dataset.query();
    // add server state restrictions
    finder.whereIn('server_state', this.deliveringDocumentStates);

    if ('set' in oaiRequest) {
        const set = oaiRequest['set'] as string;
        const setArray = set.split(':');
        const hasSingleValue = setArray.length === 2 && setArray[1] !== '';

        if (setArray[0] === 'data-type') {
            if (hasSingleValue) {
                finder.where('type', setArray[1]);
            }
        } else if (setArray[0] === 'open_access') {
            const openAccessLicences = ['CC-BY-4.0', 'CC-BY-SA-4.0'];
            finder.andWhereHas('licenses', (query) => {
                query.whereIn('name', openAccessLicences);
            });
        } else if (setArray[0] === 'ddc') {
            if (hasSingleValue) {
                finder.andWhereHas('collections', (query) => {
                    query.where('number', setArray[1]);
                });
            }
        }
    }

    const timeZone = 'Europe/Vienna';
    const dbDateFormat = 'YYYY-MM-DD HH:mm:ss';
    const hasFrom = 'from' in oaiRequest;
    const hasUntil = 'until' in oaiRequest;

    // e.g. &from=2020-09-11&until=2021-05-11
    if (hasFrom && hasUntil) {
        const from = oaiRequest['from'] as string;
        const until = oaiRequest['until'] as string;
        if (!dayjs(from).isValid() || !dayjs(until).isValid()) {
            throw new OaiModelException(StatusCodes.INTERNAL_SERVER_ERROR, 'Date Parameter is not valid.', OaiErrorCodes.BADARGUMENT);
        }
        if (from.length !== until.length) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'The request has different granularities for the from and until parameters.',
                OaiErrorCodes.BADARGUMENT,
            );
        }

        let fromDate = dayjs.tz(from, timeZone);
        let untilDate = dayjs.tz(until, timeZone);
        // An hour of 0 is taken as "day granularity": widen to the whole day.
        fromDate.hour() === 0 && (fromDate = fromDate.startOf('day'));
        untilDate.hour() === 0 && (untilDate = untilDate.endOf('day'));

        // OAI-PMH: from must not be later than until (badArgument).
        if (fromDate.isAfter(untilDate)) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'The from date must not be later than the until date.',
                OaiErrorCodes.BADARGUMENT,
            );
        }

        finder.whereBetween('server_date_published', [fromDate.format(dbDateFormat), untilDate.format(dbDateFormat)]);
    } else if (hasFrom) {
        const from = oaiRequest['from'] as string;
        if (!dayjs(from).isValid()) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'From date parameter is not valid.',
                OaiErrorCodes.BADARGUMENT,
            );
        }
        let fromDate = dayjs.tz(from, timeZone);
        fromDate.hour() === 0 && (fromDate = fromDate.startOf('day'));

        if (fromDate.isAfter(dayjs())) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'Given from date is greater than now. The given values results in an empty list.',
                OaiErrorCodes.NORECORDSMATCH,
            );
        }
        finder.andWhere('server_date_published', '>=', fromDate.format(dbDateFormat));
    } else if (hasUntil) {
        const until = oaiRequest['until'] as string;
        if (!dayjs(until).isValid()) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'Until date parameter is not valid.',
                OaiErrorCodes.BADARGUMENT,
            );
        }
        let untilDate = dayjs.tz(until, timeZone);
        untilDate.hour() === 0 && (untilDate = untilDate.endOf('day'));

        // May be null when nothing has been published yet; in that case the
        // early-out is skipped and the (empty) query result is handled by the caller.
        const firstPublishedDataset: Dataset | null = await Dataset.earliestPublicationDate();
        if (firstPublishedDataset != null) {
            const earliestPublicationDate = dayjs(firstPublishedDataset.server_date_published.toISO());
            if (earliestPublicationDate.isAfter(untilDate)) {
                throw new OaiModelException(
                    StatusCodes.INTERNAL_SERVER_ERROR,
                    'earliestDatestamp is greater than given until date.\n' + 'The given values results in an empty list.',
                    OaiErrorCodes.NORECORDSMATCH,
                );
            }
        }
        finder.andWhere('server_date_published', '<=', untilDate.format(dbDateFormat));
    }

    const reldocIdsDocs = await finder.select('publish_id').orderBy('publish_id');
    numWrapper.reldocIds = reldocIdsDocs.map((dat) => dat.publish_id);
    numWrapper.totalIds = numWrapper.reldocIds.length;
}
|
|
|
|
/**
 * Publishes the values of the resumptionToken node as stylesheet parameters.
 *
 * @param res      id of the stored resumption token
 * @param cursor   cursor of the current page
 * @param totalIds size of the complete list
 */
private setParamResumption(res: string, cursor: number, totalIds: number) {
    // Expiration date: one day from now. `HH` is the 24-hour clock (`hh` would
    // print 01-12 without AM/PM), and the literal "Z" requires a UTC instant.
    const tomorrow = dayjs().utc().add(1, 'day').format('YYYY-MM-DDTHH:mm:ss[Z]');
    this.xsltParameter['dateDelete'] = tomorrow;
    this.xsltParameter['res'] = res;
    this.xsltParameter['cursor'] = cursor;
    this.xsltParameter['totalIds'] = totalIds;
}
|
|
|
|
/**
 * Extracts the numeric dataset id from the `identifier` argument.
 * The identifier is expected in the form `oai:<repository>:<id>`.
 *
 * @param oaiRequest OAI arguments; `identifier` is read
 * @returns the id part of the identifier as a number
 * @throws BadOaiModelException when the `identifier` argument is missing
 * @throws OaiModelException with code BADARGUMENT when the id part is missing
 *         or is not a plain sequence of decimal digits
 */
private validateAndGetIdentifier(oaiRequest: Dictionary): number {
    if (!('identifier' in oaiRequest)) {
        throw new BadOaiModelException('The prefix of the identifier argument is unknown.');
    }

    const rawId = this.getDocumentIdByIdentifier(oaiRequest.identifier);
    // Number() alone would also accept '', ' 12 ', '0x10' or '1e3'; only plain
    // decimal digits are a valid id.
    if (!rawId || !/^\d+$/.test(rawId)) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The value of the identifier argument is illegal in this repository.',
            OaiErrorCodes.BADARGUMENT,
        );
    }
    return Number(rawId);
}
|
|
|
|
/**
 * Returns the `metadataPrefix` argument of the request.
 *
 * @param oaiRequest OAI arguments of the request
 * @throws OaiModelException with code BADARGUMENT when the argument is missing
 */
private validateAndGetMetadataPrefix(oaiRequest: Dictionary): string {
    const prefixGiven = 'metadataPrefix' in oaiRequest;
    if (!prefixGiven) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The prefix of the metadata argument is unknown.',
            OaiErrorCodes.BADARGUMENT,
        );
    }
    return oaiRequest['metadataPrefix'];
}
|
|
|
|
/**
 * Ensures the dataset is in one of the states that may be delivered via OAI
 * (`deliveringDocumentStates`).
 *
 * @param dataset dataset to check
 * @throws OaiModelException with code NORECORDSMATCH when the state is
 *         missing or not deliverable
 */
private validateDatasetState(dataset: Dataset): void {
    const state = dataset.server_state;
    const deliverable = state != null && this.deliveringDocumentStates.includes(state);
    if (deliverable) {
        return;
    }
    throw new OaiModelException(
        StatusCodes.INTERNAL_SERVER_ERROR,
        'Document is not available for OAI export!',
        OaiErrorCodes.NORECORDSMATCH,
    );
}
|
|
|
|
/**
 * Appends the XML of one dataset to `datasetNode`, enriched with the landing
 * page attribute and one `SetSpec` element per set the dataset belongs to.
 * Nothing is appended when no XML document is available for the dataset.
 *
 * @param dataset     dataset to serialize (collections and their roles preloaded)
 * @param datasetNode parent element that receives the dataset XML
 */
private async createXmlRecord(dataset: Dataset, datasetNode: XMLBuilder) {
    const domNode = await this.getDatasetXmlDomNode(dataset);
    if (!domNode) {
        return;
    }

    // add frontdoor url and data-type
    if (dataset.publish_id) {
        this.addLandingPageAttribute(domNode, dataset.publish_id.toString());
    }
    this.addSpecInformation(domNode, 'data-type:' + dataset.type);

    for (const coll of dataset.collections ?? []) {
        const collRole = coll.collectionRole;
        // A collection without a role has no set prefix; skip it instead of
        // failing the whole response on a null dereference.
        if (!collRole) {
            continue;
        }
        this.addSpecInformation(domNode, collRole.oai_name + ':' + coll.number);
    }

    datasetNode.import(domNode);
}
|
|
|
|
/**
 * Produces the XML document of a dataset through `XmlModel`, with empty
 * fields excluded and caching switched on. A preloaded `xmlCache` relation is
 * handed to the model when present.
 *
 * @param dataset dataset to serialize
 * @returns the document returned by `XmlModel.getDomDocument()`, possibly null
 */
private async getDatasetXmlDomNode(dataset: Dataset) {
    const model = new XmlModel(dataset);
    model.excludeEmptyFields();
    model.caching = true;

    const cachedXml = dataset.xmlCache;
    if (cachedXml) {
        model.xmlCache = cachedXml;
    }

    const document: XMLBuilder | null = await model.getDomDocument();
    return document;
}
|
|
|
|
/**
 * Adds a `SetSpec` child element whose `Value` attribute is the given setSpec.
 *
 * @param domNode     dataset element to extend
 * @param information setSpec, e.g. `data-type:<type>`
 */
private addSpecInformation(domNode: XMLBuilder, information: string) {
    const setSpecElement = domNode.ele('SetSpec');
    setSpecElement.att('Value', information);
}
|
|
|
|
/**
 * Sets the `landingpage` attribute of the dataset element to
 * `https://<domain>/dataset/<id>`, where the domain is derived from the
 * `OAI_BASE_DOMAIN` environment variable (default `localhost`).
 *
 * @param domNode dataset element to extend
 * @param dataid  publish id of the dataset
 */
private addLandingPageAttribute(domNode: XMLBuilder, dataid: string) {
    const configuredDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
    const landingPage = `https://${getDomain(configuredDomain)}/dataset/${dataid}`;
    domNode.att('landingpage', landingPage);
}
|
|
|
|
/**
 * Returns the third colon-separated part of an OAI identifier, i.e. the id in
 * `oai:<repository>:<id>`. No validation is done here: the result is
 * `undefined` at runtime when the identifier has fewer than three parts.
 *
 * @param oaiIdentifier value of the `identifier` argument
 */
private getDocumentIdByIdentifier(oaiIdentifier: string): string {
    const [, , idPart] = oaiIdentifier.split(':');
    return idPart;
}
|
|
|
|
/**
 * Builds the sets derived from collections whose role is visible for OAI
 * (`visible_oai`). Collections without a number are left out.
 *
 * @returns map of setSpec (`<role oai_name>:<number>`) to set description
 */
private async getSetsForCollections(): Promise<Dictionary> {
    const visibleCollections = await Collection.query()
        .select('name', 'number', 'role_id')
        .whereHas('collectionRole', (roleQuery) => {
            roleQuery.where('visible_oai', true);
        })
        .preload('collectionRole');

    const result: Dictionary = {};
    for (const entry of visibleCollections) {
        if (!entry.number) {
            continue;
        }
        const spec = entry.collectionRole?.oai_name + ':' + entry.number;
        result[spec] = `Set ${entry.number} '${entry.name}'`;
    }
    return result;
}
|
|
|
|
/**
 * Builds one set per distinct type of the published datasets.
 *
 * Datasets without a type are ignored. A type containing characters outside
 * `sampleRegEx` is logged once and skipped, as it cannot be used in a setSpec.
 *
 * @returns map of setSpec (`data-type:<type>`) to set description
 */
private async getSetsForDatasetTypes(): Promise<Dictionary> {
    const sets: Dictionary = {};

    const datasets: Array<Dataset> = await Dataset.query().select('type').where('server_state', 'published');

    // Many datasets share a type: validate and report each type only once.
    const distinctTypes = new Set<string>();
    datasets.forEach((dataset) => {
        if (dataset.type) {
            distinctTypes.add(dataset.type);
        }
    });

    distinctTypes.forEach((type) => {
        if (!preg_match(this.sampleRegEx, type)) {
            const msg = `Invalid SetSpec (data-type='${type}').\nAllowed characters are [${this.sampleRegEx}].`;
            Logger.error(`OAI-PMH: ${msg}`);
            return;
        }
        sets['data-type:' + type] = `Set for document type '${type}'`;
    });
    return sets;
}
|
|
|
|
/**
 * Reports an unsupported verb to the stylesheet as OAI error `badVerb`.
 */
private handleIllegalVerb() {
    Object.assign(this.xsltParameter, {
        oai_error_code: 'badVerb',
        oai_error_message: 'The verb provided in the request is illegal.',
    });
}
|
|
}
|