tethys/app/Http/Controllers/Oai/RequestController.php

425 lines
15 KiB
PHP
Raw Normal View History

2018-08-06 12:30:51 +00:00
<?php
namespace App\Http\Controllers\Oai;
use Illuminate\Http\Request;
use App\Http\Controllers\Controller;
2018-09-10 13:09:10 +00:00
use App\Models\Dataset;
2018-08-06 12:30:51 +00:00
use Illuminate\Support\Facades\Log;
class RequestController extends Controller
{
/**
 * Pattern fragment (a character class followed by "+") that a document
 * type must match from start to end before it is offered as a
 * "doc-type:" set.
 */
const SET_SPEC_PATTERN = '[A-Za-z0-9\-_\.!~\*\'\(\)]+';

/**
 * Values of a dataset's server_state for which records are delivered
 * by this controller. Deleted datasets are currently included.
 *
 * @var array
 */
private $deliveringDocumentStates = ['published', 'deleted'];

/**
 * DOM document that the verb handlers fill and that is finally passed
 * through the XSLT processor.
 *
 * @var \DomDocument Defaults to null.
 */
protected $_xml = null;

/**
 * DOM document holding the stylesheet used for the transformation.
 *
 * @var \DomDocument Defaults to null.
 */
protected $xslt = null;

/**
 * The XSLT processor that renders the response.
 *
 * @var \XSLTProcessor Defaults to null.
 */
protected $_proc = null;
/**
 * Loads an XSLT stylesheet and imports it into the XSLT processor.
 *
 * When the HTTP_HOST server variable is set, its value is handed to the
 * stylesheet as the "host" parameter.
 *
 * @param string $stylesheet Location of the stylesheet, passed as-is to \DomDocument::load().
 * @return void
 */
private function loadStyleSheet($stylesheet)
{
    $stylesheetDom = $this->xslt = new \DomDocument;
    $stylesheetDom->load($stylesheet);
    $this->_proc->importStyleSheet($stylesheetDom);

    if (!isset($_SERVER['HTTP_HOST'])) {
        return;
    }

    $this->_proc->setParameter('', 'host', $_SERVER['HTTP_HOST']);
}
/**
 * Creates the empty DOM document and the XSLT processor that every
 * request handled by this controller works on.
 *
 * No middleware is registered here.
 */
public function __construct()
{
    $this->_proc = new \XSLTProcessor();
    $this->_xml = new \DomDocument();
}
/**
 * Controller action: collects the request input, drops the keys
 * 'module', 'controller', 'action' and 'role', and passes the rest on
 * to the OAI request handler.
 *
 * @param Request $request Incoming HTTP request.
 * @return mixed Whatever __handleRequest() returns.
 */
public function index(Request $request)
{
    $ignoredKeys = array_flip(array('module', 'controller', 'action', 'role'));
    $oaiRequest = array_diff_key($request->all(), $ignoredKeys);

    return $this->__handleRequest($oaiRequest);
}
/**
 * Dispatches an OAI request to the handler of its verb and renders the
 * result through the 'oai-pmh.xslt' stylesheet.
 *
 * A request without a 'verb' entry is answered like 'Identify'; a verb
 * that matches none of the supported ones is reported via
 * handleIllegalVerb().
 *
 * @param array $oaiRequest Request parameters (already stripped of routing keys by index()).
 * @return mixed The response object built by response() from the transformed XML.
 */
private function __handleRequest(array $oaiRequest)
{
    // Setup stylesheet
    $this->loadStyleSheet('oai-pmh.xslt');

    // The pattern ends in a literal "Z" (UTC designator), so the value has
    // to be UTC regardless of the configured default timezone: gmdate().
    $this->_proc->setParameter('', 'responseDate', gmdate("Y-m-d\TH:i:s\Z"));

    // set OAI base url: request path without the query string
    $uri = explode('?', $_SERVER['REQUEST_URI'], 2);
    $this->_proc->setParameter('', 'baseURL', url('/') . $uri[0]);

    // a missing verb falls back to Identify
    $verb = isset($oaiRequest['verb']) ? $oaiRequest['verb'] : 'Identify';
    $this->_proc->setParameter('', 'oai_verb', $verb);

    switch ($verb) {
        case 'Identify':
            // handleIdentify() returns nothing; it only fills $this->_xml
            // and the processor parameters, so there is nothing to store.
            $this->handleIdentify();
            break;
        case 'ListMetadataFormats':
            $this->handleListMetadataFormats();
            break;
        case 'ListRecords':
            $this->handleListRecords($oaiRequest);
            break;
        case 'ListIdentifiers':
            $this->handleListIdentifiers($oaiRequest);
            break;
        case 'ListSets':
            $this->handleListSets();
            break;
        default:
            $this->handleIllegalVerb();
            break;
    }

    $xml = $this->_proc->transformToXML($this->_xml);

    // The charset belongs to the Content-Type value; a separate header
    // named "charset" has no meaning to HTTP clients.
    return response($xml)
        ->header('Content-Type', 'application/xml; charset=utf-8');
}
/**
 * Implements response for OAI-PMH verb 'Identify'.
 *
 * Passes the repository's contact address, name, identifier and the
 * earliest publication date to the stylesheet and adds an empty
 * 'Documents' root element to the XML document.
 *
 * @return void
 */
private function handleIdentify()
{
    // parameters for oai-pmh.xslt, set in this order
    $stylesheetParameters = array(
        'email' => "repository@geologie.ac.at",
        'repositoryName' => "Data Research Repository",
        'repIdentifier' => "rdr.gba.ac.at",
        'earliestDatestamp' => Dataset::earliestPublicationDate(),
    );

    foreach ($stylesheetParameters as $parameterName => $parameterValue) {
        $this->_proc->setParameter('', $parameterName, $parameterValue);
    }

    $root = $this->_xml->createElement('Documents');
    $this->_xml->appendChild($root);
}
/**
 * Implements response for OAI-PMH verb 'ListMetadataFormats'.
 *
 * Only an empty 'Documents' root element is added to the XML document;
 * the method takes no request data.
 *
 * @return void
 */
private function handleListMetadataFormats()
{
    $root = $this->_xml->createElement('Documents');
    $this->_xml->appendChild($root);
}
/**
 * Implements response for OAI-PMH verb 'ListRecords'.
 *
 * Delegates to handlingOfLists() with a fixed limit of 20 records.
 *
 * @param array $oaiRequest Contains full request information
 * @return void
 */
private function handleListRecords($oaiRequest)
{
    // fixed limit; no configuration lookup is performed here
    $this->handlingOfLists($oaiRequest, 20);
}
/**
 * Implements response for OAI-PMH verb 'ListIdentifiers'.
 *
 * Delegates to handlingOfLists() with a fixed limit of 20 entries.
 *
 * @param array &$oaiRequest Contains full request information
 * @return void
 */
private function handleListIdentifiers(array &$oaiRequest)
{
    // fixed limit; no configuration lookup is performed here
    $this->handlingOfLists($oaiRequest, 20);
}
/**
 * Implements response for OAI-PMH verb 'ListSets'.
 *
 * Adds a 'Documents' root element and, below it, one 'Rdr_Sets' element
 * per set. Each element carries the set spec in its 'Type' attribute and
 * the descriptive name in its 'TypeName' attribute. The two bibliography
 * sets are always listed; document type sets come from
 * getSetsForDocumentTypes().
 *
 * @return void
 */
private function handleListSets()
{
    $this->_proc->setParameter('', 'repIdentifier', "rdr.gba.ac.at");

    $root = $this->_xml->createElement('Documents');
    $this->_xml->appendChild($root);

    $fixedSets = array(
        'bibliography:true' => 'Set for bibliographic entries',
        'bibliography:false' => 'Set for non-bibliographic entries',
    );
    $allSets = array_merge($fixedSets, $this->getSetsForDocumentTypes());

    foreach ($allSets as $setSpec => $setName) {
        $setElement = $this->_xml->createElement('Rdr_Sets');

        // attribute values are attached as text nodes
        $attributes = array('Type' => $setSpec, 'TypeName' => $setName);
        foreach ($attributes as $attributeName => $attributeValue) {
            $attribute = $this->_xml->createAttribute($attributeName);
            $attribute->appendChild($this->_xml->createTextNode($attributeValue));
            $setElement->appendChild($attribute);
        }

        $this->_xml->documentElement->appendChild($setElement);
    }
}
2018-09-10 13:09:10 +00:00
/**
 * Reports an unsupported verb: hands the error code 'badVerb' and its
 * message to the stylesheet. The XML document itself is not touched.
 *
 * @return void
 */
private function handleIllegalVerb()
{
    $errorParameters = array(
        'oai_error_code' => 'badVerb',
        'oai_error_message' => 'The verb provided in the request is illegal.',
    );

    foreach ($errorParameters as $parameterName => $parameterValue) {
        $this->_proc->setParameter('', $parameterName, $parameterValue);
    }
}
/**
 * Helper method for handling lists.
 *
 * Adds a 'Documents' root element and appends one XML record for each of
 * the first $maxRecords datasets whose server_state is one of the
 * delivered states. A 'set' argument of the form "doc-type:<type>"
 * narrows the selection to that dataset type; any other 'set' value is
 * ignored.
 *
 * Only the first page is produced: this method does not read or emit a
 * resumption token.
 *
 * @param array $oaiRequest query parameter; 'metadataPrefix' and 'set' are read, nothing is written back
 * @param mixed $maxRecords max count of records; an empty value falls back to 100
 *
 * @return void
 */
private function handlingOfLists(array &$oaiRequest, $maxRecords)
{
    if (true === empty($maxRecords)) {
        $maxRecords = 100;
    }

    $this->_proc->setParameter('', 'repIdentifier', 'rdr.gba.ac.at');
    $this->_xml->appendChild($this->_xml->createElement('Documents'));

    // The stylesheet parameter has to be a string: a missing, null or
    // non-scalar metadataPrefix is passed on as the empty string.
    $metadataPrefix = '';
    if (isset($oaiRequest['metadataPrefix']) && is_scalar($oaiRequest['metadataPrefix'])) {
        $metadataPrefix = (string) $oaiRequest['metadataPrefix'];
    }
    $this->_proc->setParameter('', 'oai_metadataPrefix', $metadataPrefix);

    // add server state restrictions
    $finder = Dataset::query()
        ->whereIn('server_state', $this->deliveringDocumentStates);

    // optional restriction to one document type via set=doc-type:<type>
    if (isset($oaiRequest['set']) && is_string($oaiRequest['set'])) {
        $setarray = explode(':', $oaiRequest['set']);
        if (count($setarray) === 2 && $setarray[0] === 'doc-type' && !empty($setarray[1])) {
            $finder->where('type', $setarray[1]);
        }
    }

    // One limited query instead of plucking every id and loading each
    // dataset separately; ordering by id makes the selected page stable.
    $datasets = $finder->orderBy('id')->take($maxRecords)->get();

    foreach ($datasets as $dataset) {
        $this->createXmlRecord($dataset);
    }
}
/**
 * Appends the XML representation of one dataset to the root element of
 * the response document.
 *
 * The dataset node receives its landing page attribute before it is
 * imported into the response document, and a 'SetSpec' child for its
 * document type afterwards.
 *
 * @param Dataset $dataset Dataset to serialise.
 * @return void
 */
private function createXmlRecord(Dataset $dataset)
{
    $datasetNode = $this->getDatasetXmlDomNode($dataset);

    // add frontdoor url while the node still belongs to its own document
    $this->addLandingPageAttribute($datasetNode, $dataset->id);

    // deep copy into the response document
    $importedNode = $this->_xml->importNode($datasetNode, true);
    $this->addSpecInformation($importedNode, "doc-type:{$dataset->type}");

    $this->_xml->documentElement->appendChild($importedNode);
}
/**
 * Add the landingpage attribute to Rdr_Dataset XML output.
 *
 * The attribute value is the URL of the 'frontend.dataset.show' route
 * for the given dataset id.
 *
 * @param \DOMNode $document Rdr_Dataset XML serialisation
 * @param string $dataid Id of the dataset
 * @return void
 */
private function addLandingPageAttribute(\DOMNode $document, $dataid)
{
    $landingPageUrl = route('frontend.dataset.show', $dataid);

    // nodes must be created by the document that owns the target node
    $ownerDocument = $document->ownerDocument;
    $landingPage = $ownerDocument->createAttribute('landingpage');
    $landingPage->appendChild($ownerDocument->createTextNode($landingPageUrl));

    $document->appendChild($landingPage);
}
2018-09-10 13:09:10 +00:00
/**
 * Appends a 'SetSpec' child element to the given node; the set
 * information is stored in the element's 'Value' attribute.
 *
 * The new nodes are created by the response document ($this->_xml).
 *
 * @param \DOMNode $document Node that receives the 'SetSpec' element.
 * @param string $information Set spec text, e.g. "doc-type:" followed by a type.
 * @return void
 */
private function addSpecInformation(\DOMNode $document, $information)
{
    $setSpec = $this->_xml->createElement('SetSpec');

    $value = $this->_xml->createAttribute('Value');
    $value->appendChild($this->_xml->createTextNode($information));
    $setSpec->appendChild($value);

    $document->appendChild($setSpec);
}
/**
 * Builds the XML serialisation of a dataset and returns its first
 * 'Rdr_Dataset' element.
 *
 * @param mixed $dataset Dataset model; its server_state must be one of the delivered states.
 * @return mixed First 'Rdr_Dataset' node of the XML model's DOM document.
 * @throws \Exception When the dataset's server_state is not among the delivered states.
 */
private function getDatasetXmlDomNode($dataset)
{
    $serverState = $dataset->server_state;
    $isDeliverable = in_array($serverState, $this->deliveringDocumentStates);

    if (!$isDeliverable) {
        $message = sprintf('Trying to get a document in server state "%s"', $serverState);
        Log::error('server state: ' . $message);
        throw new \Exception($message);
    }

    $dataset->fetchValues();

    $xmlModel = new \App\Library\Xml\XmlModel();
    $xmlModel->setModel($dataset);
    $xmlModel->excludeEmptyFields();
    $xmlModel->setXmlCache(new \App\Models\XmlCache());

    $datasetElements = $xmlModel->getDomDocument()->getElementsByTagName('Rdr_Dataset');

    return $datasetElements->item(0);
}
/**
 * Returns oai sets for document types.
 *
 * Every document type reported by the finder for published datasets
 * becomes an entry "doc-type:<type>" => description. Types that do not
 * match SET_SPEC_PATTERN are logged as errors and left out.
 *
 * @return array Set descriptions keyed by set spec.
 */
private function getSetsForDocumentTypes()
{
    $finder = new \App\Models\DatasetFinder();
    $finder->setServerState('published');

    $typePattern = '/^' . self::SET_SPEC_PATTERN . '$/';
    $sets = array();

    foreach ($finder->groupedTypesPlusCount() as $doctype => $row) {
        if (preg_match($typePattern, $doctype)) {
            $sets['doc-type:' . $doctype] = "Set for document type '$doctype'";
        } else {
            Log::error(sprintf(
                "OAI-PMH: Invalid SetSpec (doctype='%s'). Allowed characters are [%s].",
                $doctype,
                self::SET_SPEC_PATTERN
            ));
        }
    }

    return $sets;
}
/**
 * Builds an 'Identify' response by hand as a SimpleXMLElement, without
 * going through the XSLT processor.
 *
 * No method of this class calls it; the dispatcher uses handleIdentify().
 *
 * @return \SimpleXMLElement Root 'OAI-PMH' element of the response.
 */
private function handleIdentifyOld()
{
    $earliestDateFromDb = Dataset::earliestPublicationDate();

    $sxe = new \SimpleXMLElement(
        '<?xml version="1.0"?><?xml-stylesheet type="text/xsl" href="xsl/oai2.xslt"?><OAI-PMH/>'
    );
    $sxe->addAttribute('xmlns', 'http://www.openarchives.org/OAI/2.0/');
    $sxe->addAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance');
    $sxe->addAttribute('xmlns:mml', 'http://www.w3.org/1998/Math/MathML');
    $sxe->addAttribute(
        'xsi:schemaLocation',
        'http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd'
    );

    // The pattern ends in a literal "Z" (UTC designator), so the value has
    // to be UTC regardless of the configured default timezone: gmdate().
    $sxe->addChild('responseDate', gmdate("Y-m-d\TH:i:s\Z"));

    // request element: request path without the query string
    $uri = explode('?', $_SERVER['REQUEST_URI'], 2);
    $requestChild = $sxe->addChild('request', url('/') . $uri[0]);
    $requestChild->addAttribute('verb', 'Identify');

    $identify = $sxe->addChild('Identify');
    $identify->addChild('repositoryName', "Data Research Repository");
    $identify->addChild('baseURL', "http://rdr.gba.geolba.ac.at/");
    $identify->addChild('protocolVersion', '2.0');
    $identify->addChild('adminEmail', 'repository@geologie.ac.at');
    $identify->addChild('earliestDatestamp', $earliestDateFromDb);
    $identify->addChild('deletedRecord', 'persistent');

    return $sxe;
}
}