tethys/app/Library/Util/SolrSearchQuery.php

414 lines
12 KiB
PHP
Raw Permalink Normal View History

2018-08-06 12:30:51 +00:00
<?php
namespace App\Library\Util;
/**
* Encapsulates all parameter values needed to build the Solr query URL.
*/
class SolrSearchQuery
{
    // currently available search types
    const SIMPLE = 'simple';
    const ADVANCED = 'advanced';
    const FACET_ONLY = 'facet_only';
    const LATEST_DOCS = 'latest';
    const ALL_DOCS = 'all_docs';
    const DOC_ID = 'doc_id';

    // paging defaults
    const DEFAULT_START = 0;
    const DEFAULT_ROWS = 10;
    // java.lang.Integer.MAX_VALUE
    const MAX_ROWS = 2147483647;

    // sorting defaults
    const DEFAULT_SORTFIELD = 'score';
    const DEFAULT_SORTORDER = 'desc';

    // modifiers controlling how the terms of one advanced-search field are combined
    const SEARCH_MODIFIER_CONTAINS_ALL = "contains_all";
    const SEARCH_MODIFIER_CONTAINS_ANY = "contains_any";
    const SEARCH_MODIFIER_CONTAINS_NONE = "contains_none";

    private $start = self::DEFAULT_START;
    private $rows = self::DEFAULT_ROWS;
    private $sortField = self::DEFAULT_SORTFIELD;
    private $sortOrder = self::DEFAULT_SORTORDER;
    private $filterQueries = array();
    private $catchAll;
    private $searchType;
    // field name => modifier; starts as an array so getModifier() can be
    // called safely before any field has been set
    private $modifier = array();
    // field name => raw (unescaped) search value
    private $fieldValues = array();
    private $escapingEnabled = true;
    // cached query string; null means "needs to be rebuilt" (see getQ())
    private $q;
    private $facetField;
    private $returnIdsOnly = false;
    private $seriesId = null;

    /**
     * Creates a query of the given search type.
     *
     * FACET_ONLY queries request zero rows; LATEST_DOCS queries are sorted by
     * 'server_date_published' in descending order. An unrecognised type leaves
     * the search type unset (null), in which case getQ() builds an advanced
     * query string.
     *
     * @param string $searchType One of the search type constants of this class.
     */
    public function __construct($searchType = self::SIMPLE)
    {
        $this->q = null;

        $knownTypes = array(
            self::SIMPLE,
            self::ADVANCED,
            self::ALL_DOCS,
            self::FACET_ONLY,
            self::LATEST_DOCS,
            self::DOC_ID,
        );
        if (!in_array($searchType, $knownTypes, true)) {
            return;
        }

        $this->searchType = $searchType;

        if ($searchType === self::FACET_ONLY) {
            $this->setRows(0);
        } elseif ($searchType === self::LATEST_DOCS) {
            $this->sortField = 'server_date_published';
            $this->sortOrder = 'desc';
        }
    }

    /**
     * @return string|null The search type given to the constructor, or null if it was not recognised.
     */
    public function getSearchType()
    {
        return $this->searchType;
    }

    /**
     * @return mixed The facet field previously stored with setFacetField(), or null.
     */
    public function getFacetField()
    {
        return $this->facetField;
    }

    /**
     * @param mixed $facetField Value to store as the facet field.
     */
    public function setFacetField($facetField)
    {
        $this->facetField = $facetField;
    }

    /**
     * @return int Offset of the first result row (DEFAULT_START unless changed).
     */
    public function getStart()
    {
        return $this->start;
    }

    /**
     * @param int $start Offset of the first result row.
     */
    public function setStart($start)
    {
        $this->start = $start;
    }

    /**
     * Returns the default number of rows per result page.
     *
     * @return int The value of DEFAULT_ROWS.
     */
    public static function getDefaultRows()
    {
        // Return the constant directly; calling this method from itself
        // would recurse without end.
        return self::DEFAULT_ROWS;
    }

    /**
     * @return int Number of result rows requested.
     */
    public function getRows()
    {
        return $this->rows;
    }

    /**
     * @param int $rows Number of result rows to request.
     */
    public function setRows($rows)
    {
        $this->rows = $rows;
    }

    /**
     * @return string Name of the index field used for sorting.
     */
    public function getSortField()
    {
        return $this->sortField;
    }

    /**
     * Sets the sort field, mapping it to the name used in the index.
     *
     * - The default sort field ('score') is kept, except for ALL_DOCS searches
     *   where it is replaced by 'server_date_published'.
     * - Names starting with 'doc_sort_order_for_seriesid_' or
     *   'server_date_published' are used unchanged.
     * - Any other name gets the suffix '_sort' unless it already ends with it.
     *
     * @param string $sortField Requested sort field.
     */
    public function setSortField($sortField)
    {
        if ($sortField === self::DEFAULT_SORTFIELD) {
            // sorting by relevance does not make any sense when listing all
            // documents, so fall back to the publication date there
            $this->sortField = ($this->searchType === self::ALL_DOCS)
                ? 'server_date_published'
                : self::DEFAULT_SORTFIELD;
            return;
        }

        $this->sortField = $sortField;

        $usedVerbatim = strpos($sortField, 'doc_sort_order_for_seriesid_') === 0
            || strpos($sortField, 'server_date_published') === 0;
        if ($usedVerbatim) {
            return;
        }

        // add _sort to the end of $sortField if not already done
        $suffix = '_sort';
        if (substr($sortField, -strlen($suffix)) !== $suffix) {
            $this->sortField .= $suffix;
        }
    }

    /**
     * @return string Sort direction (DEFAULT_SORTORDER unless changed).
     */
    public function getSortOrder()
    {
        return $this->sortOrder;
    }

    /**
     * @param string $sortOrder Sort direction.
     */
    public function setSortOrder($sortOrder)
    {
        $this->sortOrder = $sortOrder;
    }

    /**
     * @return string|null The value of the last filter added for the field 'series_ids', or null.
     */
    public function getSeriesId()
    {
        return $this->seriesId;
    }

    /**
     *
     * @return array An array that contains all specified filter queries.
     */
    public function getFilterQueries()
    {
        return $this->filterQueries;
    }

    /**
     * Appends a filter query for the given field and value.
     *
     * The field 'has_fulltext' is added as a plain "field:value" expression;
     * every other field is added using the "{!raw f=field}value" form.
     *
     * @param string $filterField The field that should be used in a filter query.
     * @param string $filterValue The field value that should be used in a filter query.
     */
    public function addFilterQuery($filterField, $filterValue)
    {
        if ($filterField === 'has_fulltext') {
            $this->filterQueries[] = $filterField . ':' . $filterValue;
        } else {
            $this->filterQueries[] = '{!raw f=' . $filterField . '}' . $filterValue;
        }

        // we need to store the ID of the requested series here,
        // since we need it later to build the index field name
        if ($filterField === 'series_ids') {
            $this->seriesId = $filterValue;
        }
    }

    /**
     * Replaces all filter queries. The stored series ID is not touched.
     *
     * @param array $filterQueries An array of queries that should be used as filter queries.
     */
    public function setFilterQueries($filterQueries)
    {
        $this->filterQueries = $filterQueries;
    }

    /**
     * @return string|null The raw (unescaped) catch-all search string, or null if none was set.
     */
    public function getCatchAll()
    {
        return $this->catchAll;
    }

    /**
     * Sets the catch-all search string used by SIMPLE searches and
     * invalidates the cached query string.
     *
     * @param string $catchAll
     */
    public function setCatchAll($catchAll)
    {
        $this->catchAll = $catchAll;
        $this->invalidQCache();
    }

    /**
     * Stores a search value and its modifier for a field and invalidates the
     * cached query string. Values considered empty by PHP's empty() (which
     * includes the string '0') are ignored.
     *
     * @param string $name
     * @param string $value
     * @param string $modifier
     */
    public function setField($name, $value, $modifier = self::SEARCH_MODIFIER_CONTAINS_ALL)
    {
        if (empty($value)) {
            return;
        }
        $this->fieldValues[$name] = $value;
        $this->modifier[$name] = $modifier;
        $this->invalidQCache();
    }

    /**
     *
     * @param string $name
     * @return string|null Returns null if no value was specified for the given field name.
     */
    public function getField($name)
    {
        return array_key_exists($name, $this->fieldValues) ? $this->fieldValues[$name] : null;
    }

    /**
     *
     * @param string $fieldname
     * @return string|null Returns null if no modifier was specified for the given field name.
     */
    public function getModifier($fieldname)
    {
        return array_key_exists($fieldname, $this->modifier) ? $this->modifier[$fieldname] : null;
    }

    /**
     * Returns the query string (the Solr "q" parameter) for this search.
     *
     * The string is built lazily and cached, since building it is an
     * expensive operation; setters that influence it reset the cache.
     *
     * @return string|null
     */
    public function getQ()
    {
        if ($this->q === null) {
            // earlier cached query was marked as invalid: perform new setup of query cache
            $this->q = $this->setupQCache();
        }
        return $this->q;
    }

    /**
     * Builds the query string according to the search type.
     *
     * @return string|null
     */
    private function setupQCache()
    {
        switch (true) {
            case $this->searchType === self::SIMPLE:
                // the match-all query must be passed through unescaped
                if ($this->getCatchAll() === '*:*') {
                    return $this->catchAll;
                }
                return $this->escape($this->getCatchAll());

            case $this->searchType === self::FACET_ONLY:
            case $this->searchType === self::LATEST_DOCS:
            case $this->searchType === self::ALL_DOCS:
                return '*:*';

            case $this->searchType === self::DOC_ID:
                // getField() yields null when no id was set, which results in
                // the bare prefix 'id:' without raising an undefined-index notice
                return 'id:' . $this->getField('id');
        }

        return $this->buildAdvancedQString();
    }

    /**
     * Marks the cached query string as stale so that getQ() rebuilds it.
     */
    private function invalidQCache()
    {
        $this->q = null;
    }

    /**
     * Builds the query string for an advanced search: one clause per field,
     * separated by single spaces and prefixed with "{!lucene q.op=AND}".
     *
     * @return string
     */
    private function buildAdvancedQString()
    {
        $clauses = array();
        foreach ($this->fieldValues as $fieldname => $fieldvalue) {
            $modifier = $this->modifier[$fieldname];
            if ($modifier === self::SEARCH_MODIFIER_CONTAINS_ANY) {
                $clauses[] = $this->combineSearchTerms($fieldname, $fieldvalue, 'OR');
            } elseif ($modifier === self::SEARCH_MODIFIER_CONTAINS_NONE) {
                // exclude documents matching any of the terms
                $clauses[] = '-' . $this->combineSearchTerms($fieldname, $fieldvalue, 'OR');
            } else {
                // self::SEARCH_MODIFIER_CONTAINS_ALL
                $clauses[] = $this->combineSearchTerms($fieldname, $fieldvalue);
            }
        }

        return "{!lucene q.op=AND}" . implode(' ', $clauses);
    }

    /**
     * Builds a single clause of the form "field:(term1 <conj> term2 ...)".
     *
     * The value is escaped first and then split on whitespace. Splitting is
     * not phrase-aware, so a quoted phrase is split into separate terms too.
     *
     * @param string $fieldname Name of the index field.
     * @param string $fieldvalue Raw search value entered for the field.
     * @param string|null $conjunction Operator placed between terms; null joins terms with a single space.
     * @return string
     */
    private function combineSearchTerms($fieldname, $fieldvalue, $conjunction = null)
    {
        // -1 means "no limit"; passing null for the int limit is deprecated as of PHP 8.1
        $queryTerms = preg_split("/[\s]+/", $this->escape($fieldvalue), -1, PREG_SPLIT_NO_EMPTY);
        if ($queryTerms === false) {
            // preg_split() failed: treat as "no terms" instead of iterating over false
            $queryTerms = array();
        }

        $glue = is_null($conjunction) ? " " : " $conjunction ";

        return $fieldname . ':(' . implode($glue, $queryTerms) . ')';
    }

    /**
     * Turns off escaping of special query characters for this query and
     * invalidates the cached query string.
     */
    public function disableEscaping()
    {
        $this->invalidQCache();
        $this->escapingEnabled = false;
    }

    /**
     * Escape Lucene's special query characters specified in
     * http://lucene.apache.org/java/3_0_2/queryparsersyntax.html#Escaping%20Special%20Characters
     * Escaping currently ignores * and ? which are used as wildcard operators.
     * Additionally, double-quotes are not escaped and a double-quote is added to
     * the end of $query in case it contains an odd number of double-quotes.
     * Text inside double-quotes is passed through unchanged. If escaping has
     * been disabled, $query is returned as given.
     *
     * @param string|null $query The query which needs to be escaped.
     * @return string|null The escaped query.
     */
    private function escape($query)
    {
        if (!$this->escapingEnabled) {
            return $query;
        }

        // The catch-all may never have been set (null); cast before trimming
        // because passing null to trim() is deprecated as of PHP 8.1.
        $query = trim((string) $query);

        // add one " to the end of $query if it contains an odd number of "
        if (substr_count($query, '"') % 2 === 1) {
            $query .= '"';
        }

        // escape special characters (currently ignore " \* \?) outside of "":
        // after splitting at ", segments alternate between outside and inside a phrase
        $insidePhrase = false;
        $result = '';
        foreach (explode('"', $query) as $phrase) {
            if ($insidePhrase) {
                $result .= '"' . $phrase . '"';
            } else {
                $result .= preg_replace(
                    '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|~|:|\\\)/',
                    '\\\$1',
                    $this->lowercaseWildcardQuery($phrase)
                );
            }
            $insidePhrase = !$insidePhrase;
        }
        return $result;
    }

    /**
     * Lowercases $query if it contains a wildcard character (* or ?);
     * otherwise returns it unchanged.
     *
     * @param string $query
     * @return string
     */
    private function lowercaseWildcardQuery($query)
    {
        $hasWildcard = strpos($query, '*') !== false || strpos($query, '?') !== false;

        return $hasWildcard ? strtolower($query) : $query;
    }

    /**
     * @return string Human-readable description of this search.
     */
    public function __toString()
    {
        switch (true) {
            case $this->searchType === self::SIMPLE:
                return 'simple search with query ' . $this->getQ();
            case $this->searchType === self::FACET_ONLY:
                return 'facet only search with query *:*';
            case $this->searchType === self::LATEST_DOCS:
                return 'search for latest documents with query *:*';
            case $this->searchType === self::ALL_DOCS:
                return 'search for all documents';
            case $this->searchType === self::DOC_ID:
                return 'search for document id ' . $this->getQ();
        }

        return 'advanced search with query ' . $this->getQ();
    }

    /**
     *
     * @param boolean $returnIdsOnly
     */
    public function setReturnIdsOnly($returnIdsOnly)
    {
        $this->returnIdsOnly = $returnIdsOnly;
    }

    /**
     * @return boolean
     */
    public function isReturnIdsOnly()
    {
        return $this->returnIdsOnly;
    }
}